From 16ae2c8c89bb5e33e687097b3ed31b8d578a2056 Mon Sep 17 00:00:00 2001 From: Neel Shah Date: Wed, 10 Apr 2024 16:48:04 +0200 Subject: [PATCH] Update key, unit and tags sanitization logic for metrics (#2292) --- CHANGELOG.md | 4 ++ sentry-ruby/lib/sentry/metrics/aggregator.rb | 32 ++++++++++++---- .../spec/sentry/metrics/aggregator_spec.rb | 38 +++++++++++++++++-- 3 files changed, 64 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ad333097e..5acb9f05d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ ## Unreleased +### Features + +- Update key, unit and tags sanitization logic for metrics [#2292](https://github.com/getsentry/sentry-ruby/pull/2292) + ### Bug Fixes - Make sure isolated envelopes respect `config.enabled_environments` [#2291](https://github.com/getsentry/sentry-ruby/pull/2291) diff --git a/sentry-ruby/lib/sentry/metrics/aggregator.rb b/sentry-ruby/lib/sentry/metrics/aggregator.rb index 020f961d6..e02def2c0 100644 --- a/sentry-ruby/lib/sentry/metrics/aggregator.rb +++ b/sentry-ruby/lib/sentry/metrics/aggregator.rb @@ -12,8 +12,18 @@ class Aggregator # when we record code locations DEFAULT_STACKLEVEL = 4 - KEY_SANITIZATION_REGEX = /[^a-zA-Z0-9_\/.-]+/ - VALUE_SANITIZATION_REGEX = /[^[[:word:]][[:digit:]][[:space:]]_:\/@\.{}\[\]$-]+/ + KEY_SANITIZATION_REGEX = /[^a-zA-Z0-9_\-.]+/ + UNIT_SANITIZATION_REGEX = /[^a-zA-Z0-9_]+/ + TAG_KEY_SANITIZATION_REGEX = /[^a-zA-Z0-9_\-.\/]+/ + + TAG_VALUE_SANITIZATION_MAP = { + "\n" => "\\n", + "\r" => "\\r", + "\t" => "\\t", + "\\" => "\\\\", + "|" => "\\u{7c}", + "," => "\\u{2c}" + } METRIC_TYPES = { c: CounterMetric, @@ -180,9 +190,9 @@ def serialize_buckets(buckets) timestamp_buckets.map do |metric_key, metric| type, key, unit, tags = metric_key values = metric.serialize.join(':') - sanitized_tags = tags.map { |k, v| "#{sanitize_key(k)}:#{sanitize_value(v)}" }.join(',') + sanitized_tags = tags.map { |k, v| "#{sanitize_tag_key(k)}:#{sanitize_tag_value(v)}" }.join(',') - "#{sanitize_key(key)}@#{unit}:#{values}|#{type}|\##{sanitized_tags}|T#{timestamp}" + "#{sanitize_key(key)}@#{sanitize_unit(unit)}:#{values}|#{type}|\##{sanitized_tags}|T#{timestamp}" end end.flatten.join("\n") end @@ -190,7 +200,7 @@ def serialize_buckets(buckets) def serialize_locations(timestamp, locations) mapping = locations.map do |meta_key, location| type, key, unit = meta_key - mri = "#{type}:#{sanitize_key(key)}@#{unit}" + mri = "#{type}:#{sanitize_key(key)}@#{sanitize_unit(unit)}" # note this needs to be an array but it really doesn't serve a purpose right now [mri, [location.merge(type: 'location')]] @@ -203,8 +213,16 @@ def sanitize_key(key) key.gsub(KEY_SANITIZATION_REGEX, '_') end - def sanitize_value(value) - value.gsub(VALUE_SANITIZATION_REGEX, '') + def sanitize_unit(unit) + unit.gsub(UNIT_SANITIZATION_REGEX, '') + end + + def sanitize_tag_key(key) + key.gsub(TAG_KEY_SANITIZATION_REGEX, '') + end + + def sanitize_tag_value(value) + value.chars.map { |c| TAG_VALUE_SANITIZATION_MAP[c] || c }.join end def get_transaction_name diff --git a/sentry-ruby/spec/sentry/metrics/aggregator_spec.rb b/sentry-ruby/spec/sentry/metrics/aggregator_spec.rb index 7ac5a8c2e..f397d07d5 100644 --- a/sentry-ruby/spec/sentry/metrics/aggregator_spec.rb +++ b/sentry-ruby/spec/sentry/metrics/aggregator_spec.rb @@ -381,7 +381,7 @@ incr, dist = item.payload.split("\n") expect(incr).to eq("incr@none:10.0|c|#environment:test,release:test-release|T#{fake_time.to_i - 3}") expect(dist).to eq("dis_t@second:0.0:1.0:2.0:3.0:4.0|d|" + - "#environment:test,fo_-bar:snöwmän 23{},release:test-release|" + + "#environment:test,fo-bar:snöwmän% 23{},release:test-release|" + "T#{fake_time.to_i - 3}") end @@ -431,15 +431,47 @@ incr1, dist1, incr2, dist2 = item.payload.split("\n") expect(incr1).to eq("incr@none:10.0|c|#environment:test,release:test-release|T#{fake_time.to_i - 3}") expect(dist1).to eq("dis_t@second:0.0:1.0:2.0:3.0:4.0|d|" + - "#environment:test,fo_-bar:snöwmän 23{},release:test-release|" + + "#environment:test,fo-bar:snöwmän% 23{},release:test-release|" + "T#{fake_time.to_i - 3}") expect(incr2).to eq("incr@none:5.0|c|#environment:test,release:test-release|T#{fake_time.to_i + 7}") expect(dist2).to eq("dis_t@second:5.0:6.0:7.0:8.0:9.0|d|" + - "#environment:test,fo_-bar:snöwmän 23{},release:test-release|" + + "#environment:test,fo-bar:snöwmän% 23{},release:test-release|" + "T#{fake_time.to_i + 7}") end end end + + context 'sanitization' do + it 'sanitizes the metric key' do + subject.add(:c, 'foo.disöt_12-bar', 1) + subject.flush(force: true) + + sanitized_key = 'foo.dis_t_12-bar' + statsd, metrics_meta = sentry_envelopes.first.items.map(&:payload) + expect(statsd).to include(sanitized_key) + expect(metrics_meta[:mapping].keys.first).to include(sanitized_key) + end + + it 'sanitizes the metric unit' do + subject.add(:c, 'incr', 1, unit: 'disöt_12-/.test') + subject.flush(force: true) + + sanitized_unit = '@dist_12test' + statsd, metrics_meta = sentry_envelopes.first.items.map(&:payload) + expect(statsd).to include(sanitized_unit) + expect(metrics_meta[:mapping].keys.first).to include(sanitized_unit) + end + + it 'sanitizes tag keys and values' do + tags = { "get.foö-$bar/12" => "hello!\n\r\t\\ 42 this | or , that" } + subject.add(:c, 'incr', 1, tags: tags) + subject.flush(force: true) + + sanitized_tags = "get.fo-bar/12:hello!\\n\\r\\t\\\\ 42 this \\u{7c} or \\u{2c} that" + statsd = sentry_envelopes.first.items.first.payload + expect(statsd).to include(sanitized_tags) + end + end end describe '#kill' do