Skip to content

Commit

Permalink
Update key, unit and tags sanitization logic for metrics (#2292)
Browse files Browse the repository at this point in the history
  • Loading branch information
sl0thentr0py committed Apr 10, 2024
1 parent aa2e2e6 commit 16ae2c8
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 10 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,9 @@
## Unreleased

### Features

- Update key, unit and tags sanitization logic for metrics [#2292](https://github.com/getsentry/sentry-ruby/pull/2292)

### Bug Fixes

- Make sure isolated envelopes respect `config.enabled_environments` [#2291](https://github.com/getsentry/sentry-ruby/pull/2291)
Expand Down
32 changes: 25 additions & 7 deletions sentry-ruby/lib/sentry/metrics/aggregator.rb
Expand Up @@ -12,8 +12,18 @@ class Aggregator
# when we record code locations
DEFAULT_STACKLEVEL = 4

KEY_SANITIZATION_REGEX = /[^a-zA-Z0-9_\/.-]+/
VALUE_SANITIZATION_REGEX = /[^[[:word:]][[:digit:]][[:space:]]_:\/@\.{}\[\]$-]+/
KEY_SANITIZATION_REGEX = /[^a-zA-Z0-9_\-.]+/
UNIT_SANITIZATION_REGEX = /[^a-zA-Z0-9_]+/
TAG_KEY_SANITIZATION_REGEX = /[^a-zA-Z0-9_\-.\/]+/

TAG_VALUE_SANITIZATION_MAP = {
"\n" => "\\n",
"\r" => "\\r",
"\t" => "\\t",
"\\" => "\\\\",
"|" => "\\u{7c}",
"," => "\\u{2c}"
}

METRIC_TYPES = {
c: CounterMetric,
Expand Down Expand Up @@ -180,17 +190,17 @@ def serialize_buckets(buckets)
timestamp_buckets.map do |metric_key, metric|
type, key, unit, tags = metric_key
values = metric.serialize.join(':')
sanitized_tags = tags.map { |k, v| "#{sanitize_key(k)}:#{sanitize_value(v)}" }.join(',')
sanitized_tags = tags.map { |k, v| "#{sanitize_tag_key(k)}:#{sanitize_tag_value(v)}" }.join(',')

"#{sanitize_key(key)}@#{unit}:#{values}|#{type}|\##{sanitized_tags}|T#{timestamp}"
"#{sanitize_key(key)}@#{sanitize_unit(unit)}:#{values}|#{type}|\##{sanitized_tags}|T#{timestamp}"
end
end.flatten.join("\n")
end

def serialize_locations(timestamp, locations)
mapping = locations.map do |meta_key, location|
type, key, unit = meta_key
mri = "#{type}:#{sanitize_key(key)}@#{unit}"
mri = "#{type}:#{sanitize_key(key)}@#{sanitize_unit(unit)}"

# note this needs to be an array but it really doesn't serve a purpose right now
[mri, [location.merge(type: 'location')]]
Expand All @@ -203,8 +213,16 @@ def sanitize_key(key)
key.gsub(KEY_SANITIZATION_REGEX, '_')
end

def sanitize_value(value)
value.gsub(VALUE_SANITIZATION_REGEX, '')
def sanitize_unit(unit)
unit.gsub(UNIT_SANITIZATION_REGEX, '')
end

def sanitize_tag_key(key)
key.gsub(TAG_KEY_SANITIZATION_REGEX, '')
end

def sanitize_tag_value(value)
value.chars.map { |c| TAG_VALUE_SANITIZATION_MAP[c] || c }.join
end

def get_transaction_name
Expand Down
38 changes: 35 additions & 3 deletions sentry-ruby/spec/sentry/metrics/aggregator_spec.rb
Expand Up @@ -381,7 +381,7 @@
incr, dist = item.payload.split("\n")
expect(incr).to eq("incr@none:10.0|c|#environment:test,release:test-release|T#{fake_time.to_i - 3}")
expect(dist).to eq("dis_t@second:0.0:1.0:2.0:3.0:4.0|d|" +
"#environment:test,fo_-bar:snöwmän 23{},release:test-release|" +
"#environment:test,fo-bar:snöwmän% 23{},release:test-release|" +
"T#{fake_time.to_i - 3}")
end

Expand Down Expand Up @@ -431,15 +431,47 @@
incr1, dist1, incr2, dist2 = item.payload.split("\n")
expect(incr1).to eq("incr@none:10.0|c|#environment:test,release:test-release|T#{fake_time.to_i - 3}")
expect(dist1).to eq("dis_t@second:0.0:1.0:2.0:3.0:4.0|d|" +
"#environment:test,fo_-bar:snöwmän 23{},release:test-release|" +
"#environment:test,fo-bar:snöwmän% 23{},release:test-release|" +
"T#{fake_time.to_i - 3}")
expect(incr2).to eq("incr@none:5.0|c|#environment:test,release:test-release|T#{fake_time.to_i + 7}")
expect(dist2).to eq("dis_t@second:5.0:6.0:7.0:8.0:9.0|d|" +
"#environment:test,fo_-bar:snöwmän 23{},release:test-release|" +
"#environment:test,fo-bar:snöwmän% 23{},release:test-release|" +
"T#{fake_time.to_i + 7}")
end
end
end

context 'sanitization' do
it 'sanitizes the metric key' do
subject.add(:c, 'foo.disöt_12-bar', 1)
subject.flush(force: true)

sanitized_key = 'foo.dis_t_12-bar'
statsd, metrics_meta = sentry_envelopes.first.items.map(&:payload)
expect(statsd).to include(sanitized_key)
expect(metrics_meta[:mapping].keys.first).to include(sanitized_key)
end

it 'sanitizes the metric unit' do
subject.add(:c, 'incr', 1, unit: 'disöt_12-/.test')
subject.flush(force: true)

sanitized_unit = '@dist_12test'
statsd, metrics_meta = sentry_envelopes.first.items.map(&:payload)
expect(statsd).to include(sanitized_unit)
expect(metrics_meta[:mapping].keys.first).to include(sanitized_unit)
end

it 'sanitizes tag keys and values' do
tags = { "get.foö-$bar/12" => "hello!\n\r\t\\ 42 this | or , that" }
subject.add(:c, 'incr', 1, tags: tags)
subject.flush(force: true)

sanitized_tags = "get.fo-bar/12:hello!\\n\\r\\t\\\\ 42 this \\u{7c} or \\u{2c} that"
statsd = sentry_envelopes.first.items.first.payload
expect(statsd).to include(sanitized_tags)
end
end
end

describe '#kill' do
Expand Down

0 comments on commit 16ae2c8

Please sign in to comment.