Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update key, unit and tags sanitization logic for metrics #2292

Merged
merged 1 commit into from Apr 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,9 @@
## Unreleased

### Features

- Update key, unit and tags sanitization logic for metrics [#2292](https://github.com/getsentry/sentry-ruby/pull/2292)

### Bug Fixes

- Make sure isolated envelopes respect `config.enabled_environments` [#2291](https://github.com/getsentry/sentry-ruby/pull/2291)
Expand Down
32 changes: 25 additions & 7 deletions sentry-ruby/lib/sentry/metrics/aggregator.rb
Expand Up @@ -12,8 +12,18 @@ class Aggregator
# when we record code locations
DEFAULT_STACKLEVEL = 4

KEY_SANITIZATION_REGEX = /[^a-zA-Z0-9_\/.-]+/
VALUE_SANITIZATION_REGEX = /[^[[:word:]][[:digit:]][[:space:]]_:\/@\.{}\[\]$-]+/
KEY_SANITIZATION_REGEX = /[^a-zA-Z0-9_\-.]+/
UNIT_SANITIZATION_REGEX = /[^a-zA-Z0-9_]+/
TAG_KEY_SANITIZATION_REGEX = /[^a-zA-Z0-9_\-.\/]+/

TAG_VALUE_SANITIZATION_MAP = {
"\n" => "\\n",
"\r" => "\\r",
"\t" => "\\t",
"\\" => "\\\\",
"|" => "\\u{7c}",
"," => "\\u{2c}"
}

METRIC_TYPES = {
c: CounterMetric,
Expand Down Expand Up @@ -180,17 +190,17 @@ def serialize_buckets(buckets)
timestamp_buckets.map do |metric_key, metric|
type, key, unit, tags = metric_key
values = metric.serialize.join(':')
sanitized_tags = tags.map { |k, v| "#{sanitize_key(k)}:#{sanitize_value(v)}" }.join(',')
sanitized_tags = tags.map { |k, v| "#{sanitize_tag_key(k)}:#{sanitize_tag_value(v)}" }.join(',')

"#{sanitize_key(key)}@#{unit}:#{values}|#{type}|\##{sanitized_tags}|T#{timestamp}"
"#{sanitize_key(key)}@#{sanitize_unit(unit)}:#{values}|#{type}|\##{sanitized_tags}|T#{timestamp}"
end
end.flatten.join("\n")
end

def serialize_locations(timestamp, locations)
mapping = locations.map do |meta_key, location|
type, key, unit = meta_key
mri = "#{type}:#{sanitize_key(key)}@#{unit}"
mri = "#{type}:#{sanitize_key(key)}@#{sanitize_unit(unit)}"

# note this needs to be an array but it really doesn't serve a purpose right now
[mri, [location.merge(type: 'location')]]
Expand All @@ -203,8 +213,16 @@ def sanitize_key(key)
key.gsub(KEY_SANITIZATION_REGEX, '_')
end

def sanitize_value(value)
value.gsub(VALUE_SANITIZATION_REGEX, '')
def sanitize_unit(unit)
unit.gsub(UNIT_SANITIZATION_REGEX, '')
end

def sanitize_tag_key(key)
key.gsub(TAG_KEY_SANITIZATION_REGEX, '')
end

def sanitize_tag_value(value)
value.chars.map { |c| TAG_VALUE_SANITIZATION_MAP[c] || c }.join
end

def get_transaction_name
Expand Down
38 changes: 35 additions & 3 deletions sentry-ruby/spec/sentry/metrics/aggregator_spec.rb
Expand Up @@ -381,7 +381,7 @@
incr, dist = item.payload.split("\n")
expect(incr).to eq("incr@none:10.0|c|#environment:test,release:test-release|T#{fake_time.to_i - 3}")
expect(dist).to eq("dis_t@second:0.0:1.0:2.0:3.0:4.0|d|" +
"#environment:test,fo_-bar:snöwmän 23{},release:test-release|" +
"#environment:test,fo-bar:snöwmän% 23{},release:test-release|" +
"T#{fake_time.to_i - 3}")
end

Expand Down Expand Up @@ -431,15 +431,47 @@
incr1, dist1, incr2, dist2 = item.payload.split("\n")
expect(incr1).to eq("incr@none:10.0|c|#environment:test,release:test-release|T#{fake_time.to_i - 3}")
expect(dist1).to eq("dis_t@second:0.0:1.0:2.0:3.0:4.0|d|" +
"#environment:test,fo_-bar:snöwmän 23{},release:test-release|" +
"#environment:test,fo-bar:snöwmän% 23{},release:test-release|" +
"T#{fake_time.to_i - 3}")
expect(incr2).to eq("incr@none:5.0|c|#environment:test,release:test-release|T#{fake_time.to_i + 7}")
expect(dist2).to eq("dis_t@second:5.0:6.0:7.0:8.0:9.0|d|" +
"#environment:test,fo_-bar:snöwmän 23{},release:test-release|" +
"#environment:test,fo-bar:snöwmän% 23{},release:test-release|" +
"T#{fake_time.to_i + 7}")
end
end
end

context 'sanitization' do
it 'sanitizes the metric key' do
subject.add(:c, 'foo.disöt_12-bar', 1)
subject.flush(force: true)

sanitized_key = 'foo.dis_t_12-bar'
statsd, metrics_meta = sentry_envelopes.first.items.map(&:payload)
expect(statsd).to include(sanitized_key)
expect(metrics_meta[:mapping].keys.first).to include(sanitized_key)
end

it 'sanitizes the metric unit' do
subject.add(:c, 'incr', 1, unit: 'disöt_12-/.test')
subject.flush(force: true)

sanitized_unit = '@dist_12test'
statsd, metrics_meta = sentry_envelopes.first.items.map(&:payload)
expect(statsd).to include(sanitized_unit)
expect(metrics_meta[:mapping].keys.first).to include(sanitized_unit)
end

it 'sanitizes tag keys and values' do
tags = { "get.foö-$bar/12" => "hello!\n\r\t\\ 42 this | or , that" }
subject.add(:c, 'incr', 1, tags: tags)
subject.flush(force: true)

sanitized_tags = "get.fo-bar/12:hello!\\n\\r\\t\\\\ 42 this \\u{7c} or \\u{2c} that"
statsd = sentry_envelopes.first.items.first.payload
expect(statsd).to include(sanitized_tags)
end
end
end

describe '#kill' do
Expand Down