getsentry · antonpirker · Nov 24, 2023 · Nov 20, 2023 · Nov 23, 2023 · Nov 23, 2023
diff --git a/sentry_sdk/_types.py b/sentry_sdk/_types.py
@@ -117,3 +117,4 @@
     FlushedMetricValue = Union[int, float]
 
     BucketKey = Tuple[MetricType, str, MeasurementUnit, MetricTagsInternal]
+    MetricMetaKey = Tuple[MetricType, str, MeasurementUnit]
diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py
@@ -237,11 +237,15 @@ def _capture_envelope(envelope):
             self.session_flusher = SessionFlusher(capture_func=_capture_envelope)
 
             self.metrics_aggregator = None  # type: Optional[MetricsAggregator]
-            if self.options.get("_experiments", {}).get("enable_metrics"):
+            experiments = self.options.get("_experiments", {})
+            if experiments.get("enable_metrics"):
                 from sentry_sdk.metrics import MetricsAggregator
 
                 self.metrics_aggregator = MetricsAggregator(
-                    capture_func=_capture_envelope
+                    capture_func=_capture_envelope,
+                    enable_code_locations=bool(
+                        experiments.get("metric_code_locations")
+                    ),
                 )
 
             max_request_body_size = ("always", "never", "small", "medium")

diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py
@@ -47,6 +47,7 @@
             "transport_num_pools": Optional[int],
             "enable_metrics": Optional[bool],
             "before_emit_metric": Optional[Callable[[str, MetricTags], bool]],
+            "metric_code_locations": Optional[bool],
         },
         total=False,
     )

diff --git a/sentry_sdk/metrics.py b/sentry_sdk/metrics.py
@@ -1,6 +1,7 @@
 import os
 import io
 import re
+import sys
 import threading
 import random
 import time
@@ -11,8 +12,14 @@
 from contextlib import contextmanager
 
 import sentry_sdk
-from sentry_sdk._compat import text_type
-from sentry_sdk.utils import now, nanosecond_time, to_timestamp
+from sentry_sdk._compat import text_type, utc_from_timestamp, iteritems
+from sentry_sdk.utils import (
+    now,
+    nanosecond_time,
+    to_timestamp,
+    serialize_frame,
+    json_dumps,
+)
 from sentry_sdk.envelope import Envelope, Item
 from sentry_sdk.tracing import (
     TRANSACTION_SOURCE_ROUTE,
@@ -24,18 +31,21 @@
 
 if TYPE_CHECKING:
     from typing import Any
+    from typing import Callable
     from typing import Dict
+    from typing import Generator
     from typing import Iterable
-    from typing import Callable
+    from typing import List
     from typing import Optional
-    from typing import Generator
+    from typing import Set
     from typing import Tuple
     from typing import Union
 
     from sentry_sdk._types import BucketKey
     from sentry_sdk._types import DurationUnit
     from sentry_sdk._types import FlushedMetricValue
     from sentry_sdk._types import MeasurementUnit
+    from sentry_sdk._types import MetricMetaKey
     from sentry_sdk._types import MetricTagValue
     from sentry_sdk._types import MetricTags
     from sentry_sdk._types import MetricTagsInternal
@@ -46,6 +56,7 @@
 _thread_local = threading.local()
 _sanitize_key = partial(re.compile(r"[^a-zA-Z0-9_/.-]+").sub, "_")
 _sanitize_value = partial(re.compile(r"[^\w\d_:/@\.{}\[\]$-]+", re.UNICODE).sub, "_")
+_set = set  # set is shadowed below
 
 GOOD_TRANSACTION_SOURCES = frozenset(
     [
@@ -57,6 +68,18 @@
 )
 
 
+def get_code_location(stacklevel):
+    # type: (int) -> Optional[Dict[str, Any]]
+    try:
+        frm = sys._getframe(stacklevel + 4)
+    except Exception:
+        return None
+
+    return serialize_frame(
+        frm, include_local_variables=False, include_source_context=False
+    )
+
+
 @contextmanager
 def recursion_protection():
     # type: () -> Generator[bool, None, None]
@@ -247,7 +270,7 @@
     # relay side emission and should not happen commonly.
 
     for timestamp, buckets in flushable_buckets:
-        for bucket_key, metric in buckets.items():
+        for bucket_key, metric in iteritems(buckets):
             metric_type, metric_name, metric_unit, metric_tags = bucket_key
             metric_name = _sanitize_key(metric_name)
             _write(metric_name.encode("utf-8"))
@@ -283,6 +306,20 @@
     return out.getvalue()
 
 
+def _encode_locations(timestamp, code_locations):
+    # type: (int, Iterable[Tuple[MetricMetaKey, Dict[str, Any]]]) -> bytes
+    mapping = {}  # type: Dict[str, List[Any]]
+
+    for key, loc in code_locations:
+        metric_type, name, unit = key
+        mri = "{}:{}@{}".format(metric_type, _sanitize_key(name), unit)
+
+        loc["type"] = "location"
+        mapping.setdefault(mri, []).append(loc)
+
+    return json_dumps({"timestamp": timestamp, "mapping": mapping})
+
+
 METRIC_TYPES = {
     "c": CounterMetric,
     "g": GaugeMetric,
@@ -311,9 +348,13 @@
     def __init__(
         self,
         capture_func,  # type: Callable[[Envelope], None]
+        enable_code_locations=False,  # type: bool
     ):
         # type: (...) -> None
         self.buckets = {}  # type: Dict[int, Any]
+        self._enable_code_locations = enable_code_locations
+        self._seen_locations = _set()  # type: Set[Tuple[int, MetricMetaKey]]
+        self._pending_locations = {}  # type: Dict[int, List[Tuple[MetricMetaKey, Any]]]
         self._buckets_total_weight = 0
         self._capture_func = capture_func
         self._lock = Lock()
@@ -366,9 +407,7 @@
 
     def _flush(self):
         # type: (...) -> None
-        flushable_buckets = self._flushable_buckets()
-        if flushable_buckets:
-            self._emit(flushable_buckets)
+        self._emit(self._flushable_buckets(), self._flushable_locations())
 
     def _flushable_buckets(self):
         # type: (...) -> (Iterable[Tuple[int, Dict[BucketKey, Metric]]])
@@ -385,21 +424,28 @@
                 self._force_flush = False
             else:
                 flushable_buckets = []
-                for buckets_timestamp, buckets in self.buckets.items():
+                for buckets_timestamp, buckets in iteritems(self.buckets):
                     # If the timestamp of the bucket is newer that the rollup we want to skip it.
                     if buckets_timestamp <= cutoff:
                         flushable_buckets.append((buckets_timestamp, buckets))
 
                 # We will clear the elements while holding the lock, in order to avoid requesting it downstream again.
                 for buckets_timestamp, buckets in flushable_buckets:
-                    for _, metric in buckets.items():
+                    for _, metric in iteritems(buckets):
                         weight_to_remove += metric.weight
                     del self.buckets[buckets_timestamp]
 
                 self._buckets_total_weight -= weight_to_remove
 
         return flushable_buckets
 
+    def _flushable_locations(self):
+        # type: (...) -> Dict[int, List[Tuple[MetricMetaKey, Dict[str, Any]]]]
+        with self._lock:
+            locations = self._pending_locations
+            self._pending_locations = {}
+        return locations
+
     @metrics_noop
     def add(
         self,
@@ -409,6 +455,7 @@
         unit,  # type: MeasurementUnit
         tags,  # type: Optional[MetricTags]
         timestamp=None,  # type: Optional[Union[float, datetime]]
+        stacklevel=0,  # type: int
     ):
         # type: (...) -> None
         if not self._ensure_thread() or self._flusher is None:
@@ -441,6 +488,24 @@
 
             self._buckets_total_weight += metric.weight - previous_weight
 
+            # Store code location once per metric and per day (of bucket timestamp)
+            if self._enable_code_locations:
+                meta_key = (ty, key, unit)
+                start_of_day = utc_from_timestamp(timestamp).replace(
+                    hour=0, minute=0, second=0, microsecond=0, tzinfo=None
+                )
+                start_of_day = int(to_timestamp(start_of_day))
+
+                if (start_of_day, meta_key) not in self._seen_locations:
+                    self._seen_locations.add((start_of_day, meta_key))
+                    loc = get_code_location(stacklevel)
+                    if loc is not None:
+                        # Group metadata by day to make flushing more efficient.
+                        # There needs to be one envelope item per timestamp.
+                        self._pending_locations.setdefault(start_of_day, []).append(
+                            (meta_key, loc)
+                        )
+
         # Given the new weight we consider whether we want to force flush.
         self._consider_force_flush()
 
@@ -471,13 +536,23 @@
     def _emit(
         self,
         flushable_buckets,  # type: (Iterable[Tuple[int, Dict[BucketKey, Metric]]])
+        code_locations,  # type: Dict[int, List[Tuple[MetricMetaKey, Dict[str, Any]]]]
     ):
-        # type: (...) -> Envelope
-        encoded_metrics = _encode_metrics(flushable_buckets)
-        metric_item = Item(payload=encoded_metrics, type="statsd")
-        envelope = Envelope(items=[metric_item])
-        self._capture_func(envelope)
-        return envelope
+        # type: (...) -> Optional[Envelope]
+        envelope = Envelope()
+
+        if flushable_buckets:
+            encoded_metrics = _encode_metrics(flushable_buckets)
+            envelope.add_item(Item(payload=encoded_metrics, type="statsd"))
+
+        for timestamp, locations in iteritems(code_locations):
+            encoded_locations = _encode_locations(timestamp, locations)
+            envelope.add_item(Item(payload=encoded_locations, type="metric_meta"))
+
+        if envelope.items:
+            self._capture_func(envelope)
+            return envelope
+        return None
 
     def _serialize_tags(
         self, tags  # type: Optional[MetricTags]
@@ -487,7 +562,7 @@
             return ()
 
         rv = []
-        for key, value in tags.items():
+        for key, value in iteritems(tags):
             # If the value is a collection, we want to flatten it.
             if isinstance(value, (list, tuple)):
                 for inner_value in value:
@@ -536,12 +611,13 @@
     unit="none",  # type: MeasurementUnit
     tags=None,  # type: Optional[MetricTags]
     timestamp=None,  # type: Optional[Union[float, datetime]]
+    stacklevel=0,  # type: int
 ):
     # type: (...) -> None
     """Increments a counter."""
     aggregator, tags = _get_aggregator_and_update_tags(key, tags)
     if aggregator is not None:
-        aggregator.add("c", key, value, unit, tags, timestamp)
+        aggregator.add("c", key, value, unit, tags, timestamp, stacklevel)
 
 
 class _Timing(object):
@@ -552,6 +628,7 @@
         timestamp,  # type: Optional[Union[float, datetime]]
         value,  # type: Optional[float]
         unit,  # type: DurationUnit
+        stacklevel,  # type: int
     ):
         # type: (...) -> None
         self.key = key
@@ -560,6 +637,7 @@
         self.value = value
         self.unit = unit
         self.entered = None  # type: Optional[float]
+        self.stacklevel = stacklevel
 
     def _validate_invocation(self, context):
         # type: (str) -> None
@@ -579,7 +657,9 @@
         aggregator, tags = _get_aggregator_and_update_tags(self.key, self.tags)
         if aggregator is not None:
             elapsed = TIMING_FUNCTIONS[self.unit]() - self.entered  # type: ignore
-            aggregator.add("d", self.key, elapsed, self.unit, tags, self.timestamp)
+            aggregator.add(
+                "d", self.key, elapsed, self.unit, tags, self.timestamp, self.stacklevel
+            )
 
     def __call__(self, f):
         # type: (Any) -> Any
@@ -589,7 +669,11 @@
         def timed_func(*args, **kwargs):
             # type: (*Any, **Any) -> Any
             with timing(
-                key=self.key, tags=self.tags, timestamp=self.timestamp, unit=self.unit
+                key=self.key,
+                tags=self.tags,
+                timestamp=self.timestamp,
+                unit=self.unit,
+                stacklevel=self.stacklevel + 1,
             ):
                 return f(*args, **kwargs)
 
@@ -602,6 +686,7 @@
     unit="second",  # type: DurationUnit
     tags=None,  # type: Optional[MetricTags]
     timestamp=None,  # type: Optional[Union[float, datetime]]
+    stacklevel=0,  # type: int
 ):
     # type: (...) -> _Timing
     """Emits a distribution with the time it takes to run the given code block.
@@ -615,8 +700,8 @@
     if value is not None:
         aggregator, tags = _get_aggregator_and_update_tags(key, tags)
         if aggregator is not None:
-            aggregator.add("d", key, value, unit, tags, timestamp)
-    return _Timing(key, tags, timestamp, value, unit)
+            aggregator.add("d", key, value, unit, tags, timestamp, stacklevel)
+    return _Timing(key, tags, timestamp, value, unit, stacklevel)
 
 
 def distribution(
@@ -625,12 +710,13 @@
     unit="none",  # type: MeasurementUnit
     tags=None,  # type: Optional[MetricTags]
     timestamp=None,  # type: Optional[Union[float, datetime]]
+    stacklevel=0,  # type: int
 ):
     # type: (...) -> None
     """Emits a distribution."""
     aggregator, tags = _get_aggregator_and_update_tags(key, tags)
     if aggregator is not None:
-        aggregator.add("d", key, value, unit, tags, timestamp)
+        aggregator.add("d", key, value, unit, tags, timestamp, stacklevel)
 
 
 def set(
@@ -639,12 +725,13 @@
     unit="none",  # type: MeasurementUnit
     tags=None,  # type: Optional[MetricTags]
     timestamp=None,  # type: Optional[Union[float, datetime]]
+    stacklevel=0,  # type: int
 ):
     # type: (...) -> None
     """Emits a set."""
     aggregator, tags = _get_aggregator_and_update_tags(key, tags)
     if aggregator is not None:
-        aggregator.add("s", key, value, unit, tags, timestamp)
+        aggregator.add("s", key, value, unit, tags, timestamp, stacklevel)
 
 
 def gauge(
@@ -653,9 +740,10 @@
     unit="none",  # type: MetricValue
     tags=None,  # type: Optional[MetricTags]
     timestamp=None,  # type: Optional[Union[float, datetime]]
+    stacklevel=0,  # type: int
 ):
     # type: (...) -> None
     """Emits a gauge."""
     aggregator, tags = _get_aggregator_and_update_tags(key, tags)
     if aggregator is not None:
-        aggregator.add("g", key, value, unit, tags, timestamp)
+        aggregator.add("g", key, value, unit, tags, timestamp, stacklevel)