diff --git a/sentry_sdk/profiler.py b/sentry_sdk/profiler.py index 21313c9f73..43bedcf383 100644 --- a/sentry_sdk/profiler.py +++ b/sentry_sdk/profiler.py @@ -21,7 +21,7 @@ import threading import time import uuid -from collections import deque, namedtuple +from collections import deque from contextlib import contextmanager import sentry_sdk @@ -35,10 +35,6 @@ nanosecond_time, ) -RawFrameData = namedtuple( - "RawFrameData", ["abs_path", "filename", "function", "lineno", "module"] -) - if MYPY: from types import FrameType from typing import Any @@ -54,9 +50,17 @@ import sentry_sdk.scope import sentry_sdk.tracing - RawStack = Tuple[RawFrameData, ...] - RawSample = Sequence[Tuple[str, RawStack]] - RawSampleWithId = Sequence[Tuple[str, int, RawStack]] + StackId = int + + RawFrame = Tuple[ + str, # abs_path + Optional[str], # module + Optional[str], # filename + str, # function + int, # lineno + ] + RawStack = Tuple[RawFrame, ...] + RawSample = Sequence[Tuple[str, Tuple[StackId, RawStack]]] ProcessedStack = Tuple[int, ...] @@ -155,8 +159,13 @@ def teardown_profiler(): MAX_STACK_DEPTH = 128 -def extract_stack(frame, max_stack_depth=MAX_STACK_DEPTH): - # type: (Optional[FrameType], int) -> Tuple[RawFrameData, ...] +def extract_stack( + frame, # type: Optional[FrameType] + cwd, # type: str + prev_cache=None, # type: Optional[Tuple[StackId, RawStack, Deque[FrameType]]] + max_stack_depth=MAX_STACK_DEPTH, # type: int +): + # type: (...) -> Tuple[StackId, RawStack, Deque[FrameType]] """ Extracts the stack starting the specified frame. The extracted stack assumes the specified frame is the top of the stack, and works back @@ -166,17 +175,47 @@ def extract_stack(frame, max_stack_depth=MAX_STACK_DEPTH): only the first `MAX_STACK_DEPTH` frames will be returned. """ - stack = deque(maxlen=max_stack_depth) # type: Deque[FrameType] + frames = deque(maxlen=max_stack_depth) # type: Deque[FrameType] while frame is not None: - stack.append(frame) + frames.append(frame) frame = frame.f_back - return tuple(extract_frame(frame) for frame in stack) + if prev_cache is None: + stack = tuple(extract_frame(frame, cwd) for frame in frames) + else: + _, prev_stack, prev_frames = prev_cache + prev_depth = len(prev_frames) + depth = len(frames) + + # We want to match the frame found in this sample to the frames found in the + # previous sample. If they are the same (using the `is` operator), we can + # skip the expensive work of extracting the frame information and reuse what + # we extracted during the last sample. + # + # Make sure to keep in mind that the stack is ordered from the inner most + # from to the outer most frame so be careful with the indexing. + stack = tuple( + prev_stack[i] + if i >= 0 and frame is prev_frames[i] + else extract_frame(frame, cwd) + for i, frame in zip(range(prev_depth - depth, prev_depth), frames) + ) + + # Instead of mapping the stack into frame ids and hashing + # that as a tuple, we can directly hash the stack. + # This saves us from having to generate yet another list. + # Additionally, using the stack as the key directly is + # costly because the stack can be large, so we pre-hash + # the stack, and use the hash as the key as this will be + # needed a few times to improve performance. + stack_id = hash(stack) + return stack_id, stack, frames -def extract_frame(frame): - # type: (FrameType) -> RawFrameData + +def extract_frame(frame, cwd): + # type: (FrameType, str) -> RawFrame abs_path = frame.f_code.co_filename try: @@ -184,12 +223,23 @@ def extract_frame(frame): except Exception: module = None - return RawFrameData( - abs_path=os.path.abspath(abs_path), - filename=filename_for_module(module, abs_path) or None, - function=get_frame_name(frame), - lineno=frame.f_lineno, - module=module, + # namedtuples can be many times slower when initialing + # and accessing attribute so we opt to use a tuple here instead + return ( + # This originally was `os.path.abspath(abs_path)` but that had + # a large performance overhead. + # + # According to docs, this is equivalent to + # `os.path.normpath(os.path.join(os.getcwd(), path))`. + # The `os.getcwd()` call is slow here, so we precompute it. + # + # Additionally, since we are using normalized path already, + # we skip calling `os.path.normpath` entirely. + os.path.join(cwd, abs_path), + module, + filename_for_module(module, abs_path) or None, + get_frame_name(frame), + frame.f_lineno, ) @@ -200,6 +250,8 @@ def get_frame_name(frame): # we should consider using instead where possible f_code = frame.f_code + co_varnames = f_code.co_varnames + # co_name only contains the frame name. If the frame was a method, # the class name will NOT be included. name = f_code.co_name @@ -210,8 +262,8 @@ def get_frame_name(frame): if ( # the co_varnames start with the frame's positional arguments # and we expect the first to be `self` if its an instance method - f_code.co_varnames - and f_code.co_varnames[0] == "self" + co_varnames + and co_varnames[0] == "self" and "self" in frame.f_locals ): for cls in frame.f_locals["self"].__class__.__mro__: @@ -226,8 +278,8 @@ def get_frame_name(frame): if ( # the co_varnames start with the frame's positional arguments # and we expect the first to be `cls` if its a class method - f_code.co_varnames - and f_code.co_varnames[0] == "cls" + co_varnames + and co_varnames[0] == "cls" and "cls" in frame.f_locals ): for cls in frame.f_locals["cls"].__mro__: @@ -338,13 +390,11 @@ class SampleBuffer(object): def __init__(self, capacity): # type: (int) -> None - self.buffer = [ - None - ] * capacity # type: List[Optional[Tuple[int, RawSampleWithId]]] + self.buffer = [None] * capacity # type: List[Optional[Tuple[int, RawSample]]] self.capacity = capacity # type: int self.idx = 0 # type: int - def write(self, ts, raw_sample): + def write(self, ts, sample): # type: (int, RawSample) -> None """ Writing to the buffer is not thread safe. There is the possibility @@ -359,32 +409,16 @@ def write(self, ts, raw_sample): """ idx = self.idx - sample = [ - ( - thread_id, - # Instead of mapping the stack into frame ids and hashing - # that as a tuple, we can directly hash the stack. - # This saves us from having to generate yet another list. - # Additionally, using the stack as the key directly is - # costly because the stack can be large, so we pre-hash - # the stack, and use the hash as the key as this will be - # needed a few times to improve performance. - hash(stack), - stack, - ) - for thread_id, stack in raw_sample - ] - self.buffer[idx] = (ts, sample) self.idx = (idx + 1) % self.capacity def slice_profile(self, start_ns, stop_ns): # type: (int, int) -> ProcessedProfile samples = [] # type: List[ProcessedSample] - stacks = dict() # type: Dict[int, int] - stacks_list = list() # type: List[ProcessedStack] - frames = dict() # type: Dict[RawFrameData, int] - frames_list = list() # type: List[ProcessedFrame] + stacks = {} # type: Dict[StackId, int] + stacks_list = [] # type: List[ProcessedStack] + frames = {} # type: Dict[RawFrame, int] + frames_list = [] # type: List[ProcessedFrame] for ts, sample in filter(None, self.buffer): if start_ns > ts or ts > stop_ns: @@ -392,7 +426,7 @@ def slice_profile(self, start_ns, stop_ns): elapsed_since_start_ns = str(ts - start_ns) - for tid, hashed_stack, stack in sample: + for tid, (hashed_stack, stack) in sample: # Check if the stack is indexed first, this lets us skip # indexing frames if it's not necessary if hashed_stack not in stacks: @@ -401,11 +435,11 @@ def slice_profile(self, start_ns, stop_ns): frames[frame] = len(frames) frames_list.append( { - "abs_path": frame.abs_path, - "function": frame.function or "", - "filename": frame.filename, - "lineno": frame.lineno, - "module": frame.module, + "abs_path": frame[0], + "module": frame[1], + "filename": frame[2], + "function": frame[3], + "lineno": frame[4], } ) @@ -439,6 +473,14 @@ def slice_profile(self, start_ns, stop_ns): def make_sampler(self): # type: () -> Callable[..., None] + cwd = os.getcwd() + + # In Python3+, we can use the `nonlocal` keyword to rebind the value, + # but this is not possible in Python2. To get around this, we wrap + # the value in a list to allow updating this value each sample. + last_sample = [ + {} + ] # type: List[Dict[int, Tuple[StackId, RawStack, Deque[FrameType]]]] def _sample_stack(*args, **kwargs): # type: (*Any, **Any) -> None @@ -447,13 +489,20 @@ def _sample_stack(*args, **kwargs): This should be called at a regular interval to collect samples. """ - self.write( - nanosecond_time(), - [ - (str(tid), extract_stack(frame)) - for tid, frame in sys._current_frames().items() - ], - ) + now = nanosecond_time() + raw_sample = { + tid: extract_stack(frame, cwd, last_sample[0].get(tid)) + for tid, frame in sys._current_frames().items() + } + + last_sample[0] = raw_sample + + sample = [ + (str(tid), (stack_id, stack)) + for tid, (stack_id, stack, _) in raw_sample.items() + ] + + self.write(now, sample) return _sample_stack diff --git a/tests/test_profiler.py b/tests/test_profiler.py index 9a268713c8..9ee49bb035 100644 --- a/tests/test_profiler.py +++ b/tests/test_profiler.py @@ -1,4 +1,5 @@ import inspect +import os import platform import sys import threading @@ -8,9 +9,9 @@ from sentry_sdk.profiler import ( EventScheduler, - RawFrameData, SampleBuffer, SleepScheduler, + extract_frame, extract_stack, get_frame_name, setup_profiler, @@ -26,6 +27,10 @@ ) +def process_test_sample(sample): + return [(tid, (stack, stack)) for tid, stack in sample] + + @minimum_python_33 def test_profiler_invalid_mode(teardown_profiling): with pytest.raises(ValueError): @@ -209,6 +214,33 @@ def test_get_frame_name(frame, frame_name): assert get_frame_name(frame) == frame_name +@pytest.mark.parametrize( + ("get_frame", "function"), + [ + pytest.param(lambda: get_frame(depth=1), "get_frame", id="simple"), + ], +) +def test_extract_frame(get_frame, function): + cwd = os.getcwd() + frame = get_frame() + extracted_frame = extract_frame(frame, cwd) + + # the abs_path should be equal toe the normalized path of the co_filename + assert extracted_frame[0] == os.path.normpath(frame.f_code.co_filename) + + # the module should be pull from this test module + assert extracted_frame[1] == __name__ + + # the filename should be the file starting after the cwd + assert extracted_frame[2] == __file__[len(cwd) + 1 :] + + assert extracted_frame[3] == function + + # the lineno will shift over time as this file is modified so just check + # that it is an int + assert isinstance(extracted_frame[4], int) + + @pytest.mark.parametrize( ("depth", "max_stack_depth", "actual_depth"), [ @@ -227,15 +259,33 @@ def test_extract_stack_with_max_depth(depth, max_stack_depth, actual_depth): # increase the max_depth by the `base_stack_depth` to account # for the extra frames pytest will add - stack = extract_stack(frame, max_stack_depth + base_stack_depth) + _, stack, _ = extract_stack( + frame, os.getcwd(), max_stack_depth=max_stack_depth + base_stack_depth + ) assert len(stack) == base_stack_depth + actual_depth for i in range(actual_depth): - assert stack[i].function == "get_frame", i + assert stack[i][3] == "get_frame", i # index 0 contains the inner most frame on the stack, so the lamdba # should be at index `actual_depth` - assert stack[actual_depth].function == "", actual_depth + assert stack[actual_depth][3] == "", actual_depth + + +def test_extract_stack_with_cache(): + frame = get_frame(depth=1) + + prev_cache = extract_stack(frame, os.getcwd()) + _, stack1, _ = prev_cache + _, stack2, _ = extract_stack(frame, os.getcwd(), prev_cache) + + assert len(stack1) == len(stack2) + for i, (frame1, frame2) in enumerate(zip(stack1, stack2)): + # DO NOT use `==` for the assertion here since we are + # testing for identity, and using `==` would test for + # equality which would always pass since we're extract + # the same stack. + assert frame1 is frame2, i def get_scheduler_threads(scheduler): @@ -250,7 +300,7 @@ def __init__(self, capacity, sample_data=None): def make_sampler(self): def _sample_stack(*args, **kwargs): ts, sample = self.sample_data.pop(0) - self.write(ts, sample) + self.write(ts, process_test_sample(sample)) return _sample_stack @@ -272,11 +322,7 @@ def test_thread_scheduler_takes_first_samples(scheduler_class): [ ( 0, - ( - RawFrameData( - "/path/to/file.py", "file.py", "name", 1, "file" - ), - ), + (("/path/to/file.py", "file", "file.py", "name", 1),), ) ], ) @@ -312,11 +358,7 @@ def test_thread_scheduler_takes_more_samples(scheduler_class): [ ( 0, - ( - RawFrameData( - "/path/to/file.py", "file.py", "name", 1, "file" - ), - ), + (("/path/to/file.py", "file", "file.py", "name", 1),), ) ], ) @@ -420,11 +462,7 @@ def test_thread_scheduler_single_background_thread(scheduler_class): [ ( "1", - ( - RawFrameData( - "/path/to/file.py", "file.py", "name", 1, "file" - ), - ), + (("/path/to/file.py", "file", "file.py", "name", 1),), ) ], ) @@ -447,11 +485,7 @@ def test_thread_scheduler_single_background_thread(scheduler_class): [ ( "1", - ( - RawFrameData( - "/path/to/file.py", "file.py", "name", 1, "file" - ), - ), + (("/path/to/file.py", "file", "file.py", "name", 1),), ) ], ) @@ -488,11 +522,7 @@ def test_thread_scheduler_single_background_thread(scheduler_class): [ ( "1", - ( - RawFrameData( - "/path/to/file.py", "file.py", "name", 1, "file" - ), - ), + (("/path/to/file.py", "file", "file.py", "name", 1),), ) ], ), @@ -501,11 +531,7 @@ def test_thread_scheduler_single_background_thread(scheduler_class): [ ( "1", - ( - RawFrameData( - "/path/to/file.py", "file.py", "name", 1, "file" - ), - ), + (("/path/to/file.py", "file", "file.py", "name", 1),), ) ], ), @@ -547,11 +573,7 @@ def test_thread_scheduler_single_background_thread(scheduler_class): [ ( "1", - ( - RawFrameData( - "/path/to/file.py", "file.py", "name1", 1, "file" - ), - ), + (("/path/to/file.py", "file", "file.py", "name1", 1),), ) ], ), @@ -561,12 +583,8 @@ def test_thread_scheduler_single_background_thread(scheduler_class): ( "1", ( - RawFrameData( - "/path/to/file.py", "file.py", "name1", 1, "file" - ), - RawFrameData( - "/path/to/file.py", "file.py", "name2", 2, "file" - ), + ("/path/to/file.py", "file", "file.py", "name1", 1), + ("/path/to/file.py", "file", "file.py", "name2", 2), ), ) ], @@ -617,11 +635,14 @@ def test_thread_scheduler_single_background_thread(scheduler_class): ( "1", ( - RawFrameData( - "/path/to/file.py", "file.py", "name1", 1, "file" - ), - RawFrameData( - "/path/to/file.py", "file.py", "name2", 2, "file" + ("/path/to/file.py", "file", "file.py", "name1", 1), + ( + "/path/to/file.py", + "file", + "file.py", + "name2", + 2, + "file", ), ), ) @@ -633,11 +654,21 @@ def test_thread_scheduler_single_background_thread(scheduler_class): ( "1", ( - RawFrameData( - "/path/to/file.py", "file.py", "name3", 3, "file" + ( + "/path/to/file.py", + "file", + "file.py", + "name3", + 3, + "file", ), - RawFrameData( - "/path/to/file.py", "file.py", "name4", 4, "file" + ( + "/path/to/file.py", + "file", + "file.py", + "name4", + 4, + "file", ), ), ) @@ -702,11 +733,7 @@ def test_thread_scheduler_single_background_thread(scheduler_class): [ ( "1", - ( - RawFrameData( - "/path/to/file.py", "file.py", "name1", 1, "file" - ), - ), + (("/path/to/file.py", "file", "file.py", "name1", 1),), ) ], ), @@ -716,12 +743,8 @@ def test_thread_scheduler_single_background_thread(scheduler_class): ( "1", ( - RawFrameData( - "/path/to/file.py", "file.py", "name2", 2, "file" - ), - RawFrameData( - "/path/to/file.py", "file.py", "name3", 3, "file" - ), + ("/path/to/file.py", "file", "file.py", "name2", 2), + ("/path/to/file.py", "file", "file.py", "name3", 3), ), ) ], @@ -761,6 +784,6 @@ def test_thread_scheduler_single_background_thread(scheduler_class): def test_sample_buffer(capacity, start_ns, stop_ns, samples, profile): buffer = SampleBuffer(capacity) for ts, sample in samples: - buffer.write(ts, sample) + buffer.write(ts, process_test_sample(sample)) result = buffer.slice_profile(start_ns, stop_ns) assert result == profile