From 738506c29ac76d0a7ebf0a26b3c992ead8923f10 Mon Sep 17 00:00:00 2001 From: Glen Walker Date: Tue, 6 Feb 2024 04:34:00 +1300 Subject: [PATCH] Fix performance regression in sentry_sdk.utils._generate_installed_modules (#2703) Commit 8c24d33f causes a performance regression when PYTHONPATH is long, because it traverses PYTHONPATH for every distribution found (importlib.metadata.version traverses PYTHONPATH searching for a matching distribution on every call). In our case we have an environment containing ~500 paths and ~100 distributions, where the first call to sentry_sdk.utils.package_version causes ~150k filesystem operations taking 10-20 seconds. This commit uses the version from the distribution found when iterating over all distributions, instead of calling importlib.metadata.version for each, which fixes the performance issue for us. Note that if multiple copies of a distribution with different versions exist in PYTHONPATH, the existing _generate_installed_modules will return the name and version of the first matching distribution found multiple times, and the duplicates will then be discarded by the creation of a dict in _get_installed_modules. I have preserved the same behaviour by returning the name and version of a distribution only the first time a distribution name is seen. 
--- sentry_sdk/utils.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/utils.py b/sentry_sdk/utils.py index cbca3f3b17..7c10d7cf43 100644 --- a/sentry_sdk/utils.py +++ b/sentry_sdk/utils.py @@ -1666,6 +1666,7 @@ def _generate_installed_modules(): try: from importlib import metadata + yielded = set() for dist in metadata.distributions(): name = dist.metadata["Name"] # `metadata` values may be `None`, see: @@ -1673,9 +1674,10 @@ def _generate_installed_modules(): # and # https://github.com/python/importlib_metadata/issues/371 if name is not None: - version = metadata.version(name) - if version is not None: - yield _normalize_module_name(name), version + normalized_name = _normalize_module_name(name) + if dist.version is not None and normalized_name not in yielded: + yield normalized_name, dist.version + yielded.add(normalized_name) except ImportError: # < py3.8