diff --git a/CHANGELOG.rst b/CHANGELOG.rst index b2fc2dd3..d8bc8918 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -7,6 +7,9 @@ Changelog * Enforce that the entire marker string is parsed (:issue:`687`) * Requirement parsing no longer automatically validates the URL (:issue:`120`) * Canonicalize names for requirements comparison (:issue:`644`) +* Introduce `metadata.Metadata` (along with `metadata.ExceptionGroup` and `metadata.InvalidMetadata`; :issue:`570`) +* Introduce the `validate` keyword parameter to `utils.canonicalize_name()` (:issue:`570`) +* Introduce `utils.is_normalized_name()` (:issue:`570`) 23.1 - 2023-04-12 ~~~~~~~~~~~~~~~~~ diff --git a/docs/metadata.rst b/docs/metadata.rst index b87574cb..bf57ee1d 100644 --- a/docs/metadata.rst +++ b/docs/metadata.rst @@ -25,15 +25,39 @@ Usage 'packaging' >>> raw["version"] '24.0' + >>> from packaging.metadata import Metadata + >>> parsed = Metadata.from_raw(raw) + >>> parsed.name + 'packaging' + >>> parsed.version + <Version('24.0')> Reference --------- +High Level Interface +'''''''''''''''''''' + +.. autoclass:: packaging.metadata.Metadata + :members: + Low Level Interface ''''''''''''''''''' -.. automodule:: packaging.metadata +.. autoclass:: packaging.metadata.RawMetadata + :members: + +.. autofunction:: packaging.metadata.parse_email + + +Exceptions +'''''''''' + +.. autoclass:: packaging.metadata.InvalidMetadata + :members: + +.. autoclass:: packaging.metadata.ExceptionGroup :members: diff --git a/docs/utils.rst b/docs/utils.rst index 7a4c2f74..6c552555 100644 --- a/docs/utils.rst +++ b/docs/utils.rst @@ -14,7 +14,7 @@ Reference A :class:`typing.NewType` of :class:`str`, representing a normalized name. -.. function:: canonicalize_name(name) +.. function:: canonicalize_name(name, *, validate=False) This function takes a valid Python package or extra name, and returns the normalized form of it. @@ -23,7 +23,13 @@ Reference checkers to help require that a string has passed through this function before use.
+ If **validate** is true, then the function will check if **name** is a valid + distribution name before normalizing. + :param str name: The name to normalize. + :param bool validate: Check whether the name is a valid distribution name. + :raises InvalidName: If **validate** is true and the name is not an + acceptable distribution name. .. doctest:: @@ -35,6 +41,21 @@ Reference >>> canonicalize_name("requests") 'requests' +.. function:: is_normalized_name(name) + + Check if a name is already normalized (i.e. :func:`canonicalize_name` would + roundtrip to the same value). + + :param str name: The name to check. + + .. doctest:: + + >>> from packaging.utils import is_normalized_name + >>> is_normalized_name("requests") + True + >>> is_normalized_name("Django") + False + .. function:: canonicalize_version(version) This function takes a string representing a package version (or a @@ -103,6 +124,9 @@ Reference >>> ver == Version('1.0') True +.. exception:: InvalidName + + Raised when a distribution name is invalid. .. exception:: InvalidWheelFilename diff --git a/src/packaging/metadata.py b/src/packaging/metadata.py index b6dda521..7b0e6a9c 100644 --- a/src/packaging/metadata.py +++ b/src/packaging/metadata.py @@ -5,23 +5,74 @@ import email.policy import sys import typing -from typing import Dict, List, Optional, Tuple, Union, cast - -if sys.version_info >= (3, 8): # pragma: no cover - from typing import TypedDict +from typing import ( + Any, + Callable, + Dict, + Generic, + List, + Optional, + Tuple, + Type, + Union, + cast, +) + +from . 
import requirements, specifiers, utils, version as version_module + +T = typing.TypeVar("T") +if sys.version_info[:2] >= (3, 8): # pragma: no cover + from typing import Literal, TypedDict else: # pragma: no cover if typing.TYPE_CHECKING: - from typing_extensions import TypedDict + from typing_extensions import Literal, TypedDict else: try: - from typing_extensions import TypedDict + from typing_extensions import Literal, TypedDict except ImportError: + class Literal: + def __init_subclass__(*_args, **_kwargs): + pass + class TypedDict: def __init_subclass__(*_args, **_kwargs): pass +try: + ExceptionGroup = __builtins__.ExceptionGroup # type: ignore[attr-defined] +except AttributeError: + + class ExceptionGroup(Exception): # type: ignore[no-redef] # noqa: N818 + """A minimal implementation of :external:exc:`ExceptionGroup` from Python 3.11. + + If :external:exc:`ExceptionGroup` is already defined by Python itself, + that version is used instead. + """ + + message: str + exceptions: List[Exception] + + def __init__(self, message: str, exceptions: List[Exception]) -> None: + self.message = message + self.exceptions = exceptions + + def __repr__(self) -> str: + return f"{self.__class__.__name__}({self.message!r}, {self.exceptions!r})" + + +class InvalidMetadata(ValueError): + """A metadata field contains invalid data.""" + + field: str + """The name of the field that contains invalid data.""" + + def __init__(self, field: str, message: str) -> None: + self.field = field + super().__init__(message) + + # The RawMetadata class attempts to make as few assumptions about the underlying # serialization formats as possible. The idea is that as long as a serialization # formats offer some very basic primitives in *some* way then we can support @@ -33,7 +84,8 @@ class RawMetadata(TypedDict, total=False): provided). The key is lower-case and underscores are used instead of dashes compared to the equivalent core metadata field. 
Any core metadata field that can be specified multiple times or can hold multiple values in a single - field have a key with a plural name. + field have a key with a plural name. See :class:`Metadata` whose attributes + match the keys of this dictionary. Core metadata fields that can be specified multiple times are stored as a list or dict depending on which is appropriate for the field. Any fields @@ -110,7 +162,7 @@ class RawMetadata(TypedDict, total=False): "version", } -_LIST_STRING_FIELDS = { +_LIST_FIELDS = { "classifiers", "dynamic", "obsoletes", @@ -125,6 +177,10 @@ class RawMetadata(TypedDict, total=False): "supported_platforms", } +_DICT_FIELDS = { + "project_urls", +} + def _parse_keywords(data: str) -> List[str]: """Split a string of comma-separate keyboards into a list of keywords.""" @@ -230,10 +286,11 @@ def _get_payload(msg: email.message.Message, source: Union[bytes, str]) -> str: "supported-platform": "supported_platforms", "version": "version", } +_RAW_TO_EMAIL_MAPPING = {raw: email for email, raw in _EMAIL_TO_RAW_MAPPING.items()} def parse_email(data: Union[bytes, str]) -> Tuple[RawMetadata, Dict[str, List[str]]]: - """Parse a distribution's metadata. + """Parse a distribution's metadata stored as email headers (e.g. from ``METADATA``). This function returns a two-item tuple of dicts. The first dict is of recognized fields from the core metadata specification. Fields that can be @@ -267,7 +324,7 @@ def parse_email(data: Union[bytes, str]) -> Tuple[RawMetadata, Dict[str, List[st # We use get_all() here, even for fields that aren't multiple use, # because otherwise someone could have e.g. two Name fields, and we # would just silently ignore it rather than doing something about it. 
- headers = parsed.get_all(name) + headers = parsed.get_all(name) or [] # The way the email module works when parsing bytes is that it # unconditionally decodes the bytes as ascii using the surrogateescape @@ -349,7 +406,7 @@ def parse_email(data: Union[bytes, str]) -> Tuple[RawMetadata, Dict[str, List[st # If this is one of our list of string fields, then we can just assign # the value, since email *only* has strings, and our get_all() call # above ensures that this is a list. - elif raw_name in _LIST_STRING_FIELDS: + elif raw_name in _LIST_FIELDS: raw[raw_name] = value # Special Case: Keywords # The keywords field is implemented in the metadata spec as a str, @@ -406,3 +463,360 @@ def parse_email(data: Union[bytes, str]) -> Tuple[RawMetadata, Dict[str, List[st # way this function is implemented, our `TypedDict` can only have valid key # names. return cast(RawMetadata, raw), unparsed + + +_NOT_FOUND = object() + + +# Keep the two values in sync. +_VALID_METADATA_VERSIONS = ["1.0", "1.1", "1.2", "2.1", "2.2", "2.3"] +_MetadataVersion = Literal["1.0", "1.1", "1.2", "2.1", "2.2", "2.3"] + +_REQUIRED_ATTRS = frozenset(["metadata_version", "name", "version"]) + + +class _Validator(Generic[T]): + """Validate a metadata field. + + All _process_*() methods correspond to a core metadata field. The method is + called with the field's raw value. If the raw value is valid it is returned + in its "enriched" form (e.g. ``version.Version`` for the ``Version`` field). + If the raw value is invalid, :exc:`InvalidMetadata` is raised (with a cause + as appropriate). 
+ """ + + name: str + raw_name: str + added: _MetadataVersion + + def __init__( + self, + *, + added: _MetadataVersion = "1.0", + ) -> None: + self.added = added + + def __set_name__(self, _owner: "Metadata", name: str) -> None: + self.name = name + self.raw_name = _RAW_TO_EMAIL_MAPPING[name] + + def __get__(self, instance: "Metadata", _owner: Type["Metadata"]) -> T: + # With Python 3.8, the caching can be replaced with functools.cached_property(). + # No need to check the cache as attribute lookup will resolve into the + # instance's __dict__ before __get__ is called. + cache = instance.__dict__ + try: + value = instance._raw[self.name] # type: ignore[literal-required] + except KeyError: + if self.name in _STRING_FIELDS: + value = "" + elif self.name in _LIST_FIELDS: + value = [] + elif self.name in _DICT_FIELDS: + value = {} + else: # pragma: no cover + assert False + + try: + converter: Callable[[Any], T] = getattr(self, f"_process_{self.name}") + except AttributeError: + pass + else: + value = converter(value) + + cache[self.name] = value + try: + del instance._raw[self.name] # type: ignore[misc] + except KeyError: + pass + + return cast(T, value) + + def _invalid_metadata( + self, msg: str, cause: Optional[Exception] = None + ) -> InvalidMetadata: + exc = InvalidMetadata( + self.raw_name, msg.format_map({"field": repr(self.raw_name)}) + ) + exc.__cause__ = cause + return exc + + def _process_metadata_version(self, value: str) -> _MetadataVersion: + # Implicitly makes Metadata-Version required. + if value not in _VALID_METADATA_VERSIONS: + raise self._invalid_metadata(f"{value!r} is not a valid metadata version") + return cast(_MetadataVersion, value) + + def _process_name(self, value: str) -> str: + if not value: + raise self._invalid_metadata("{field} is a required field") + # Validate the name as a side-effect. 
+ try: + utils.canonicalize_name(value, validate=True) + except utils.InvalidName as exc: + raise self._invalid_metadata( + f"{value!r} is invalid for {{field}}", cause=exc + ) + else: + return value + + def _process_version(self, value: str) -> version_module.Version: + if not value: + raise self._invalid_metadata("{field} is a required field") + try: + return version_module.parse(value) + except version_module.InvalidVersion as exc: + raise self._invalid_metadata( + f"{value!r} is invalid for {{field}}", cause=exc + ) + + def _process_summary(self, value: str) -> str: + """Check the field contains no newlines.""" + if "\n" in value: + raise self._invalid_metadata("{field} must be a single line") + return value + + def _process_description_content_type(self, value: str) -> str: + content_types = {"text/plain", "text/x-rst", "text/markdown"} + message = email.message.EmailMessage() + message["content-type"] = value + + content_type, parameters = ( + # Defaults to `text/plain` if parsing failed. + message.get_content_type().lower(), + message["content-type"].params, + ) + # Check if content-type is valid or defaulted to `text/plain` and thus was + # not parseable. + if content_type not in content_types or content_type not in value.lower(): + raise self._invalid_metadata( + f"{{field}} must be one of {list(content_types)}, not {value!r}" + ) + + charset = parameters.get("charset", "UTF-8") + if charset != "UTF-8": + raise self._invalid_metadata( + f"{{field}} can only specify the UTF-8 charset, not {list(charset)}" + ) + + markdown_variants = {"GFM", "CommonMark"} + variant = parameters.get("variant", "GFM") # Use an acceptable default. 
+ if content_type == "text/markdown" and variant not in markdown_variants: + raise self._invalid_metadata( + f"valid Markdown variants for {{field}} are {list(markdown_variants)}, " + f"not {variant!r}", + ) + return value + + def _process_dynamic(self, value: List[str]) -> List[str]: + for dynamic_field in map(str.lower, value): + if dynamic_field in {"name", "version", "metadata-version"}: + raise self._invalid_metadata( + f"{value!r} is not allowed as a dynamic field" + ) + elif dynamic_field not in _EMAIL_TO_RAW_MAPPING: + raise self._invalid_metadata(f"{value!r} is not a valid dynamic field") + return list(map(str.lower, value)) + + def _process_provides_extra( + self, + value: List[str], + ) -> List[utils.NormalizedName]: + normalized_names = [] + try: + for name in value: + normalized_names.append(utils.canonicalize_name(name, validate=True)) + except utils.InvalidName as exc: + raise self._invalid_metadata( + f"{name!r} is invalid for {{field}}", cause=exc + ) + else: + return normalized_names + + def _process_requires_python(self, value: str) -> specifiers.SpecifierSet: + try: + return specifiers.SpecifierSet(value) + except specifiers.InvalidSpecifier as exc: + raise self._invalid_metadata( + f"{value!r} is invalid for {{field}}", cause=exc + ) + + def _process_requires_dist( + self, + value: List[str], + ) -> List[requirements.Requirement]: + reqs = [] + try: + for req in value: + reqs.append(requirements.Requirement(req)) + except requirements.InvalidRequirement as exc: + raise self._invalid_metadata(f"{req!r} is invalid for {{field}}", cause=exc) + else: + return reqs + + +class Metadata: + """Representation of distribution metadata. + + Compared to :class:`RawMetadata`, this class provides objects representing + metadata fields instead of only using built-in types. Any invalid metadata + will cause :exc:`InvalidMetadata` to be raised (with a + :py:attr:`~BaseException.__cause__` attribute as appropriate). 
+ """ + + _raw: RawMetadata + + @classmethod + def from_raw(cls, data: RawMetadata, *, validate: bool = True) -> "Metadata": + """Create an instance from :class:`RawMetadata`. + + If *validate* is true, all metadata will be validated. All exceptions + related to validation will be gathered and raised as an :class:`ExceptionGroup`. + """ + ins = cls() + ins._raw = data.copy() # Mutations occur due to caching enriched values. + + if validate: + exceptions: List[InvalidMetadata] = [] + try: + metadata_version = ins.metadata_version + metadata_age = _VALID_METADATA_VERSIONS.index(metadata_version) + except InvalidMetadata as metadata_version_exc: + exceptions.append(metadata_version_exc) + metadata_version = None + + # Make sure to check for the fields that are present, the required + # fields (so their absence can be reported). + fields_to_check = frozenset(ins._raw) | _REQUIRED_ATTRS + # Remove fields that have already been checked. + fields_to_check -= {"metadata_version"} + + for key in fields_to_check: + try: + if metadata_version: + # Can't use getattr() as that triggers descriptor protocol which + # will fail due to no value for the instance argument. + try: + field_metadata_version = cls.__dict__[key].added + except KeyError: + exc = InvalidMetadata(key, f"unrecognized field: {key!r}") + exceptions.append(exc) + continue + field_age = _VALID_METADATA_VERSIONS.index( + field_metadata_version + ) + if field_age > metadata_age: + field = _RAW_TO_EMAIL_MAPPING[key] + exc = InvalidMetadata( + field, + "{field} introduced in metadata version " + "{field_metadata_version}, not {metadata_version}", + ) + exceptions.append(exc) + continue + getattr(ins, key) + except InvalidMetadata as exc: + exceptions.append(exc) + + if exceptions: + raise ExceptionGroup("invalid metadata", exceptions) + + return ins + + @classmethod + def from_email( + cls, data: Union[bytes, str], *, validate: bool = True + ) -> "Metadata": + """Parse metadata from email headers. 
+ + If *validate* is true, the metadata will be validated. All exceptions + related to validation will be gathered and raised as an :class:`ExceptionGroup`. + """ + exceptions: list[InvalidMetadata] = [] + raw, unparsed = parse_email(data) + + if validate: + for unparsed_key in unparsed: + if unparsed_key in _EMAIL_TO_RAW_MAPPING: + message = f"{unparsed_key!r} has invalid data" + else: + message = f"unrecognized field: {unparsed_key!r}" + exceptions.append(InvalidMetadata(unparsed_key, message)) + + if exceptions: + raise ExceptionGroup("unparsed", exceptions) + + try: + return cls.from_raw(raw, validate=validate) + except ExceptionGroup as exc_group: + exceptions.extend(exc_group.exceptions) + raise ExceptionGroup("invalid or unparsed metadata", exceptions) from None + + metadata_version: _Validator[_MetadataVersion] = _Validator() + """:external:ref:`core-metadata-metadata-version` + (required; validated to be a valid metadata version)""" + name: _Validator[str] = _Validator() + """:external:ref:`core-metadata-name` + (required; validated using :func:`~packaging.utils.canonicalize_name` and its + *validate* parameter)""" + version: _Validator[version_module.Version] = _Validator() + """:external:ref:`core-metadata-version` (required)""" + dynamic: _Validator[List[str]] = _Validator( + added="2.2", + ) + """:external:ref:`core-metadata-dynamic` + (validated against core metadata field names and lowercased)""" + platforms: _Validator[List[str]] = _Validator() + """:external:ref:`core-metadata-platform`""" + supported_platforms: _Validator[List[str]] = _Validator(added="1.1") + """:external:ref:`core-metadata-supported-platform`""" + summary: _Validator[str] = _Validator() + """:external:ref:`core-metadata-summary` (validated to contain no newlines)""" + description: _Validator[str] = _Validator() # TODO 2.1: can be in body + """:external:ref:`core-metadata-description`""" + description_content_type: _Validator[str] = _Validator(added="2.1") + 
""":external:ref:`core-metadata-description-content-type` (validated)""" + keywords: _Validator[List[str]] = _Validator() + """:external:ref:`core-metadata-keywords`""" + home_page: _Validator[str] = _Validator() + """:external:ref:`core-metadata-home-page`""" + download_url: _Validator[str] = _Validator(added="1.1") + """:external:ref:`core-metadata-download-url`""" + author: _Validator[str] = _Validator() + """:external:ref:`core-metadata-author`""" + author_email: _Validator[str] = _Validator() + """:external:ref:`core-metadata-author-email`""" + maintainer: _Validator[str] = _Validator(added="1.2") + """:external:ref:`core-metadata-maintainer`""" + maintainer_email: _Validator[str] = _Validator(added="1.2") + """:external:ref:`core-metadata-maintainer-email`""" + license: _Validator[str] = _Validator() + """:external:ref:`core-metadata-license`""" + classifiers: _Validator[List[str]] = _Validator(added="1.1") + """:external:ref:`core-metadata-classifier`""" + requires_dist: _Validator[List[requirements.Requirement]] = _Validator(added="1.2") + """:external:ref:`core-metadata-requires-dist`""" + requires_python: _Validator[specifiers.SpecifierSet] = _Validator(added="1.2") + """:external:ref:`core-metadata-requires-python`""" + # Because `Requires-External` allows for non-PEP 440 version specifiers, we + # don't do any processing on the values. + requires_external: _Validator[List[str]] = _Validator(added="1.2") + """:external:ref:`core-metadata-requires-external`""" + project_urls: _Validator[Dict[str, str]] = _Validator(added="1.2") + """:external:ref:`core-metadata-project-url`""" + # PEP 685 lets us raise an error if an extra doesn't pass `Name` validation + # regardless of metadata version. 
+ provides_extra: _Validator[List[utils.NormalizedName]] = _Validator( + added="2.1", + ) + """:external:ref:`core-metadata-provides-extra`""" + provides_dist: _Validator[List[str]] = _Validator(added="1.2") + """:external:ref:`core-metadata-provides-dist`""" + obsoletes_dist: _Validator[List[str]] = _Validator(added="1.2") + """:external:ref:`core-metadata-obsoletes-dist`""" + requires: _Validator[List[str]] = _Validator(added="1.1") + """``Requires`` (deprecated)""" + provides: _Validator[List[str]] = _Validator(added="1.1") + """``Provides`` (deprecated)""" + obsoletes: _Validator[List[str]] = _Validator(added="1.1") + """``Obsoletes`` (deprecated)""" diff --git a/src/packaging/utils.py b/src/packaging/utils.py index 33c613b7..325687bd 100644 --- a/src/packaging/utils.py +++ b/src/packaging/utils.py @@ -12,6 +12,12 @@ NormalizedName = NewType("NormalizedName", str) +class InvalidName(ValueError): + """ + An invalid distribution name; users should refer to the packaging user guide. + """ + + class InvalidWheelFilename(ValueError): """ An invalid wheel filename was found, users should refer to PEP 427. @@ -24,17 +30,28 @@ class InvalidSdistFilename(ValueError): """ +# Core metadata spec for `Name` +_validate_regex = re.compile( + r"^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$", re.IGNORECASE +) _canonicalize_regex = re.compile(r"[-_.]+") +_normalized_regex = re.compile(r"^([a-z0-9]|[a-z0-9]([a-z0-9-](?!--))*[a-z0-9])$") # PEP 427: The build number must start with a digit. _build_tag_regex = re.compile(r"(\d+)(.*)") -def canonicalize_name(name: str) -> NormalizedName: +def canonicalize_name(name: str, *, validate: bool = False) -> NormalizedName: + if validate and not _validate_regex.match(name): + raise InvalidName(f"name is invalid: {name!r}") # This is taken from PEP 503. 
value = _canonicalize_regex.sub("-", name).lower() return cast(NormalizedName, value) +def is_normalized_name(name: str) -> bool: + return _normalized_regex.match(name) is not None + + def canonicalize_version( version: Union[Version, str], *, strip_trailing_zero: bool = True ) -> str: @@ -100,7 +117,7 @@ def parse_wheel_filename( parts = filename.split("-", dashes - 2) name_part = parts[0] - # See PEP 427 for the rules on escaping the project name + # See PEP 427 for the rules on escaping the project name. if "__" in name_part or re.match(r"^[\w\d._]*$", name_part, re.UNICODE) is None: raise InvalidWheelFilename(f"Invalid project name: {filename}") name = canonicalize_name(name_part) diff --git a/tests/test_metadata.py b/tests/test_metadata.py index 22fe76ba..45c05230 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -2,18 +2,15 @@ import pytest -from packaging import metadata - -_RAW_TO_EMAIL_MAPPING = { - raw: email for email, raw in metadata._EMAIL_TO_RAW_MAPPING.items() -} +from packaging import metadata, requirements, specifiers, utils, version +from packaging.metadata import ExceptionGroup class TestRawMetadata: @pytest.mark.parametrize("raw_field", metadata._STRING_FIELDS) def test_non_repeating_fields_only_once(self, raw_field): data = "VaLuE" - header_field = _RAW_TO_EMAIL_MAPPING[raw_field] + header_field = metadata._RAW_TO_EMAIL_MAPPING[raw_field] single_header = f"{header_field}: {data}" raw, unparsed = metadata.parse_email(single_header) assert not unparsed @@ -23,7 +20,7 @@ def test_non_repeating_fields_only_once(self, raw_field): @pytest.mark.parametrize("raw_field", metadata._STRING_FIELDS) def test_non_repeating_fields_repeated(self, raw_field): - header_field = _RAW_TO_EMAIL_MAPPING[raw_field] + header_field = metadata._RAW_TO_EMAIL_MAPPING[raw_field] data = "VaLuE" single_header = f"{header_field}: {data}" repeated_header = "\n".join([single_header] * 2) @@ -33,10 +30,10 @@ def test_non_repeating_fields_repeated(self, 
raw_field): assert header_field in unparsed assert unparsed[header_field] == [data] * 2 - @pytest.mark.parametrize("raw_field", metadata._LIST_STRING_FIELDS) + @pytest.mark.parametrize("raw_field", metadata._LIST_FIELDS) def test_repeating_fields_only_once(self, raw_field): data = "VaLuE" - header_field = _RAW_TO_EMAIL_MAPPING[raw_field] + header_field = metadata._RAW_TO_EMAIL_MAPPING[raw_field] single_header = f"{header_field}: {data}" raw, unparsed = metadata.parse_email(single_header) assert not unparsed @@ -44,9 +41,9 @@ def test_repeating_fields_only_once(self, raw_field): assert raw_field in raw assert raw[raw_field] == [data] - @pytest.mark.parametrize("raw_field", metadata._LIST_STRING_FIELDS) + @pytest.mark.parametrize("raw_field", metadata._LIST_FIELDS) def test_repeating_fields_repeated(self, raw_field): - header_field = _RAW_TO_EMAIL_MAPPING[raw_field] + header_field = metadata._RAW_TO_EMAIL_MAPPING[raw_field] data = "VaLuE" single_header = f"{header_field}: {data}" repeated_header = "\n".join([single_header] * 2) @@ -247,3 +244,387 @@ def test_complete(self): ] assert raw["dynamic"] == ["Obsoletes-Dist"] assert raw["description"] == "This description intentionally left blank.\n" + + +class TestExceptionGroup: + def test_attributes(self): + individual_exception = Exception("not important") + exc = metadata.ExceptionGroup("message", [individual_exception]) + assert exc.message == "message" + assert exc.exceptions == [individual_exception] + + def test_repr(self): + individual_exception = RuntimeError("not important") + exc = metadata.ExceptionGroup("message", [individual_exception]) + assert individual_exception.__class__.__name__ in repr(exc) + + +_RAW_EXAMPLE = { + "metadata_version": "2.3", + "name": "packaging", + "version": "2023.0.0", +} + + +class TestMetadata: + def _invalid_with_cause(self, meta, attr, cause=None, *, field=None): + if field is None: + field = attr + with pytest.raises(metadata.InvalidMetadata) as exc_info: + getattr(meta, attr) 
+ exc = exc_info.value + assert exc.field == field + if cause is None: + assert exc.__cause__ is None + else: + assert isinstance(exc.__cause__, cause) + + def test_from_email(self): + metadata_version = "2.3" + meta = metadata.Metadata.from_email( + f"Metadata-Version: {metadata_version}", validate=False + ) + + assert meta.metadata_version == metadata_version + + def test_from_email_unparsed(self): + with pytest.raises(ExceptionGroup) as exc_info: + metadata.Metadata.from_email("Hello: PyPA") + + assert len(exc_info.exceptions) == 1 + assert isinstance(exc_info.exceptions[0], metadata.InvalidMetadata) + + def test_from_email_validate(self): + with pytest.raises(ExceptionGroup): + # Lacking all required fields. + metadata.Metadata.from_email("Name: packaging", validate=True) + + def test_from_email_unparsed_valid_field_name(self): + with pytest.raises(ExceptionGroup): + metadata.Metadata.from_email( + "Project-URL: A, B\nProject-URL: A, C", validate=True + ) + + def test_required_fields(self): + meta = metadata.Metadata.from_raw(_RAW_EXAMPLE) + + assert meta.metadata_version == _RAW_EXAMPLE["metadata_version"] + + @pytest.mark.parametrize("field", list(_RAW_EXAMPLE.keys())) + def test_required_fields_missing(self, field): + required_fields = _RAW_EXAMPLE.copy() + + del required_fields[field] + + with pytest.raises(ExceptionGroup): + metadata.Metadata.from_raw(required_fields) + + def test_raw_validate_unrecognized_field(self): + raw = { + "metadata_version": "2.3", + "name": "packaging", + "version": "2023.0.0", + } + + # Safety check. + assert metadata.Metadata.from_raw(raw, validate=True) + + raw["dynamc"] = ["Obsoletes-Dist"] # Misspelled; missing an "i". 
+ + with pytest.raises(ExceptionGroup): + metadata.Metadata.from_raw(raw, validate=True) + + def test_raw_data_not_mutated(self): + raw = _RAW_EXAMPLE.copy() + meta = metadata.Metadata.from_raw(raw, validate=True) + + assert meta.version == version.Version(_RAW_EXAMPLE["version"]) + assert raw == _RAW_EXAMPLE + + def test_caching(self): + meta = metadata.Metadata.from_raw(_RAW_EXAMPLE, validate=True) + + assert meta.version is meta.version + + def test_from_raw_validate(self): + required_fields = _RAW_EXAMPLE.copy() + required_fields["version"] = "-----" + + with pytest.raises(ExceptionGroup): + # Multiple things to trigger a validation error: + # invalid version, missing keys, etc. + metadata.Metadata.from_raw(required_fields) + + @pytest.mark.parametrize("meta_version", ["2.2", "2.3"]) + def test_metadata_version_field_introduction(self, meta_version): + raw = { + "metadata_version": meta_version, + "name": "packaging", + "version": "2023.0.0", + "dynamic": ["Obsoletes-Dist"], # Introduced in 2.2. + } + + assert metadata.Metadata.from_raw(raw, validate=True) + + @pytest.mark.parametrize("meta_version", ["1.0", "1.1", "1.2", "2.1"]) + def test_metadata_version_field_introduction_mismatch(self, meta_version): + raw = { + "metadata_version": meta_version, + "name": "packaging", + "version": "2023.0.0", + "dynamic": ["Obsoletes-Dist"], # Introduced in 2.2. 
+ } + + with pytest.raises(ExceptionGroup): + metadata.Metadata.from_raw(raw, validate=True) + + @pytest.mark.parametrize("field", metadata._DICT_FIELDS) + def test_dict_default(self, field): + empty_meta = metadata.Metadata.from_raw({}, validate=False) + + assert getattr(empty_meta, field) == {} + + @pytest.mark.parametrize( + "attribute", + [ + "description", + "home_page", + "download_url", + "author", + "author_email", + "maintainer", + "maintainer_email", + "license", + ], + ) + def test_single_value_unvalidated_attribute(self, attribute): + value = "Not important" + meta = metadata.Metadata.from_raw({attribute: value}, validate=False) + + assert getattr(meta, attribute) == value + + empty_meta = metadata.Metadata.from_raw({}, validate=False) + + assert getattr(empty_meta, attribute) == "" + + @pytest.mark.parametrize( + "attribute", + [ + "supported_platforms", + "platforms", + "classifiers", + "provides_dist", + "obsoletes_dist", + "requires", + "provides", + "obsoletes", + ], + ) + def test_multi_value_unvalidated_attribute(self, attribute): + values = ["Not important", "Still not important"] + meta = metadata.Metadata.from_raw({attribute: values}, validate=False) + + assert getattr(meta, attribute) == values + + empty_meta = metadata.Metadata.from_raw({}, validate=False) + assert getattr(empty_meta, attribute) == [] + + def test_mapping_default_attribute(self): + empty_meta = metadata.Metadata.from_raw({}, validate=False) + + assert empty_meta.project_urls == {} + + @pytest.mark.parametrize("version", ["1.0", "1.1", "1.2", "2.1", "2.2", "2.3"]) + def test_valid_metadata_version(self, version): + meta = metadata.Metadata.from_raw({"metadata_version": version}, validate=False) + + assert meta.metadata_version == version + + @pytest.mark.parametrize("version", ["1.3", "2.0"]) + def test_invalid_metadata_version(self, version): + meta = metadata.Metadata.from_raw({"metadata_version": version}, validate=False) + + with pytest.raises(metadata.InvalidMetadata): + 
meta.metadata_version + + def test_valid_version(self): + version_str = "1.2.3" + meta = metadata.Metadata.from_raw({"version": version_str}, validate=False) + assert meta.version == version.parse(version_str) + + def test_missing_version(self): + meta = metadata.Metadata.from_raw({}, validate=False) + with pytest.raises(metadata.InvalidMetadata) as exc_info: + meta.version + assert exc_info.value.field == "version" + + def test_invalid_version(self): + meta = metadata.Metadata.from_raw({"version": "a.b.c"}, validate=False) + self._invalid_with_cause(meta, "version", version.InvalidVersion) + + def test_valid_summary(self): + summary = "Hello" + meta = metadata.Metadata.from_raw({"summary": summary}, validate=False) + + assert meta.summary == summary + + def test_invalid_summary(self): + meta = metadata.Metadata.from_raw( + {"summary": "Hello\n Again"}, validate=False + ) + + with pytest.raises(metadata.InvalidMetadata) as exc_info: + meta.summary + assert exc_info.value.field == "summary" + + def test_valid_name(self): + name = "Hello_World" + meta = metadata.Metadata.from_raw({"name": name}, validate=False) + assert meta.name == name + + def test_invalid_name(self): + meta = metadata.Metadata.from_raw({"name": "-not-legal"}, validate=False) + self._invalid_with_cause(meta, "name", utils.InvalidName) + + @pytest.mark.parametrize( + "content_type", + [ + "text/plain", + "TEXT/PLAIN", + "text/x-rst", + "text/markdown", + "text/plain; charset=UTF-8", + "text/x-rst; charset=UTF-8", + "text/markdown; charset=UTF-8; variant=GFM", + "text/markdown; charset=UTF-8; variant=CommonMark", + "text/markdown; variant=GFM", + "text/markdown; variant=CommonMark", + ], + ) + def test_valid_description_content_type(self, content_type): + meta = metadata.Metadata.from_raw( + {"description_content_type": content_type}, validate=False + ) + + assert meta.description_content_type == content_type + + @pytest.mark.parametrize( + "content_type", + [ + "application/json", + "text/plain; 
charset=ascii", + "text/plain; charset=utf-8", + "text/markdown; variant=gfm", + "text/markdown; variant=commonmark", + ], + ) + def test_invalid_description_content_type(self, content_type): + meta = metadata.Metadata.from_raw( + {"description_content_type": content_type}, validate=False + ) + + with pytest.raises(metadata.InvalidMetadata): + meta.description_content_type + + def test_keywords(self): + keywords = ["hello", "world"] + meta = metadata.Metadata.from_raw({"keywords": keywords}, validate=False) + + assert meta.keywords == keywords + + def test_valid_project_urls(self): + urls = { + "Documentation": "https://example.com/BeagleVote", + "Bug Tracker": "http://bitbucket.org/tarek/distribute/issues/", + } + meta = metadata.Metadata.from_raw({"project_urls": urls}, validate=False) + + assert meta.project_urls == urls + + @pytest.mark.parametrize("specifier", [">=3", ">2.6,!=3.0.*,!=3.1.*", "~=2.6"]) + def test_valid_requires_python(self, specifier): + expected = specifiers.SpecifierSet(specifier) + meta = metadata.Metadata.from_raw( + {"requires_python": specifier}, validate=False + ) + + assert meta.requires_python == expected + + def test_invalid_requires_python(self): + meta = metadata.Metadata.from_raw( + {"requires_python": "NotReal"}, validate=False + ) + self._invalid_with_cause( + meta, + "requires_python", + specifiers.InvalidSpecifier, + field="requires-python", + ) + + def test_requires_external(self): + externals = [ + "C", + "libpng (>=1.5)", + 'make; sys_platform != "win32"', + "libjpeg (>6b)", + ] + meta = metadata.Metadata.from_raw( + {"requires_external": externals}, validate=False + ) + + assert meta.requires_external == externals + + def test_valid_provides_extra(self): + extras = ["dev", "test"] + meta = metadata.Metadata.from_raw({"provides_extra": extras}, validate=False) + + assert meta.provides_extra == extras + + def test_invalid_provides_extra(self): + extras = ["pdf", "-Not-Valid", "ok"] + meta = 
metadata.Metadata.from_raw({"provides_extra": extras}, validate=False) + self._invalid_with_cause( + meta, "provides_extra", utils.InvalidName, field="provides-extra" + ) + + def test_valid_requires_dist(self): + requires = [ + "pkginfo", + "PasteDeploy", + "zope.interface (>3.5.0)", + "pywin32 >1.0; sys_platform == 'win32'", + ] + expected_requires = list(map(requirements.Requirement, requires)) + meta = metadata.Metadata.from_raw({"requires_dist": requires}, validate=False) + + assert meta.requires_dist == expected_requires + + def test_invalid_requires_dist(self): + requires = ["pkginfo", "-not-real", "zope.interface (>3.5.0)"] + meta = metadata.Metadata.from_raw({"requires_dist": requires}, validate=False) + self._invalid_with_cause( + meta, + "requires_dist", + requirements.InvalidRequirement, + field="requires-dist", + ) + + def test_valid_dynamic(self): + dynamic = ["Keywords", "Home-Page", "Author"] + meta = metadata.Metadata.from_raw({"dynamic": dynamic}, validate=False) + + assert meta.dynamic == [d.lower() for d in dynamic] + + def test_invalid_dynamic_value(self): + dynamic = ["Keywords", "NotReal", "Author"] + meta = metadata.Metadata.from_raw({"dynamic": dynamic}, validate=False) + + with pytest.raises(metadata.InvalidMetadata): + meta.dynamic + + @pytest.mark.parametrize("field_name", ["name", "version", "metadata-version"]) + def test_disallowed_dynamic(self, field_name): + meta = metadata.Metadata.from_raw({"dynamic": [field_name]}, validate=False) + + with pytest.raises(metadata.InvalidMetadata): + meta.dynamic diff --git a/tests/test_utils.py b/tests/test_utils.py index a6c6711d..d6e810ac 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -6,10 +6,12 @@ from packaging.tags import Tag from packaging.utils import ( + InvalidName, InvalidSdistFilename, InvalidWheelFilename, canonicalize_name, canonicalize_version, + is_normalized_name, parse_sdist_filename, parse_wheel_filename, ) @@ -35,6 +37,33 @@ def test_canonicalize_name(name, 
expected): assert canonicalize_name(name) == expected +def test_canonicalize_name_invalid(): + with pytest.raises(InvalidName): + canonicalize_name("_not_legal", validate=True) + assert canonicalize_name("_not_legal") == "-not-legal" + + +@pytest.mark.parametrize( + ("name", "expected"), + [ + ("foo", "foo"), + ("Foo", "foo"), + ("fOo", "foo"), + ("foo.bar", "foo-bar"), + ("Foo.Bar", "foo-bar"), + ("Foo.....Bar", "foo-bar"), + ("foo_bar", "foo-bar"), + ("foo___bar", "foo-bar"), + ("foo-bar", "foo-bar"), + ("foo----bar", "foo-bar"), + ], +) +def test_is_normalized_name(name, expected): + assert is_normalized_name(expected) + if name != expected: + assert not is_normalized_name(name) + + @pytest.mark.parametrize( ("version", "expected"), [