pydantic · sydney-runkle · Apr 11, 2024 · Apr 6, 2024 · Apr 6, 2024 · Apr 6, 2024
diff --git a/docs/concepts/json.md b/docs/concepts/json.md
@@ -6,9 +6,9 @@
 ## Json Parsing
 
 ??? api "API Documentation"
-    [`pydantic.main.BaseModel.model_validate_json`][pydantic.main.BaseModel.model_validate_json]<br>
-    [`pydantic.type_adapter.TypeAdapter.validate_json`][pydantic.type_adapter.TypeAdapter.validate_json]<br>
-    [`pydantic_core.from_json`][pydantic_core.from_json]<br>
+    [`pydantic.main.BaseModel.model_validate_json`][pydantic.main.BaseModel.model_validate_json]
+    [`pydantic.type_adapter.TypeAdapter.validate_json`][pydantic.type_adapter.TypeAdapter.validate_json]
+    [`pydantic_core.from_json`][pydantic_core.from_json]
 
 Pydantic provides builtin JSON parsing, which helps achieve:
 
@@ -60,6 +60,156 @@ with one noticeable enhancement being that `jiter` supports deserialization of `
 In the future, `jiter` is intended to enable support validation errors to include the location
 in the original JSON input which contained the invalid value.
 
+### Partial JSON Parsing
+
+**Starting in v2.7.0**, Pydantic's [JSON parser](https://docs.rs/jiter/latest/jiter/) offers support for partial JSON parsing, which is exposed via [`pydantic_core.from_json`][pydantic_core.from_json]. Here's an example of this feature in action:
+
+```py
+from pydantic_core import from_json
+
+partial_json_data = '["aa", "bb", "c'  # (1)!
+
+try:
+    result = from_json(partial_json_data, allow_partial=False)
+except ValueError as e:
+    print(e)  # (2)!
+    #> EOF while parsing a string at line 1 column 15
+
+result = from_json(partial_json_data, allow_partial=True)
+print(result)  # (3)!
+#> ['aa', 'bb']
+```
+
+1. The JSON list is incomplete - it's missing a closing `"]`
+2. When `allow_partial` is set to `False` (the default), a parsing error occurs.
+3. When `allow_partial` is set to `True`, part of the input is deserialized successfully.
+
+This also works for deserializing partial dictionaries. For example:
+
+```py
+from pydantic_core import from_json
+
+partial_dog_json = '{"breed": "lab", "name": "fluffy", "friends": ["buddy", "spot", "rufus"], "age'
+dog_dict = from_json(partial_dog_json, allow_partial=True)
+print(dog_dict)
+#> {'breed': 'lab', 'name': 'fluffy', 'friends': ['buddy', 'spot', 'rufus']}
+```
+
+!!! tip "Validating LLM Output"
+    This feature is particularly beneficial for validating LLM outputs.
+    We've written some blog posts about this topic, which you can find [here](https://blog.pydantic.dev/blog/category/llms/).
+
+In future versions of Pydantic, we expect to expand support for this feature through either Pydantic's other JSON validation functions
+([`pydantic.main.BaseModel.model_validate_json`][pydantic.main.BaseModel.model_validate_json] and
+[`pydantic.type_adapter.TypeAdapter.validate_json`][pydantic.type_adapter.TypeAdapter.validate_json]) or model configuration. Stay tuned 🚀!
+
+For now, you can use [`pydantic_core.from_json`][pydantic_core.from_json] in combination with [`pydantic.main.BaseModel.model_validate`][pydantic.main.BaseModel.model_validate] to achieve the same result. Here's an example:
+
+```py
+from pydantic_core import from_json
+
+from pydantic import BaseModel
+
+
+class Dog(BaseModel):
+    breed: str
+    name: str
+    friends: list
+
+
+partial_dog_json = '{"breed": "lab", "name": "fluffy", "friends": ["buddy", "spot", "rufus"], "age'
+dog = Dog.model_validate(from_json(partial_dog_json, allow_partial=True))
+print(repr(dog))
+#> Dog(breed='lab', name='fluffy', friends=['buddy', 'spot', 'rufus'])
+```
+
+!!! tip
+    For partial JSON parsing to work reliably, all fields on the model should have default values.
+
+Check out the following example for a more in-depth look at how to use default values with partial JSON parsing:
+
+!!! example "Using default values with partial JSON parsing"
+
+    ```py
+    from typing import Any, Optional, Tuple
+
+    import pydantic_core
+    from typing_extensions import Annotated
+
+    from pydantic import BaseModel, ValidationError, WrapValidator
+
+
+    def default_on_error(v, handler) -> Any:
+        """
+        Raise a PydanticUseDefault exception if validation fails only because of missing values.
+
+        This is useful for avoiding errors from partial
+        JSON preventing successful validation.
+        """
+        try:
+            return handler(v)
+        except ValidationError as exc:
+            # other types of errors can also result from partial JSON parsing;
+            # if you want to tolerate them as well, customize this check as needed
+            if all(e['type'] == 'missing' for e in exc.errors()):
+                raise pydantic_core.PydanticUseDefault()
+            else:
+                raise
+
+
+    class NestedModel(BaseModel):
+        x: int
+        y: str
+
+
+    class MyModel(BaseModel):
+        foo: Optional[str] = None
+        bar: Annotated[
+            Optional[Tuple[str, int]], WrapValidator(default_on_error)
+        ] = None
+        nested: Annotated[
+            Optional[NestedModel], WrapValidator(default_on_error)
+        ] = None
+
+
+    m = MyModel.model_validate(
+        pydantic_core.from_json('{"foo": "x", "bar": ["world",', allow_partial=True)
+    )
+    print(repr(m))
+    #> MyModel(foo='x', bar=None, nested=None)
+
+
+    m = MyModel.model_validate(
+        pydantic_core.from_json(
+            '{"foo": "x", "bar": ["world", 1], "nested": {"x":', allow_partial=True
+        )
+    )
+    print(repr(m))
+    #> MyModel(foo='x', bar=('world', 1), nested=None)
+    ```
+
+### Caching Strings
+
+**Starting in v2.7.0**, Pydantic's [JSON parser](https://docs.rs/jiter/latest/jiter/) offers support for configuring how Python strings are cached during JSON parsing and validation (when Python strings are constructed from Rust strings during Python validation, e.g. after `strip_whitespace=True`).
+The `cache_strings` setting is exposed via both [model config][pydantic.config.ConfigDict] and [`pydantic_core.from_json`][pydantic_core.from_json].
+
+The `cache_strings` setting can take any of the following values:
+
+* `True` or `'all'` (the default): cache all strings
+* `'keys'`: cache only dictionary keys; this **only** applies when used with [`pydantic_core.from_json`][pydantic_core.from_json] or when parsing JSON using [`Json`][pydantic.types.Json]
+* `False` or `'none'`: no caching
+
+Using the string caching feature results in performance improvements, but increases memory usage slightly.
+
+!!! note "String Caching Details"
+
+    1. Strings are cached using a fully associative cache with a size of
+    [16,384](https://github.com/pydantic/jiter/blob/5bbdcfd22882b7b286416b22f74abd549c7b2fd7/src/py_string_cache.rs#L113).
+    2. Only strings where `len(string) < 64` are cached.
+    3. There is some overhead to looking up the cache, which is normally worth it to avoid constructing strings.
+    However, if you know there will be very few repeated strings in your data, you might get a performance boost by disabling this setting with `cache_strings=False`.
+
+
 ## JSON Serialization
 
 ??? api "API Documentation"

diff --git a/pydantic/_internal/_config.py b/pydantic/_internal/_config.py
@@ -83,6 +83,7 @@ class ConfigWrapper:
     regex_engine: Literal['rust-regex', 'python-re']
     validation_error_cause: bool
     use_attribute_docstrings: bool
+    cache_strings: bool | Literal['all', 'keys', 'none']
 
     def __init__(self, config: ConfigDict | dict[str, Any] | type[Any] | None, *, check: bool = True):
         if check:
@@ -188,6 +189,7 @@ def dict_not_none(**kwargs: Any) -> Any:
                 coerce_numbers_to_str=self.config_dict.get('coerce_numbers_to_str'),
                 regex_engine=self.config_dict.get('regex_engine'),
                 validation_error_cause=self.config_dict.get('validation_error_cause'),
+                cache_strings=self.config_dict.get('cache_strings'),
             )
         )
         return core_config
@@ -262,6 +264,7 @@ def push(self, config_wrapper: ConfigWrapper | ConfigDict | None):
     regex_engine='rust-regex',
     validation_error_cause=False,
     use_attribute_docstrings=False,
+    cache_strings=True,
 )
 
 

diff --git a/pydantic/config.py b/pydantic/config.py
@@ -944,6 +944,25 @@ class Model(BaseModel):
         can be different depending on the Python version used.
     '''
 
+    cache_strings: bool | Literal['all', 'keys', 'none']
+    """
+    Whether to cache strings to avoid constructing new Python objects. Defaults to True.
+
+    Enabling this setting should significantly improve validation performance while increasing memory usage slightly.
+
+    - `True` or `'all'` (the default): cache all strings
+    - `'keys'`: cache only dictionary keys
+    - `False` or `'none'`: no caching
+
+    !!! note
+        `True` or `'all'` is required to cache strings during general validation because
+        validators don't know if they're in a key or a value.
+
+    !!! tip
+        If repeated strings are rare, it's recommended to use `'keys'` or `'none'` to reduce memory usage,
+        as caching provides little performance benefit in that case.
+    """
+
 
 _TypeT = TypeVar('_TypeT', bound=type)