From 858118e64240ae1382959f3a8d49c616f846640b Mon Sep 17 00:00:00 2001 From: Christiaan Biesterbosch Date: Fri, 21 Apr 2023 17:27:59 +0500 Subject: [PATCH] Add a helper macro to help deserialize internally tagged enums with Serde --- Changelog.md | 4 + src/de/mod.rs | 19 +++++ src/serde_helpers.rs | 192 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 215 insertions(+) diff --git a/Changelog.md b/Changelog.md index 1cb0acce..9f251e90 100644 --- a/Changelog.md +++ b/Changelog.md @@ -26,7 +26,11 @@ ### Misc Changes +- [#594]: Add a helper macro to help deserialize internally tagged enums + with Serde, which doesn't work out-of-box due to serde limitations. + [#581]: https://github.com/tafia/quick-xml/pull/581 +[#594]: https://github.com/tafia/quick-xml/pull/594 [#601]: https://github.com/tafia/quick-xml/pull/601 [#603]: https://github.com/tafia/quick-xml/pull/603 [#606]: https://github.com/tafia/quick-xml/pull/606 diff --git a/src/de/mod.rs b/src/de/mod.rs index 20017d00..c68bdb95 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -28,6 +28,7 @@ //! - [Frequently Used Patterns](#frequently-used-patterns) //! - [`<element>` lists](#element-lists) //! - [Enum::Unit Variants As a Text](#enumunit-variants-as-a-text) +//! - [Internally Tagged Enums](#internally-tagged-enums) //! //! //! @@ -1743,10 +1744,28 @@ //! If you still want to keep your struct untouched, you can instead use the //! helper module [`text_content`]. //! +//! +//! Internally Tagged Enums +//! ----------------------- +//! [Tagged enums] are currently not supported because of an issue in the Serde +//! design (see [serde#1183] and [quick-xml#586]) and missing optimizations in +//! serde which could be useful for XML case ([serde#1495]). This can be worked +//! around by manually implementing deserialize with `#[serde(deserialize_with = "func")]` +//! or implementing [`Deserialize`], but this can get very tedious very fast for +//! files with large amounts of tagged enums. 
To help with this issue quick-xml
+//! provides a macro [`impl_deserialize_for_internally_tagged_enum!`]. See the
+//! macro documentation for details.
+//!
+//!
 //! [specification]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition
 //! [`deserialize_with`]: https://serde.rs/field-attrs.html#deserialize_with
 //! [#497]: https://github.com/tafia/quick-xml/issues/497
 //! [`text_content`]: crate::serde_helpers::text_content
+//! [Tagged enums]: https://serde.rs/enum-representations.html#internally-tagged
+//! [serde#1183]: https://github.com/serde-rs/serde/issues/1183
+//! [serde#1495]: https://github.com/serde-rs/serde/issues/1495
+//! [quick-xml#586]: https://github.com/tafia/quick-xml/issues/586
+//! [`impl_deserialize_for_internally_tagged_enum!`]: crate::impl_deserialize_for_internally_tagged_enum
 
 // Macros should be defined before the modules that using them
 // Also, macros should be imported before using them
diff --git a/src/serde_helpers.rs b/src/serde_helpers.rs
index b6856afd..f7babf9f 100644
--- a/src/serde_helpers.rs
+++ b/src/serde_helpers.rs
@@ -2,6 +2,198 @@
 
 use serde::{Deserialize, Deserializer, Serialize, Serializer};
 
+#[macro_export]
+#[doc(hidden)]
+macro_rules! deserialize_variant {
+    // Struct variant: deserialize a helper struct, then move its fields into the variant
+    ( $de:expr, $enum:tt, $variant:ident {
+        $(
+            $(#[$meta:meta])*
+            $field:ident : $typ:ty
+        ),* $(,)?
+    } ) => ({
+        let var = {
+            // Helper struct mirroring the variant (trait path is qualified: no import needed)
+            #[derive(serde::Deserialize)]
+            struct $variant {
+                $(
+                    $(#[$meta])*
+                    $field: $typ,
+                )*
+            }
+            <$variant as serde::Deserialize>::deserialize($de)?
+        };
+        // Due to https://github.com/rust-lang/rust/issues/86935 we cannot use
+        // <$enum> :: $variant
+        use $enum :: *;
+        $variant {
+            $($field: var.$field,)*
+        }
+    });
+
+    // Newtype variant: deserialize the wrapped type and wrap it into the variant
+    ( $de:expr, $enum:tt, $variant:ident($typ:ty) ) => ({
+        let var = <$typ as serde::Deserialize>::deserialize($de)?;
+        <$enum> :: $variant(var)
+    });
+
+    // Unit variant: deserialize the rest into `IgnoredAny` (discarding it)
+    ( $de:expr, $enum:tt, $variant:ident ) => ({
+        <serde::de::IgnoredAny as serde::Deserialize>::deserialize($de)?;
+        <$enum> :: $variant
+    });
+}
+
+/// A helper to implement [`Deserialize`] for [internally tagged] enums which
+/// does not use [`Deserializer::deserialize_any`] that produces wrong results
+/// with XML because of [serde#1183].
+///
+/// In contrast to deriving [`Deserialize`] this macro assumes that a tag will be
+/// the first element or attribute in the XML.
+///
+/// # Example
+///
+/// ```
+/// # use pretty_assertions::assert_eq;
+/// use quick_xml::de::from_str;
+/// use quick_xml::impl_deserialize_for_internally_tagged_enum;
+/// use serde::Deserialize;
+///
+/// #[derive(Deserialize, Debug, PartialEq)]
+/// struct Root {
+///     one: InternallyTaggedEnum,
+///     two: InternallyTaggedEnum,
+///     three: InternallyTaggedEnum,
+/// }
+///
+/// #[derive(Debug, PartialEq)]
+/// // #[serde(tag = "@tag")]
+/// enum InternallyTaggedEnum {
+///     Unit,
+///     Newtype(Newtype),
+///     Struct {
+///         // #[serde(rename = "@attribute")]
+///         attribute: u32,
+///         element: f32,
+///     },
+/// }
+///
+/// #[derive(Deserialize, Debug, PartialEq)]
+/// struct Newtype {
+///     #[serde(rename = "@attribute")]
+///     attribute: u64,
+/// }
+///
+/// // The macro needs the type of the enum, the tag name,
+/// // and information about all the variants
+/// impl_deserialize_for_internally_tagged_enum!{
+///     InternallyTaggedEnum, "@tag",
+///     ("Unit" => Unit),
+///     ("Newtype" => Newtype(Newtype)),
+///     ("Struct" => Struct {
+///         #[serde(rename = "@attribute")]
+///         attribute: u32,
+///         element: f32,
+///     }),
+/// }
+///
+/// assert_eq!(
+///     
from_str::<Root>(r#"
+///         <root>
+///             <one tag="Unit" />
+///             <two tag="Newtype" attribute="42" />
+///             <three tag="Struct" attribute="42">
+///                 <element>4.2</element>
+///             </three>
+///         </root>
+///     "#).unwrap(),
+///     Root {
+///         one: InternallyTaggedEnum::Unit,
+///         two: InternallyTaggedEnum::Newtype(Newtype { attribute: 42 }),
+///         three: InternallyTaggedEnum::Struct {
+///             attribute: 42,
+///             element: 4.2,
+///         },
+///     },
+/// );
+/// ```
+///
+/// [internally tagged]: https://serde.rs/enum-representations.html#internally-tagged
+/// [serde#1183]: https://github.com/serde-rs/serde/issues/1183
+#[macro_export(local_inner_macros)]
+macro_rules! impl_deserialize_for_internally_tagged_enum {
+    (
+        $enum:ty,
+        $tag:literal,
+        $(
+            ($variant_tag:literal => $($variant:tt)+ )
+        ),* $(,)?
+    ) => {
+        impl<'de> serde::de::Deserialize<'de> for $enum {
+            fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+            where
+                D: serde::de::Deserializer<'de>,
+            {
+                use serde::de::{Error, MapAccess, Visitor};
+
+                // The Visitor struct is normally used for state, but none is needed
+                struct TheVisitor;
+                // The main logic of the deserializing happens in the Visitor trait
+                impl<'de> Visitor<'de> for TheVisitor {
+                    // The type that is being deserialized
+                    type Value = $enum;
+
+                    // Completes serde's "..., expected ..." error message for this visitor
+                    fn expecting(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+                        f.write_str("a map with tag in ")?;
+                        f.write_str($tag)
+                    }
+
+                    // The xml data is provided as an opaque map,
+                    // that map is parsed into the type
+                    fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error>
+                    where
+                        A: MapAccess<'de>,
+                    {
+                        // The first entry of the map must be the discriminator
+                        // (enum "tag"); its key and value are read as strings.
+                        let entry: Option<(String, String)> = map.next_entry()?;
+                        // The remaining entries stay in `map` and are handed
+                        // to the variant deserializer below.
+                        let tag = match entry {
+                            // Return an error if no attributes are found,
+                            // and indicate that the @tag attribute is missing.
+                            None => Err(A::Error::missing_field($tag)),
+                            // Check if the attribute is the tag
+                            Some((attribute, value)) => {
+                                if attribute == $tag {
+                                    // return the value of the tag
+                                    Ok(value)
+                                } else {
+                                    // The attribute is not @tag, return an error
+                                    // indicating that there is an unexpected attribute
+                                    Err(A::Error::unknown_field(&attribute, &[$tag]))
+                                }
+                            }
+                        }?;
+
+                        let de = serde::de::value::MapAccessDeserializer::new(map);
+                        match tag.as_ref() {
+                            $(
+                                $variant_tag => Ok(deserialize_variant!( de, $enum, $($variant)+ )),
+                            )*
+                            _ => Err(A::Error::unknown_variant(&tag, &[$($variant_tag),*])),
+                        }
+                    }
+                }
+                // Tell the deserializer to deserialize the data as a map,
+                // using the TheVisitor as the decoder
+                deserializer.deserialize_map(TheVisitor)
+            }
+        }
+    }
+}
+
 /// Provides helper functions to serialization and deserialization of types
 /// (usually enums) as a text content of an element and intended to use with
 /// [`#[serde(with = "...")]`][with], [`#[serde(deserialize_with = "...")]`][de-with]