dtolnay/serde-yaml

Ability to integrate custom `!tag` "middleware"

Closed this issue · 2 comments

I'm looking for a way to support a custom !include tag in my application:

name: My File
env: !include ./env/vars.yml

From what I can see, `!include` does not appear to be supported out of the box, so I started looking for a way to add my own "middleware" for custom `!tag`s. A (very) rough idea:

// Sketch: one entry of the `env` list, modelled as a pair of strings.
#[derive(Deserialize)]
struct EnvVar(String, String);

// Sketch: the document to load. `env` is the field that the example YAML
// fills in through `!include`.
#[derive(Deserialize)]
struct MyFile {
    name: String,
    env: Vec<EnvVar>
}

// Hypothetical handler for the `!include` tag. This is part of the rough idea
// only: `TagDeserializer` and `TaggedValue` are wished-for names, not an
// existing API.
struct IncludeTagDeserializer;

impl TagDeserializer for IncludeTagDeserializer {
    type Value;

    // Name of the tag this handler would be responsible for.
    fn tag() -> &str {
        return "include";
    }

    // Would receive the tagged value and produce the content to substitute.
    fn deserialize(v: &TaggedValue) -> Value {
        // Load indicated file from disk, deserialize and return
    }
}

// Input for the sketch: the same two-key document shown at the top of the issue.
let yaml = indoc! {"
    name: My File
    env: !include ./env/vars.yml
"};

// Not sure how to work this into the current API
// (`addTagDeserializer` is the proposed, hypothetical registration hook).
Deserializer::new()
    .addTagDeserializer(IncludeTagDeserializer)
    .deserialize(&yaml);

However, after some hours of digging, I have not found a way to do this (or to implement it myself) 😞.

Is there a way to intercept custom !tags today or is this something that could be added to the library?

Here is an example "middleware" to get you started.

// [dependencies]
// serde = { version = "1", features = ["derive"] }
// serde_yaml = "0.9"

use serde::de::{
    DeserializeSeed, Deserializer, EnumAccess, Error, MapAccess, SeqAccess, VariantAccess, Visitor,
};
use serde::Deserialize;
use std::cell::RefCell;
use std::fmt;
use std::fs;
use std::marker::PhantomData;
use std::mem;
use std::path::{Path, PathBuf};

/// One entry of the `env` list, deserialized from a two-element sequence such
/// as the `[RUSTFLAGS, '']` line that `main` writes to `env/vars.yml`.
///
/// Presumably the fields are (variable name, value); the code itself only
/// requires two strings.
#[derive(Deserialize, Debug)]
pub struct EnvVar(pub String, pub String);

/// Shape of the top-level demo document (`demo.yml` as written by `main`).
#[derive(Deserialize, Debug)]
pub struct MyFile {
    /// Value of the `name` key.
    pub name: String,
    /// Value of the `env` key; in the demo it is supplied via `!include`.
    pub env: Vec<EnvVar>,
}

fn main() {
    fs::write("demo.yml", "name: My File\nenv: !include ./env/vars.yml\n").unwrap();
    _ = fs::create_dir("env");
    fs::write("env/vars.yml", "- [RUSTFLAGS, '']\n").unwrap();

    match deserialize_with_include::<MyFile>("demo.yml") {
        Ok(my_file) => println!("{:#?}", my_file),
        Err(error) => eprintln!("{}", error),
    }
}

pub fn deserialize_with_include<T>(path: impl AsRef<Path>) -> Result<T, serde_yaml::Error>
where
    T: for<'de> Deserialize<'de>,
{
    let path = path.as_ref();
    let contents = match fs::read_to_string(path) {
        Ok(string) => string,
        Err(io_error) => {
            return Err(Error::custom(format_args!(
                "{}: {}",
                path.display(),
                io_error
            )));
        }
    };
    let de = serde_yaml::Deserializer::from_str(&contents);
    T::deserialize(IncludeDeserializer::new(de))
}

/// Transparent wrapper around a serde component.
///
/// The same type is given impls of `Deserializer`, `Visitor`, `SeqAccess`,
/// `MapAccess`, `VariantAccess` and `DeserializeSeed` elsewhere in this file;
/// each impl forwards to `delegate` while re-wrapping whatever is passed
/// further down, so the wrapper stays in place at every nesting level.
struct IncludeDeserializer<'de, D> {
    /// Zero-sized marker that carries the `'de` lifetime; stores no data.
    borrow: PhantomData<&'de ()>,
    /// The wrapped component that all trait impls forward to.
    delegate: D,
}

impl<'de, D> IncludeDeserializer<'de, D> {
    /// Wraps `delegate` without modifying it.
    fn new(delegate: D) -> Self {
        IncludeDeserializer {
            delegate,
            borrow: PhantomData,
        }
    }
}

/// `Deserializer` facade: every request ends up in the wrapped deserializer's
/// `deserialize_any`, with the caller's visitor wrapped so that nested values
/// are observed by `IncludeDeserializer` as well.
impl<'any, 'de, D> Deserializer<'any> for IncludeDeserializer<'de, D>
where
    D: Deserializer<'de>,
{
    type Error = D::Error;

    /// Wraps `visitor` and hands it to the inner deserializer.
    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'any>,
    {
        let Self { delegate, .. } = self;
        let wrapped_visitor = IncludeDeserializer::new(visitor);
        delegate.deserialize_any(wrapped_visitor)
    }

    // All type-specific entry points are routed to `deserialize_any` above.
    serde::forward_to_deserialize_any! {
        <V: Visitor<'any>>
        bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
        bytes byte_buf option unit unit_struct newtype_struct seq tuple
        tuple_struct map struct enum identifier ignored_any
    }
}

impl<'any, 'de, V> Visitor<'any> for IncludeDeserializer<'de, V>
where
    V: Visitor<'de>,
{
    type Value = V::Value;
    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        self.delegate.expecting(formatter)
    }
    fn visit_bool<E: Error>(self, v: bool) -> Result<Self::Value, E> {
        self.delegate.visit_bool(v)
    }
    fn visit_i8<E: Error>(self, v: i8) -> Result<Self::Value, E> {
        self.delegate.visit_i8(v)
    }
    fn visit_i16<E: Error>(self, v: i16) -> Result<Self::Value, E> {
        self.delegate.visit_i16(v)
    }
    fn visit_i32<E: Error>(self, v: i32) -> Result<Self::Value, E> {
        self.delegate.visit_i32(v)
    }
    fn visit_i64<E: Error>(self, v: i64) -> Result<Self::Value, E> {
        self.delegate.visit_i64(v)
    }
    fn visit_i128<E: Error>(self, v: i128) -> Result<Self::Value, E> {
        self.delegate.visit_i128(v)
    }
    fn visit_u8<E: Error>(self, v: u8) -> Result<Self::Value, E> {
        self.delegate.visit_u8(v)
    }
    fn visit_u16<E: Error>(self, v: u16) -> Result<Self::Value, E> {
        self.delegate.visit_u16(v)
    }
    fn visit_u32<E: Error>(self, v: u32) -> Result<Self::Value, E> {
        self.delegate.visit_u32(v)
    }
    fn visit_u64<E: Error>(self, v: u64) -> Result<Self::Value, E> {
        self.delegate.visit_u64(v)
    }
    fn visit_u128<E: Error>(self, v: u128) -> Result<Self::Value, E> {
        self.delegate.visit_u128(v)
    }
    fn visit_f32<E: Error>(self, v: f32) -> Result<Self::Value, E> {
        self.delegate.visit_f32(v)
    }
    fn visit_f64<E: Error>(self, v: f64) -> Result<Self::Value, E> {
        self.delegate.visit_f64(v)
    }
    fn visit_char<E: Error>(self, v: char) -> Result<Self::Value, E> {
        self.delegate.visit_char(v)
    }
    fn visit_str<E: Error>(self, v: &str) -> Result<Self::Value, E> {
        self.delegate.visit_str(v)
    }
    fn visit_string<E: Error>(self, v: String) -> Result<Self::Value, E> {
        self.delegate.visit_string(v)
    }
    fn visit_bytes<E: Error>(self, v: &[u8]) -> Result<Self::Value, E> {
        self.delegate.visit_bytes(v)
    }
    fn visit_byte_buf<E: Error>(self, v: Vec<u8>) -> Result<Self::Value, E> {
        self.delegate.visit_byte_buf(v)
    }
    fn visit_none<E: Error>(self) -> Result<Self::Value, E> {
        self.delegate.visit_none()
    }
    fn visit_some<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
    where
        D: Deserializer<'any>,
    {
        self.delegate
            .visit_some(IncludeDeserializer::new(deserializer))
    }
    fn visit_unit<E: Error>(self) -> Result<Self::Value, E> {
        self.delegate.visit_unit()
    }
    fn visit_newtype_struct<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
    where
        D: Deserializer<'any>,
    {
        self.delegate
            .visit_newtype_struct(IncludeDeserializer::new(deserializer))
    }
    fn visit_seq<A>(self, seq: A) -> Result<Self::Value, A::Error>
    where
        A: SeqAccess<'any>,
    {
        self.delegate.visit_seq(IncludeDeserializer::new(seq))
    }
    fn visit_map<A>(self, map: A) -> Result<Self::Value, A::Error>
    where
        A: MapAccess<'any>,
    {
        self.delegate.visit_map(IncludeDeserializer::new(map))
    }
    fn visit_enum<A>(self, data: A) -> Result<Self::Value, A::Error>
    where
        A: EnumAccess<'any>,
    {
        let seed = GetIncludeVariant {
            borrow: PhantomData,
            visitor: &self.delegate,
        };
        let (value, data) = data.variant_seed(seed)?;
        match value {
            Variant::Include => data.newtype_variant_seed(IncludeVisitor {
                borrow: PhantomData,
                visitor: self.delegate,
            }),
            Variant::Other(variant) => self.delegate.visit_enum(OtherEnumAccess {
                borrow: PhantomData,
                variant,
                data,
            }),
        }
    }
}

/// `SeqAccess` facade: each element seed is wrapped before it reaches the
/// inner sequence, so elements are deserialized through
/// `IncludeDeserializer` too. Only `next_element_seed` is overridden here.
impl<'any, 'de, A> SeqAccess<'any> for IncludeDeserializer<'de, A>
where
    A: SeqAccess<'de>,
{
    type Error = A::Error;

    fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>, Self::Error>
    where
        T: DeserializeSeed<'any>,
    {
        let wrapped_seed = IncludeDeserializer::new(seed);
        self.delegate.next_element_seed(wrapped_seed)
    }
}

/// `MapAccess` facade: key and value seeds are wrapped before they reach the
/// inner map, so both sides of every entry are deserialized through
/// `IncludeDeserializer`.
impl<'any, 'de, A> MapAccess<'any> for IncludeDeserializer<'de, A>
where
    A: MapAccess<'de>,
{
    type Error = A::Error;

    fn next_key_seed<K>(&mut self, seed: K) -> Result<Option<K::Value>, Self::Error>
    where
        K: DeserializeSeed<'any>,
    {
        let wrapped_seed = IncludeDeserializer::new(seed);
        self.delegate.next_key_seed(wrapped_seed)
    }

    fn next_value_seed<V>(&mut self, seed: V) -> Result<V::Value, Self::Error>
    where
        V: DeserializeSeed<'any>,
    {
        let wrapped_seed = IncludeDeserializer::new(seed);
        self.delegate.next_value_seed(wrapped_seed)
    }
}

/// `VariantAccess` facade: unit variants are forwarded as-is; the seed or
/// visitor for every other variant shape is wrapped before being forwarded,
/// so variant payloads are deserialized through `IncludeDeserializer`.
impl<'any, 'de, A> VariantAccess<'any> for IncludeDeserializer<'de, A>
where
    A: VariantAccess<'de>,
{
    type Error = A::Error;

    fn unit_variant(self) -> Result<(), Self::Error> {
        let Self { delegate, .. } = self;
        delegate.unit_variant()
    }

    fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value, Self::Error>
    where
        T: DeserializeSeed<'any>,
    {
        let wrapped_seed = IncludeDeserializer::new(seed);
        self.delegate.newtype_variant_seed(wrapped_seed)
    }

    fn tuple_variant<V>(self, len: usize, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'any>,
    {
        let wrapped_visitor = IncludeDeserializer::new(visitor);
        self.delegate.tuple_variant(len, wrapped_visitor)
    }

    fn struct_variant<V>(
        self,
        fields: &'static [&'static str],
        visitor: V,
    ) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'any>,
    {
        let wrapped_visitor = IncludeDeserializer::new(visitor);
        self.delegate.struct_variant(fields, wrapped_visitor)
    }
}

/// `DeserializeSeed` facade: runs the wrapped seed against a wrapped
/// deserializer, which is how the wrapper propagates into sequence elements,
/// map entries and variant payloads.
impl<'any, 'de, T> DeserializeSeed<'any> for IncludeDeserializer<'de, T>
where
    T: DeserializeSeed<'de>,
{
    type Value = T::Value;

    fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
    where
        D: Deserializer<'any>,
    {
        let wrapped_deserializer = IncludeDeserializer::new(deserializer);
        self.delegate.deserialize(wrapped_deserializer)
    }
}

/// Seed and visitor used by `IncludeDeserializer::visit_enum` to read an enum
/// variant name and classify it as `include` or something else (see
/// `Variant`).
struct GetIncludeVariant<'a, 'de, V> {
    /// Zero-sized marker that carries the `'de` lifetime; stores no data.
    borrow: PhantomData<&'de ()>,
    /// Borrowed real visitor; its `expecting` text is reused for this
    /// visitor's own `expecting`.
    visitor: &'a V,
}

/// Result of classifying an enum variant name with `GetIncludeVariant`.
enum Variant {
    /// The variant name was exactly `include`.
    Include,
    /// Any other variant name, kept as an owned string so it can be replayed
    /// to the real visitor by `OtherEnumAccess`.
    Other(String),
}

/// Seed half of `GetIncludeVariant`: asks the deserializer for an identifier
/// and lets the `Visitor` half of the same type classify it.
impl<'any, 'de, 'a, V> DeserializeSeed<'any> for GetIncludeVariant<'a, 'de, V>
where
    V: Visitor<'de>,
{
    type Value = Variant;

    fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
    where
        D: Deserializer<'any>,
    {
        D::deserialize_identifier(deserializer, self)
    }
}

/// Visitor half of `GetIncludeVariant`: maps a variant name given as a string
/// to a `Variant`. Only `visit_str` is overridden.
impl<'any, 'de, 'a, V> Visitor<'any> for GetIncludeVariant<'a, 'de, V>
where
    V: Visitor<'de>,
{
    type Value = Variant;

    /// Reuses the real visitor's description of what it expects.
    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        V::expecting(self.visitor, formatter)
    }

    fn visit_str<E: Error>(self, v: &str) -> Result<Self::Value, E> {
        let classified = match v {
            "include" => Variant::Include,
            other => Variant::Other(other.to_owned()),
        };
        Ok(classified)
    }
}

/// `EnumAccess` stand-in used for variants other than `include`: the variant
/// name has already been consumed by `GetIncludeVariant`, so it is replayed
/// from the stored string.
struct OtherEnumAccess<'de, D> {
    /// Zero-sized marker that carries the `'de` lifetime; stores no data.
    borrow: PhantomData<&'de ()>,
    /// Variant name that was read earlier.
    variant: String,
    /// Variant payload access obtained from the original enum access.
    data: D,
}

/// Replays the stored variant name to the caller's seed and exposes the
/// variant payload wrapped in `IncludeDeserializer`.
impl<'any, 'de, D> EnumAccess<'any> for OtherEnumAccess<'de, D>
where
    D: VariantAccess<'de>,
{
    type Error = D::Error;
    type Variant = IncludeDeserializer<'de, D>;

    fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error>
    where
        V: DeserializeSeed<'any>,
    {
        let Self { variant, data, .. } = self;
        let name = serde::de::value::StringDeserializer::new(variant);
        // Returning the mapped result directly ties the string deserializer's
        // error type to `Self::Error`.
        seed.deserialize(name)
            .map(|value| (value, IncludeDeserializer::new(data)))
    }
}

/// Seed and visitor that performs the actual `!include`: it reads the path
/// string that follows the tag and feeds the referenced file's content to
/// `visitor`.
struct IncludeVisitor<'de, V> {
    /// Zero-sized marker that carries the `'de` lifetime; stores no data.
    borrow: PhantomData<&'de ()>,
    /// Real visitor that receives the deserialized content of the included
    /// file.
    visitor: V,
}

/// Seed half of `IncludeVisitor`: requests a string (the include path) from
/// the deserializer and lets the `Visitor` half process it.
impl<'any, 'de, V> DeserializeSeed<'any> for IncludeVisitor<'de, V>
where
    V: Visitor<'de>,
{
    type Value = V::Value;

    fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
    where
        D: Deserializer<'any>,
    {
        D::deserialize_str(deserializer, self)
    }
}

/// Visitor half of `IncludeVisitor`: receives the path written after
/// `!include`, loads that file, and deserializes its content into the wrapped
/// visitor.
impl<'any, 'de, V> Visitor<'any> for IncludeVisitor<'de, V>
where
    V: Visitor<'de>,
{
    type Value = V::Value;

    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        formatter.write_str("filepath for the !include operation")
    }

    /// Resolves `v` against the current include directory, reads the file and
    /// deserializes it (again through `IncludeDeserializer`, so included
    /// files may themselves contain `!include`).
    ///
    /// The include directory is kept in a thread-local: it starts out empty,
    /// is switched to the directory of the file being included for the
    /// duration of the nested deserialization, and is restored afterwards.
    ///
    /// Errors: `failed !include <path>: <io error>` when the file cannot be
    /// read, `!include <v>: <yaml error>` when its content fails to
    /// deserialize.
    fn visit_str<E: Error>(self, v: &str) -> Result<Self::Value, E> {
        thread_local! {
            static INCLUDE_DIR: RefCell<PathBuf> = RefCell::new(PathBuf::new());
        }

        // Puts the previous include directory back when dropped, which also
        // covers the error path of the nested deserialization.
        struct UnsetIncludeDir {
            prev: PathBuf,
        }

        impl Drop for UnsetIncludeDir {
            fn drop(&mut self) {
                INCLUDE_DIR.with(|cell| {
                    *cell.borrow_mut() = mem::take(&mut self.prev);
                });
            }
        }

        let loaded = INCLUDE_DIR.with(|cell| {
            let mut base_dir = cell.borrow_mut();
            let mut target = base_dir.join(v);
            match fs::read_to_string(&target) {
                Err(io_error) => Err(E::custom(format_args!(
                    "failed !include {}: {}",
                    target.display(),
                    io_error,
                ))),
                Ok(text) => {
                    // Drop the file name: the remaining directory becomes the
                    // base for includes nested inside this file.
                    target.pop();
                    let outer_dir = mem::replace(&mut *base_dir, target);
                    Ok((text, outer_dir))
                }
            }
        });
        let (contents, prev) = loaded?;

        // Must be alive before the nested deserialization starts and is
        // dropped only after the result below has been produced.
        let _restore = UnsetIncludeDir { prev };

        let nested = IncludeDeserializer::new(serde_yaml::Deserializer::from_str(&contents));
        nested
            .deserialize_any(self.visitor)
            .map_err(|yaml_error| E::custom(format_args!("!include {}: {}", v, yaml_error)))
    }
}

That's awesome - thank you @dtolnay ! serde seems really robust and extensible from what I've seen.

If I may - your example is just over 400 lines long and requires quite a bit of integration with serde internals (framework knowledge). Would it be possible for serde-yaml to expose some simplifying API for custom !tag hooks (feature request)?