diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 052d6fd4e..81ce5b62b 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -1,175 +1,186 @@ -{ - "name": "CI", - "on": { - "push": { - "branches": [ - "trunk", - "v*.x" - ] - }, - "pull_request": { - "branches": [ - "trunk", - "v*.x" - ] - } - }, - "jobs": { - "check": { - "name": "Check", - "runs-on": "ubuntu-latest", - "strategy": { - "fail-fast": false, - "matrix": { - "rust": [ - "stable", - "beta", - "nightly", - # "1.55.0" TODO: Pick latest stable version when we release 2.0 - ] - } - }, - "steps": [ - { - "uses": "actions/checkout@v2", - "name": "Checkout" - }, - { - "uses": "actions-rs/toolchain@v1", - "with": { - "profile": "minimal", - "toolchain": "${{ matrix.rust }}", - "override": true - }, - "name": "Install Rust ${{ matrix.rust }}" - }, - { - "uses": "actions-rs/cargo@v1", - "with": { - "command": "check" - }, - "name": "Run `cargo check`" - }, - { - "uses": "actions-rs/cargo@v1", - "with": { - "command": "check", - "args": "--examples" - }, - "name": "Check examples", - "if": "matrix.rust != '1.41.0'" - } - ] - }, - "test": { - "name": "Test", - "runs-on": "ubuntu-latest", - "strategy": { - "matrix": { - "rust": [ - "stable", - "beta", - "nightly" - ] - } - }, - "steps": [ - { - "uses": "actions/checkout@v2", - "name": "Checkout" - }, - { - "uses": "actions-rs/toolchain@v1", - "with": { - "profile": "minimal", - "toolchain": "${{ matrix.rust }}", - "override": true - }, - "name": "Install Rust ${{ matrix.rust }}" - }, - { - "uses": "actions-rs/cargo@v1", - "with": { - "command": "test" - }, - "name": "Run `cargo test`" - } - ] - }, - "lints": { - "name": "Lints", - "runs-on": "ubuntu-latest", - "steps": [ - { - "uses": "actions/checkout@v2", - "name": "Checkout" - }, - { - "uses": "actions-rs/toolchain@v1", - "with": { - "profile": "minimal", - "toolchain": "stable", - "override": true, - "components": "rustfmt, clippy" - }, - "name": "Install Rust 
stable" - }, - { - "uses": "actions-rs/cargo@v1", - "with": { - "command": "fmt", - "args": "--all -- --check" - }, - "name": "Run `cargo fmt`" - }, - { - "uses": "actions-rs/cargo@v1", - "with": { - "command": "clippy", - "args": "-- -D warnings" - }, - "name": "Run `cargo clippy`" - } - ] - }, - "coverage": { - "name": "Code Coverage", - "runs-on": "ubuntu-latest", - "steps": [ - { - "uses": "actions/checkout@v2", - "name": "Checkout" - }, - { - "uses": "actions-rs/toolchain@v1", - "with": { - "profile": "minimal", - "toolchain": "nightly", - "override": true - }, - "name": "Install Rust nightly" - }, - { - "name": "Run cargo-tarpaulin", - "uses": "actions-rs/tarpaulin@v0.1", - "with": { - "version": "0.12.3", - "args": "--ignore-tests -- --test-threads 1" - } - }, - { - "name": "Upload to codecov.io", - "uses": "codecov/codecov-action@v1" - }, - { - "name": "Archive code coverage results", - "uses": "actions/upload-artifact@v1", - "with": { - "name": "code-coverage-report", - "path": "cobertura.xml" - } - } - ] - } - } -} +{ + "name": "CI", + "on": { + "push": { + "branches": [ + "trunk", + "feature/deserde*", # Temporary while we work on deserde + "v*.x" + ] + }, + "pull_request": { + "branches": [ + "trunk", + "v*.x" + ] + } + }, + "jobs": { + "check": { + "name": "Check", + "runs-on": "ubuntu-latest", + "strategy": { + "fail-fast": false, + "matrix": { + "rust": [ + "stable", + "beta", + "nightly" + # "1.55.0" TODO: Pick latest stable version when we release 2.0 + ] + } + }, + "steps": [ + { + "uses": "actions/checkout@v2", + "name": "Checkout" + }, + { + "uses": "actions-rs/toolchain@v1", + "with": { + "profile": "minimal", + "toolchain": "${{ matrix.rust }}", + "override": true + }, + "name": "Install Rust ${{ matrix.rust }}" + }, + { + "uses": "actions-rs/cargo@v1", + "with": { + "command": "check" + }, + "name": "Run `cargo check`" + } + # , + # { + # "uses": "actions-rs/cargo@v1", + # "with": { + # "command": "check", + # "args": "--examples" + # }, + # 
"name": "Check examples", + # "if": "matrix.rust != '1.41.0'" + # } + ] + }, + "test": { + "name": "Test", + "runs-on": "ubuntu-latest", + "strategy": { + "matrix": { + "rust": [ + "stable", + "beta", + "nightly" + # "1.55.0" TODO: Pick latest stable version when we release 2.0 + ] + } + }, + "steps": [ + { + "uses": "actions/checkout@v2", + "name": "Checkout" + }, + { + "uses": "actions-rs/toolchain@v1", + "with": { + "profile": "minimal", + "toolchain": "${{ matrix.rust }}", + "override": true + }, + "name": "Install Rust ${{ matrix.rust }}" + }, + { + "uses": "actions-rs/install@v0.1", + "with": { + "crate": "cargo-all-features", + "version": "1.6.0" + }, + "name": "Install cargo-all-features" + }, + { + "run": "cargo test-all-features", + "name": "Run `cargo test` on all features", + "env": { + "RUSTFLAGS": "-D warnings" + } + } + ] + }, + "lints": { + "name": "Lints", + "runs-on": "ubuntu-latest", + "steps": [ + { + "uses": "actions/checkout@v2", + "name": "Checkout" + }, + { + "uses": "actions-rs/toolchain@v1", + "with": { + "profile": "minimal", + "toolchain": "stable", + "override": true, + "components": "rustfmt, clippy" + }, + "name": "Install Rust stable" + }, + { + "uses": "actions-rs/cargo@v1", + "with": { + "command": "fmt", + "args": "--all -- --check" + }, + "name": "Run `cargo fmt`" + }, + { + "uses": "actions-rs/cargo@v1", + "with": { + "command": "clippy", + "args": "--all-features -- -D warnings" + }, + "name": "Run `cargo clippy`" + } + ] + }, + "coverage": { + "name": "Code Coverage", + "runs-on": "ubuntu-latest", + "steps": [ + { + "uses": "actions/checkout@v2", + "name": "Checkout" + }, + { + "uses": "actions-rs/toolchain@v1", + "with": { + "profile": "minimal", + "toolchain": "nightly", + "override": true + }, + "name": "Install Rust nightly" + }, + { + "name": "Run cargo-tarpaulin", + "uses": "actions-rs/tarpaulin@v0.1", + "with": { + "version": "0.18.2", + "args": "--all" + } + }, + { + "name": "Upload to codecov.io", + "uses": 
"codecov/codecov-action@v1" + }, + { + "name": "Archive code coverage results", + "uses": "actions/upload-artifact@v1", + "with": { + "name": "code-coverage-report", + "path": "cobertura.xml" + } + } + ] + } + } +} diff --git a/.gitignore b/.gitignore index 19231ce7f..29c90d8f0 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ .cargo .vscode rls*.log +tarpaulin-report.html diff --git a/.rustfmt.toml b/.rustfmt.toml new file mode 100644 index 000000000..43d4840c7 --- /dev/null +++ b/.rustfmt.toml @@ -0,0 +1 @@ +newline_style = "Unix" diff --git a/Cargo.toml b/Cargo.toml index a3d87420b..26e6fe1ae 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,35 +1,54 @@ -[package] -name = "bincode" -version = "2.0.0-dev" # remember to update html_root_url -authors = ["Ty Overby ", "Francesco Mazzoli ", "David Tolnay ", "Zoey Riordan "] -exclude = ["logo.svg", "examples/*", ".gitignore", ".travis.yml"] - -publish = true - -repository = "https://github.com/bincode-org/bincode" -documentation = "https://docs.rs/bincode" -readme = "./readme.md" -categories = ["encoding", "network-programming"] -keywords = ["binary", "encode", "decode", "serialize", "deserialize"] - -license = "MIT" -description = "A binary serialization / deserialization strategy that uses Serde for transforming structs into bytes and vice versa!" 
- -edition = "2018" -[dependencies] -byteorder = "1.3.0" -serde = "1.0.63" - -[dev-dependencies] -serde_bytes = "0.11" -serde_derive = "1.0.27" -criterion = "0.3" -rand = "0.8" - -[[bench]] -name = "varint" -harness = false - -[profile.bench] -codegen-units = 1 -debug = 1 +[workspace] +members = [ + "derive" +] + +[package] +name = "bincode" +version = "2.0.0-alpha.0" # remember to update html_root_url and bincode_derive +authors = ["Ty Overby ", "Francesco Mazzoli ", "Zoey Riordan ", "Victor Koenders "] +exclude = ["logo.svg", "examples/*", ".gitignore", ".github/"] + +publish = true + +repository = "https://github.com/bincode-org/bincode" +documentation = "https://docs.rs/bincode" +readme = "./readme.md" +categories = ["encoding", "network-programming"] +keywords = ["binary", "encode", "decode", "serialize", "deserialize"] + +license = "MIT" +description = "A binary serialization / deserialization strategy for transforming structs into bytes and vice versa!" + +edition = "2021" + +[features] +default = ["std", "derive", "atomic"] +std = ["alloc"] +alloc = [] +atomic = [] +derive = ["bincode_derive"] + +[dependencies] +bincode_derive = { path = "derive", version = "2.0.0-alpha.0", optional = true } +serde = { version = "1.0.130", optional = true } + +# Used for tests +[dev-dependencies] +serde_derive = "1.0.130" +serde_json = "1.0.68" +tempfile = "3.2.0" +criterion = "0.3" +rand = "0.8" + +[[bench]] +name = "varint" +harness = false + +[profile.bench] +codegen-units = 1 +debug = 1 + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] diff --git a/benches/varint.rs b/benches/varint.rs index 690f7e9e0..6f18bfc0e 100644 --- a/benches/varint.rs +++ b/benches/varint.rs @@ -1,6 +1,6 @@ use criterion::{criterion_group, criterion_main, Criterion}; -use bincode::Options; +use bincode::config::Configuration; use rand::distributions::Distribution; fn slice_varint_u8(c: &mut Criterion) { @@ -9,12 +9,12 @@ fn slice_varint_u8(c: &mut Criterion) 
{ let input: Vec = std::iter::from_fn(|| Some(dist.sample(&mut rng))) .take(10_000) .collect(); - let options = bincode::options(); - let bytes = options.serialize(&input).unwrap(); + let config = Configuration::standard(); + let bytes = bincode::encode_to_vec(&input, config).unwrap(); c.bench_function("slice_varint_u8", |b| { b.iter(|| { - let _: Vec = options.deserialize(&bytes).unwrap(); + let _: Vec = bincode::decode_from_slice(&bytes, config).unwrap(); }) }); } @@ -25,12 +25,12 @@ fn slice_varint_u16(c: &mut Criterion) { let input: Vec = std::iter::from_fn(|| Some(dist.sample(&mut rng))) .take(10_000) .collect(); - let options = bincode::options(); - let bytes = options.serialize(&input).unwrap(); + let config = Configuration::standard(); + let bytes = bincode::encode_to_vec(&input, config).unwrap(); c.bench_function("slice_varint_u16", |b| { b.iter(|| { - let _: Vec = options.deserialize(&bytes).unwrap(); + let _: Vec = bincode::decode_from_slice(&bytes, config).unwrap(); }) }); } @@ -41,12 +41,12 @@ fn slice_varint_u32(c: &mut Criterion) { let input: Vec = std::iter::from_fn(|| Some(dist.sample(&mut rng))) .take(10_000) .collect(); - let options = bincode::options(); - let bytes = options.serialize(&input).unwrap(); + let config = Configuration::standard(); + let bytes = bincode::encode_to_vec(&input, config).unwrap(); c.bench_function("slice_varint_u32", |b| { b.iter(|| { - let _: Vec = options.deserialize(&bytes).unwrap(); + let _: Vec = bincode::decode_from_slice(&bytes, config).unwrap(); }) }); } @@ -57,12 +57,12 @@ fn slice_varint_u64(c: &mut Criterion) { let input: Vec = std::iter::from_fn(|| Some(dist.sample(&mut rng))) .take(10_000) .collect(); - let options = bincode::options(); - let bytes = options.serialize(&input).unwrap(); + let config = Configuration::standard(); + let bytes = bincode::encode_to_vec(&input, config).unwrap(); c.bench_function("slice_varint_u64", |b| { b.iter(|| { - let _: Vec = options.deserialize(&bytes).unwrap(); + let _: Vec 
= bincode::decode_from_slice(&bytes, config).unwrap(); }) }); } @@ -73,14 +73,14 @@ fn bufreader_varint_u8(c: &mut Criterion) { let input: Vec = std::iter::from_fn(|| Some(dist.sample(&mut rng))) .take(10_000) .collect(); - let options = bincode::options(); - let bytes = options.serialize(&input).unwrap(); + let config = Configuration::standard(); + let bytes = bincode::encode_to_vec(&input, config).unwrap(); c.bench_function("bufreader_varint_u8", |b| { b.iter(|| { - let _: Vec = options - .deserialize_from_custom(&mut std::io::BufReader::new(&bytes[..])) - .unwrap(); + let _: Vec = + bincode::decode_from_reader(&mut std::io::BufReader::new(&bytes[..]), config) + .unwrap(); }) }); } @@ -91,14 +91,14 @@ fn bufreader_varint_u16(c: &mut Criterion) { let input: Vec = std::iter::from_fn(|| Some(dist.sample(&mut rng))) .take(10_000) .collect(); - let options = bincode::options(); - let bytes = options.serialize(&input).unwrap(); + let config = Configuration::standard(); + let bytes = bincode::encode_to_vec(&input, config).unwrap(); c.bench_function("bufreader_varint_u16", |b| { b.iter(|| { - let _: Vec = options - .deserialize_from_custom(&mut std::io::BufReader::new(&bytes[..])) - .unwrap(); + let _: Vec = + bincode::decode_from_reader(&mut std::io::BufReader::new(&bytes[..]), config) + .unwrap(); }) }); } @@ -109,14 +109,14 @@ fn bufreader_varint_u32(c: &mut Criterion) { let input: Vec = std::iter::from_fn(|| Some(dist.sample(&mut rng))) .take(10_000) .collect(); - let options = bincode::options(); - let bytes = options.serialize(&input).unwrap(); + let config = Configuration::standard(); + let bytes = bincode::encode_to_vec(&input, config).unwrap(); c.bench_function("bufreader_varint_u32", |b| { b.iter(|| { - let _: Vec = options - .deserialize_from_custom(&mut std::io::BufReader::new(&bytes[..])) - .unwrap(); + let _: Vec = + bincode::decode_from_reader(&mut std::io::BufReader::new(&bytes[..]), config) + .unwrap(); }) }); } @@ -127,14 +127,14 @@ fn 
bufreader_varint_u64(c: &mut Criterion) { let input: Vec = std::iter::from_fn(|| Some(dist.sample(&mut rng))) .take(10_000) .collect(); - let options = bincode::options(); - let bytes = options.serialize(&input).unwrap(); + let config = Configuration::standard(); + let bytes = bincode::encode_to_vec(&input, config).unwrap(); c.bench_function("bufreader_varint_u64", |b| { b.iter(|| { - let _: Vec = options - .deserialize_from_custom(&mut std::io::BufReader::new(&bytes[..])) - .unwrap(); + let _: Vec = + bincode::decode_from_reader(&mut std::io::BufReader::new(&bytes[..]), config) + .unwrap(); }) }); } diff --git a/derive/.gitignore b/derive/.gitignore new file mode 100644 index 000000000..4fffb2f89 --- /dev/null +++ b/derive/.gitignore @@ -0,0 +1,2 @@ +/target +/Cargo.lock diff --git a/derive/Cargo.toml b/derive/Cargo.toml new file mode 100644 index 000000000..c3a6ec3fa --- /dev/null +++ b/derive/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "bincode_derive" +version = "2.0.0-alpha.0" # remember to update bincode +authors = ["Zoey Riordan ", "Victor Koenders "] +edition = "2021" + +repository = "https://github.com/bincode-org/bincode" +documentation = "https://docs.rs/bincode_derive" +readme = "./readme.md" +categories = ["encoding", "network-programming"] +keywords = ["binary", "encode", "decode", "serialize", "deserialize"] + +[lib] +proc-macro = true + +[dev-dependencies] +proc-macro2 = "1.0" diff --git a/derive/readme.md b/derive/readme.md new file mode 100644 index 000000000..22f216a10 --- /dev/null +++ b/derive/readme.md @@ -0,0 +1,28 @@ +# Bincode-derive + +The derive crate for bincode. Implements `bincode::Encode` and `bincode::Decode`. + +This crate is roughly split into 2 parts: + +# Parsing + +Most of parsing is done in the `src/parse/` folder. 
This will generate the following types: +- `Attributes`, currently unused +- `Visibility`, currently unused +- `DataType`, either `Struct` or `Enum`, with the name of the data type being parsed +- `Generics`, the generics part of the type, e.g. `struct Foo<'a>` +- `GenericConstraints`, the "where" part of the type + +# Generate + +Generating the code implementation is done in either `src/derive_enum.rs` or `src/derive_struct.rs`. + +This is supported by the structs in `src/generate`. The most notable parts of this module are: +- `StreamBuilder` is a thin but friendly wrapper around `TokenStream` +- `Generator` is the base type of the code generator. It has helper methods to generate implementations: + - `ImplFor` is a helper struct for a single `impl A for B` construction. Functions can be defined inside it: + - `GenerateFnBody` is a helper struct for a single function in the above `impl`. This is created with a callback to `FnBuilder` which helps set some properties. `GenerateFnBody` has a `stream()` function which returns a `StreamBuilder` for the function. + +For additional derive testing, see the test cases in `../tests`. + +For testing purposes, all generated code is written to the current `target` folder, under the file names `_Encode.rs` and `_Decode.rs`. This can help with debugging. 
diff --git a/derive/src/derive_enum.rs b/derive/src/derive_enum.rs new file mode 100644 index 000000000..6ab9f3026 --- /dev/null +++ b/derive/src/derive_enum.rs @@ -0,0 +1,197 @@ +use crate::generate::{FnSelfArg, Generator}; +use crate::parse::{EnumVariant, Fields}; +use crate::prelude::*; +use crate::Result; + +const TUPLE_FIELD_PREFIX: &str = "field_"; + +pub struct DeriveEnum { + pub variants: Vec, +} + +impl DeriveEnum { + pub fn generate_encode(self, generator: &mut Generator) -> Result<()> { + let DeriveEnum { variants } = self; + + generator + .impl_for("bincode::enc::Encode") + .unwrap() + .generate_fn("encode") + .with_generic("E", ["bincode::enc::Encoder"]) + .with_self_arg(FnSelfArg::RefSelf) + .with_arg("mut encoder", "E") + .with_return_type("core::result::Result<(), bincode::error::EncodeError>") + .body(|fn_body| { + fn_body.ident_str("match"); + fn_body.ident_str("self"); + fn_body.group(Delimiter::Brace, |match_body| { + for (variant_index, variant) in variants.into_iter().enumerate() { + // Self::Variant + match_body.ident_str("Self"); + match_body.puncts("::"); + match_body.ident(variant.name.clone()); + + // if we have any fields, declare them here + // Self::Variant { a, b, c } + if let Some(delimiter) = variant.fields.delimiter() { + match_body.group(delimiter, |field_body| { + for (idx, field_name) in + variant.fields.names().into_iter().enumerate() + { + if idx != 0 { + field_body.punct(','); + } + field_body.push( + field_name.to_token_tree_with_prefix(TUPLE_FIELD_PREFIX), + ); + } + }); + } + + // Arrow + // Self::Variant { a, b, c } => + match_body.puncts("=>"); + + // Body of this variant + // Note that the fields are available as locals because of the match destructuring above + // { + // encoder.encode_u32(n)?; + // bincode::enc::Encode::encode(a, &mut encoder)?; + // bincode::enc::Encode::encode(b, &mut encoder)?; + // bincode::enc::Encode::encode(c, &mut encoder)?; + // } + match_body.group(Delimiter::Brace, |body| { + // variant 
index + body.push_parsed(format!( + "::encode(&{}, &mut encoder)?;", + variant_index + )) + .unwrap(); + // If we have any fields, encode them all one by one + for field_name in variant.fields.names() { + body.push_parsed(format!( + "bincode::enc::Encode::encode({}, &mut encoder)?;", + field_name.to_string_with_prefix(TUPLE_FIELD_PREFIX), + )) + .unwrap(); + } + }); + match_body.punct(','); + } + }); + fn_body.push_parsed("Ok(())").unwrap(); + }) + .unwrap(); + Ok(()) + } + + pub fn generate_decode(self, generator: &mut Generator) -> Result<()> { + let DeriveEnum { variants } = self; + let enum_name = generator.target_name().to_string(); + + if generator.has_lifetimes() { + // enum has a lifetime, implement BorrowDecode + + generator.impl_for_with_de_lifetime("bincode::de::BorrowDecode<'__de>") + .unwrap() + .generate_fn("borrow_decode") + .with_generic("D", ["bincode::de::BorrowDecoder<'__de>"]) + .with_arg("mut decoder", "D") + .with_return_type("Result") + .body(|fn_builder| { + fn_builder + .push_parsed("let variant_index = ::decode(&mut decoder)?;").unwrap(); + fn_builder.push_parsed("match variant_index").unwrap(); + fn_builder.group(Delimiter::Brace, |variant_case| { + for (idx, variant) in variants.iter().enumerate() { + // idx => Ok(..) + variant_case.lit_u32(idx as u32); + variant_case.puncts("=>"); + variant_case.ident_str("Ok"); + variant_case.group(Delimiter::Parenthesis, |variant_case_body| { + // Self::Variant { } + // Self::Variant { 0: ..., 1: ... 2: ... }, + // Self::Variant { a: ..., b: ... c: ... 
}, + variant_case_body.ident_str("Self"); + variant_case_body.puncts("::"); + variant_case_body.ident(variant.name.clone()); + + variant_case_body.group(Delimiter::Brace, |variant_body| { + let is_tuple = matches!(variant.fields, Fields::Tuple(_)); + for (idx, field) in variant.fields.names().into_iter().enumerate() { + if is_tuple { + variant_body.lit_usize(idx); + } else { + variant_body.ident(field.unwrap_ident().clone()); + } + variant_body.punct(':'); + variant_body.push_parsed("bincode::de::BorrowDecode::borrow_decode(&mut decoder)?,").unwrap(); + } + }); + }); + variant_case.punct(','); + } + + // invalid idx + variant_case.push_parsed(format!( + "variant => return Err(bincode::error::DecodeError::UnexpectedVariant {{ min: 0, max: {}, found: variant, type_name: {:?} }})", + variants.len() - 1, + enum_name.to_string() + )).unwrap(); + }); + }).unwrap(); + } else { + // enum has no lifetimes, implement Decode + generator.impl_for("bincode::de::Decode") + .unwrap() + .generate_fn("decode") + .with_generic("D", ["bincode::de::Decoder"]) + .with_arg("mut decoder", "D") + .with_return_type("Result") + .body(|fn_builder| { + fn_builder + .push_parsed("let variant_index = ::decode(&mut decoder)?;").unwrap(); + fn_builder.push_parsed("match variant_index").unwrap(); + fn_builder.group(Delimiter::Brace, |variant_case| { + for (idx, variant) in variants.iter().enumerate() { + // idx => Ok(..) + variant_case.lit_u32(idx as u32); + variant_case.puncts("=>"); + variant_case.ident_str("Ok"); + variant_case.group(Delimiter::Parenthesis, |variant_case_body| { + // Self::Variant { } + // Self::Variant { 0: ..., 1: ... 2: ... }, + // Self::Variant { a: ..., b: ... c: ... 
}, + variant_case_body.ident_str("Self"); + variant_case_body.puncts("::"); + variant_case_body.ident(variant.name.clone()); + + variant_case_body.group(Delimiter::Brace, |variant_body| { + let is_tuple = matches!(variant.fields, Fields::Tuple(_)); + for (idx, field) in variant.fields.names().into_iter().enumerate() { + if is_tuple { + variant_body.lit_usize(idx); + } else { + variant_body.ident(field.unwrap_ident().clone()); + } + variant_body.punct(':'); + variant_body.push_parsed("bincode::de::Decode::decode(&mut decoder)?,").unwrap(); + } + }); + }); + variant_case.punct(','); + } + + // invalid idx + variant_case.push_parsed(format!( + "variant => return Err(bincode::error::DecodeError::UnexpectedVariant {{ min: 0, max: {}, found: variant, type_name: {:?} }})", + variants.len() - 1, + enum_name.to_string() + )).unwrap(); + }); + }).unwrap(); + } + + Ok(()) + } +} diff --git a/derive/src/derive_struct.rs b/derive/src/derive_struct.rs new file mode 100644 index 000000000..8c08f5605 --- /dev/null +++ b/derive/src/derive_struct.rs @@ -0,0 +1,109 @@ +use crate::generate::Generator; +use crate::parse::Fields; +use crate::prelude::Delimiter; +use crate::Result; + +pub struct DeriveStruct { + pub fields: Fields, +} + +impl DeriveStruct { + pub fn generate_encode(self, generator: &mut Generator) -> Result<()> { + let DeriveStruct { fields } = self; + + generator + .impl_for("bincode::enc::Encode") + .unwrap() + .generate_fn("encode") + .with_generic("E", ["bincode::enc::Encoder"]) + .with_self_arg(crate::generate::FnSelfArg::RefSelf) + .with_arg("mut encoder", "E") + .with_return_type("Result<(), bincode::error::EncodeError>") + .body(|fn_body| { + for field in fields.names() { + fn_body + .push_parsed(format!( + "bincode::enc::Encode::encode(&self.{}, &mut encoder)?;", + field.to_string() + )) + .unwrap(); + } + fn_body.push_parsed("Ok(())").unwrap(); + }) + .unwrap(); + + Ok(()) + } + + pub fn generate_decode(self, generator: &mut Generator) -> Result<()> { + let 
DeriveStruct { fields } = self; + + if generator.has_lifetimes() { + // struct has a lifetime, implement BorrowDecode + + generator + .impl_for_with_de_lifetime("bincode::de::BorrowDecode<'__de>") + .unwrap() + .generate_fn("borrow_decode") + .with_generic("D", ["bincode::de::BorrowDecoder<'__de>"]) + .with_arg("mut decoder", "D") + .with_return_type("Result") + .body(|fn_body| { + // Ok(Self { + fn_body.ident_str("Ok"); + fn_body.group(Delimiter::Parenthesis, |ok_group| { + ok_group.ident_str("Self"); + ok_group.group(Delimiter::Brace, |struct_body| { + for field in fields.names() { + struct_body + .push_parsed(format!( + "{}: bincode::de::BorrowDecode::borrow_decode(&mut decoder)?,", + field.to_string() + )) + .unwrap(); + } + }); + }); + }) + .unwrap(); + + Ok(()) + } else { + // struct has no lifetimes, implement Decode + + generator + .impl_for("bincode::de::Decode") + .unwrap() + .generate_fn("decode") + .with_generic("D", ["bincode::de::Decoder"]) + .with_arg("mut decoder", "D") + .with_return_type("Result") + .body(|fn_body| { + // Ok(Self { + fn_body.ident_str("Ok"); + fn_body.group(Delimiter::Parenthesis, |ok_group| { + ok_group.ident_str("Self"); + ok_group.group(Delimiter::Brace, |struct_body| { + // Fields + // { + // a: bincode::de::Decode::decode(&mut decoder)?, + // b: bincode::de::Decode::decode(&mut decoder)?, + // ... 
+ // } + for field in fields.names() { + struct_body + .push_parsed(format!( + "{}: bincode::de::Decode::decode(&mut decoder)?,", + field.to_string() + )) + .unwrap(); + } + }); + }); + }) + .unwrap(); + + Ok(()) + } + } +} diff --git a/derive/src/error.rs b/derive/src/error.rs new file mode 100644 index 000000000..b44dcdac5 --- /dev/null +++ b/derive/src/error.rs @@ -0,0 +1,56 @@ +use crate::{generate::StreamBuilder, prelude::*}; +use std::fmt; + +#[derive(Debug)] +pub enum Error { + UnknownDataType(Span), + InvalidRustSyntax(Span), + ExpectedIdent(Span), +} + +// helper functions for the unit tests +#[cfg(test)] +impl Error { + pub fn is_unknown_data_type(&self) -> bool { + matches!(self, Error::UnknownDataType(_)) + } + + pub fn is_invalid_rust_syntax(&self) -> bool { + matches!(self, Error::InvalidRustSyntax(_)) + } +} + +impl fmt::Display for Error { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::UnknownDataType(_) => { + write!(fmt, "Unknown data type, only enum and struct are supported") + } + Self::InvalidRustSyntax(_) => write!(fmt, "Invalid rust syntax"), + Self::ExpectedIdent(_) => write!(fmt, "Expected ident"), + } + } +} + +impl Error { + pub fn into_token_stream(self) -> TokenStream { + let maybe_span = match &self { + Error::UnknownDataType(span) + | Error::ExpectedIdent(span) + | Error::InvalidRustSyntax(span) => Some(*span), + }; + self.throw_with_span(maybe_span.unwrap_or_else(Span::call_site)) + } + + pub fn throw_with_span(self, span: Span) -> TokenStream { + // compile_error!($message) + let mut builder = StreamBuilder::new(); + builder.ident_str("compile_error"); + builder.punct('!'); + builder.group(Delimiter::Brace, |b| { + b.lit_str(self.to_string()); + }); + builder.set_span_on_all_tokens(span); + builder.stream + } +} diff --git a/derive/src/generate/generate_fn.rs b/derive/src/generate/generate_fn.rs new file mode 100644 index 000000000..f63a39057 --- /dev/null +++ b/derive/src/generate/generate_fn.rs 
@@ -0,0 +1,199 @@ +use super::{stream_builder::PushParseError, ImplFor, StreamBuilder}; +use crate::prelude::Delimiter; + +/// A builder for functions. +pub struct FnBuilder<'a, 'b> { + generate: &'b mut ImplFor<'a>, + name: String, + + lifetime_and_generics: Vec<(String, Vec)>, + self_arg: FnSelfArg, + args: Vec<(String, String)>, + return_type: Option, +} + +impl<'a, 'b> FnBuilder<'a, 'b> { + pub(super) fn new(generate: &'b mut ImplFor<'a>, name: impl Into) -> Self { + Self { + generate, + name: name.into(), + lifetime_and_generics: Vec::new(), + self_arg: FnSelfArg::None, + args: Vec::new(), + return_type: None, + } + } + + /// Add a generic parameter. Keep in mind that will *not* work for lifetimes. + /// + /// `dependencies` are the optional dependencies of the parameter. + /// + /// ```ignore + /// let mut builder: FnBuilder = ...; + /// builder + /// .with_generic("D", None) // fn Foo() + /// .with_generic("E", &["Encodable"]); // fn foo(); + /// ``` + pub fn with_generic(mut self, name: T, dependencies: U) -> Self + where + T: Into, + U: IntoIterator, + V: Into, + { + self.lifetime_and_generics.push(( + name.into(), + dependencies.into_iter().map(|d| d.into()).collect(), + )); + self + } + + /// Set the value for `self`. See [FnSelfArg] for more information. + /// + /// ```ignore + /// let mut builder: FnBuilder = ...; + /// // static function by default + /// builder.with_self_arg(FnSelfArg::RefSelf); // fn foo(&self) + /// ``` + pub fn with_self_arg(mut self, self_arg: FnSelfArg) -> Self { + self.self_arg = self_arg; + self + } + + /// Add an argument with a `name` and a `ty`. + /// + /// ```ignore + /// let mut builder: FnBuilder = ...; + /// // fn foo(); + /// builder + /// .with_arg("a", "u32") // fn foo(a: u32) + /// .with_arg("b", "u32"); // fn foo(a: u32, b: u32) + /// ``` + pub fn with_arg(mut self, name: impl Into, ty: impl Into) -> Self { + self.args.push((name.into(), ty.into())); + self + } + + /// Set the return type for the function. 
By default the function will have no return type. + /// + /// ```ignore + /// let mut builder: FnBuilder = ...; + /// // fn foo() + /// builder.with_return_type("u32"); // fn foo() -> u32 + /// ``` + pub fn with_return_type(mut self, ret_type: impl Into) -> Self { + self.return_type = Some(ret_type.into()); + self + } + + /// Complete the function definition. This function takes a callback that will form the body of the function. + /// + /// ```ignore + /// let mut builder: FnBuilder = ...; + /// // fn foo() + /// builder.body(|b| { + /// b.push_parsed("println!(\"hello world\");"); + /// }); + /// ``` + pub fn body(self, body_builder: impl FnOnce(&mut StreamBuilder)) -> Result<(), PushParseError> { + let FnBuilder { + generate, + name, + lifetime_and_generics, + self_arg, + args, + return_type, + } = self; + + let mut builder = StreamBuilder::new(); + + // function name; `fn name` + builder.ident_str("fn"); + builder.ident_str(name); + + // lifetimes; `<'a: 'b, D: Display>` + if !lifetime_and_generics.is_empty() { + builder.punct('<'); + for (idx, (lifetime_and_generic, dependencies)) in + lifetime_and_generics.into_iter().enumerate() + { + if idx != 0 { + builder.punct(','); + } + builder.ident_str(&lifetime_and_generic); + if !dependencies.is_empty() { + for (idx, dependency) in dependencies.into_iter().enumerate() { + builder.punct(if idx == 0 { ':' } else { '+' }); + builder.push_parsed(&dependency)?; + } + } + } + builder.punct('>'); + } + + // Arguments; `(&self, foo: &Bar)` + builder.group(Delimiter::Parenthesis, |arg_stream| { + if let Some(self_arg) = self_arg.into_token_tree() { + arg_stream.append(self_arg); + arg_stream.punct(','); + } + for (idx, (arg_name, arg_ty)) in args.into_iter().enumerate() { + if idx != 0 { + arg_stream.punct(','); + } + arg_stream.push_parsed(&arg_name)?; + arg_stream.punct(':'); + arg_stream.push_parsed(&arg_ty)?; + } + Ok(()) + })?; + + // Return type: `-> ResultType` + if let Some(return_type) = return_type { + 
builder.puncts("->"); + builder.push_parsed(&return_type)?; + } + + generate.group.append(builder); + generate.group.group(Delimiter::Brace, body_builder); + + Ok(()) + } +} + +/// The `self` argument of a function +#[allow(dead_code)] +pub enum FnSelfArg { + /// No `self` argument. The function will be a static function. + None, + + /// `self`. The function will consume self. + TakeSelf, + + /// `&self`. The function will take self by reference. + RefSelf, + + /// `&mut self`. The function will take self by mutable reference. + MutSelf, +} + +impl FnSelfArg { + fn into_token_tree(self) -> Option { + let mut builder = StreamBuilder::new(); + match self { + Self::None => return None, + Self::TakeSelf => { + builder.ident_str("self"); + } + Self::RefSelf => { + builder.punct('&'); + builder.ident_str("self"); + } + Self::MutSelf => { + builder.punct('&'); + builder.ident_str("mut"); + builder.ident_str("self"); + } + } + Some(builder) + } +} diff --git a/derive/src/generate/generator.rs b/derive/src/generate/generator.rs new file mode 100644 index 000000000..7cc0bc0b4 --- /dev/null +++ b/derive/src/generate/generator.rs @@ -0,0 +1,66 @@ +use super::stream_builder::PushParseError; +use super::{ImplFor, StreamBuilder}; +use crate::parse::{GenericConstraints, Generics}; +use crate::prelude::{Ident, TokenStream}; + +#[must_use] +pub struct Generator { + pub(super) name: Ident, + pub(super) generics: Option, + pub(super) generic_constraints: Option, + pub(super) stream: StreamBuilder, +} + +impl Generator { + pub(crate) fn new( + name: Ident, + generics: Option, + generic_constraints: Option, + ) -> Self { + Self { + name, + generics, + generic_constraints, + stream: StreamBuilder::new(), + } + } + + /// Return the name for the struct or enum that this is going to be implemented on. + pub fn target_name(&self) -> &Ident { + &self.name + } + + /// Generate an `for for ` implementation. See [ImplFor] for more information. 
+ pub fn impl_for<'a>(&'a mut self, trait_name: &str) -> Result, PushParseError> { + ImplFor::new(self, trait_name) + } + + /// Generate an `for <'__de> for ` implementation. See [ImplFor] for more information. + pub fn impl_for_with_de_lifetime<'a>( + &'a mut self, + trait_name: &str, + ) -> Result, PushParseError> { + ImplFor::new_with_de_lifetime(self, trait_name) + } + + /// Returns `true` if the struct or enum has lifetimes. + pub fn has_lifetimes(&self) -> bool { + self.generics + .as_ref() + .map(|g| g.has_lifetime()) + .unwrap_or(false) + } + + /// Consume the contents of this generator. This *must* be called, or else the generator will panic on drop. + pub fn take_stream(mut self) -> TokenStream { + std::mem::take(&mut self.stream).stream + } +} + +impl Drop for Generator { + fn drop(&mut self) { + if !self.stream.stream.is_empty() && !std::thread::panicking() { + panic!("Generator dropped but the stream is not empty. Please call `.take_stream()` on the generator"); + } + } +} diff --git a/derive/src/generate/impl_for.rs b/derive/src/generate/impl_for.rs new file mode 100644 index 000000000..db2ddf42d --- /dev/null +++ b/derive/src/generate/impl_for.rs @@ -0,0 +1,80 @@ +use super::{stream_builder::PushParseError, FnBuilder, Generator, StreamBuilder}; +use crate::prelude::Delimiter; + +#[must_use] +pub struct ImplFor<'a> { + pub(super) generator: &'a mut Generator, + pub(super) group: StreamBuilder, +} + +impl<'a> ImplFor<'a> { + pub(super) fn new( + generator: &'a mut Generator, + trait_name: &str, + ) -> Result { + let mut builder = StreamBuilder::new(); + builder.ident_str("impl"); + + if let Some(generics) = &generator.generics { + builder.append(generics.impl_generics()); + } + builder.push_parsed(trait_name)?; + builder.ident_str("for"); + builder.ident(generator.name.clone()); + + if let Some(generics) = &generator.generics { + builder.append(generics.type_generics()); + } + if let Some(generic_constraints) = &generator.generic_constraints { + 
builder.append(generic_constraints.where_clause()); + } + generator.stream.append(builder); + + let group = StreamBuilder::new(); + Ok(Self { generator, group }) + } + + pub(super) fn new_with_de_lifetime( + generator: &'a mut Generator, + trait_name: &str, + ) -> Result { + let mut builder = StreamBuilder::new(); + builder.ident_str("impl"); + + if let Some(generics) = &generator.generics { + builder.append(generics.impl_generics_with_additional_lifetime("__de")); + } else { + builder.punct('<'); + builder.lifetime_str("__de"); + builder.punct('>'); + } + + builder.push_parsed(trait_name)?; + builder.ident_str("for"); + builder.ident(generator.name.clone()); + if let Some(generics) = &generator.generics { + builder.append(generics.type_generics()); + } + if let Some(generic_constraints) = &generator.generic_constraints { + builder.append(generic_constraints.where_clause()); + } + generator.stream.append(builder); + + let group = StreamBuilder::new(); + Ok(Self { generator, group }) + } + + /// Add a function to the trait implementation + pub fn generate_fn<'b>(&'b mut self, name: &str) -> FnBuilder<'a, 'b> { + FnBuilder::new(self, name) + } +} + +impl Drop for ImplFor<'_> { + fn drop(&mut self) { + let stream = std::mem::take(&mut self.group); + self.generator + .stream + .group(Delimiter::Brace, |builder| builder.append(stream)) + } +} diff --git a/derive/src/generate/mod.rs b/derive/src/generate/mod.rs new file mode 100644 index 000000000..696464dab --- /dev/null +++ b/derive/src/generate/mod.rs @@ -0,0 +1,9 @@ +mod generate_fn; +mod generator; +mod impl_for; +mod stream_builder; + +pub use self::generate_fn::{FnBuilder, FnSelfArg}; +pub use self::generator::Generator; +pub use self::impl_for::ImplFor; +pub use self::stream_builder::StreamBuilder; diff --git a/derive/src/generate/stream_builder.rs b/derive/src/generate/stream_builder.rs new file mode 100644 index 000000000..3c4464786 --- /dev/null +++ b/derive/src/generate/stream_builder.rs @@ -0,0 +1,156 @@ 
+use crate::prelude::{ + Delimiter, Group, Ident, LexError, Literal, Punct, Spacing, Span, TokenStream, TokenTree, +}; +use std::str::FromStr; + +/// A helper struct built around a [TokenStream] to make it easier to build code. +#[must_use] +#[derive(Default)] +pub struct StreamBuilder { + pub(crate) stream: TokenStream, +} + +impl StreamBuilder { + /// Generate a new StreamBuilder + pub fn new() -> Self { + Self { + stream: TokenStream::new(), + } + } + + /// Add multiple `TokenTree` items to the stream. + pub fn extend(&mut self, item: impl IntoIterator) { + self.stream.extend(item); + } + + /// Append another StreamBuilder to the current StreamBuilder. + pub fn append(&mut self, builder: StreamBuilder) { + self.stream.extend(builder.stream); + } + + /// Push a single token to the stream. + pub fn push(&mut self, item: impl Into) { + self.stream.extend([item.into()]); + } + + /// Attempt to parse the given string as valid Rust code, and append the parsed result to the internal stream. + /// + /// Returns a [PushParseError] if the string could not be parsed as valid Rust code. + pub fn push_parsed(&mut self, item: impl AsRef) -> Result<(), PushParseError> { + let tokens = TokenStream::from_str(item.as_ref()).map_err(|e| PushParseError { + error: e, + code: item.as_ref().to_string(), + })?; + self.stream.extend(tokens); + Ok(()) + } + + /// Push a single ident to the stream. An ident is any word that a code file may contain, e.g. `fn`, `struct`, `where`, names of functions and structs, etc. + pub fn ident(&mut self, ident: Ident) { + self.stream.extend([TokenTree::Ident(ident)]); + } + + /// Push a single ident to the stream. An ident is any word that a code file may contain, e.g. `fn`, `struct`, `where`, names of functions and structs, etc. + pub fn ident_str(&mut self, ident: impl AsRef) { + self.stream.extend([TokenTree::Ident(Ident::new( + ident.as_ref(), + Span::call_site(), + ))]); + } + + /// Add a group. A group is any block surrounded by `{ .. }`, `[ .. 
]` or `( .. )`. + /// + /// `delim` indicates which group it is. The `inner` callback is used to fill the contents of the group. + pub fn group(&mut self, delim: Delimiter, inner: impl FnOnce(&mut StreamBuilder) -> T) -> T { + let mut stream = StreamBuilder::new(); + let result = inner(&mut stream); + self.stream + .extend([TokenTree::Group(Group::new(delim, stream.stream))]); + result + } + + /// Add a single punctuation to the stream. Puncts are single-character tokens like `.`, `<`, `#`, etc + /// + /// Note that this should not be used for multi-punct constructions like `::` or `->`. For that use [puncts] instead. + pub fn punct(&mut self, p: char) { + self.stream + .extend([TokenTree::Punct(Punct::new(p, Spacing::Alone))]); + } + + /// Add multiple punctuations to the stream. Multi punct tokens are e.g. `::`, `->` and `=>`. + /// + /// Note that this is the only way to add multi punct tokens. + /// If you were to use [punct] to insert `->` it would be inserted as `-` and then `>`, and not form a single token. Rust would interpret this as a "minus sign and then a greater than sign", not as a single arrow. + pub fn puncts(&mut self, puncts: &str) { + self.stream.extend( + puncts + .chars() + .map(|char| TokenTree::Punct(Punct::new(char, Spacing::Joint))), + ); + } + + /// Add a lifetime to the stream. + /// + /// Note that this is the only way to add lifetimes, if you were to do: + /// ```ignore + /// builder.punct('\''); + /// builder.ident_str("static"); + /// ``` + /// It would not add `'static`, but instead it would add `' static` as separate tokens, and the lifetime would not work. + pub fn lifetime(&mut self, lt: Ident) { + self.stream.extend([ + TokenTree::Punct(Punct::new('\'', Spacing::Joint)), + TokenTree::Ident(lt), + ]); + } + + /// Add a lifetime to the stream. 
+ /// + /// Note that this is the only way to add lifetimes, if you were to do: + /// ```ignore + /// builder.punct('\''); + /// builder.ident_str("static"); + /// ``` + /// It would not add `'static`, but instead it would add `' static` as separate tokens, and the lifetime would not work. + pub fn lifetime_str(&mut self, lt: &str) { + self.stream.extend([ + TokenTree::Punct(Punct::new('\'', Spacing::Joint)), + TokenTree::Ident(Ident::new(lt, Span::call_site())), + ]); + } + + /// Add a literal string (`&'static str`) to the stream. + pub fn lit_str(&mut self, str: impl AsRef) { + self.stream + .extend([TokenTree::Literal(Literal::string(str.as_ref()))]); + } + + /// Add a `u32` value to the stream. + pub fn lit_u32(&mut self, val: u32) { + self.stream + .extend([TokenTree::Literal(Literal::u32_unsuffixed(val))]); + } + + /// Add a `usize` value to the stream. + pub fn lit_usize(&mut self, val: usize) { + self.stream + .extend([TokenTree::Literal(Literal::usize_unsuffixed(val))]); + } + + /// Set the given span on all tokens in the stream. This span is used by rust for e.g. compiler errors, to indicate the position of the error. 
+ pub fn set_span_on_all_tokens(&mut self, span: Span) { + self.stream = std::mem::take(&mut self.stream) + .into_iter() + .map(|mut token| { + token.set_span(span); + token + }) + .collect(); + } +} + +#[derive(Debug)] +pub struct PushParseError { + pub error: LexError, + pub code: String, +} diff --git a/derive/src/lib.rs b/derive/src/lib.rs new file mode 100644 index 000000000..12c09cbbe --- /dev/null +++ b/derive/src/lib.rs @@ -0,0 +1,129 @@ +extern crate proc_macro; + +mod derive_enum; +mod derive_struct; +mod error; +mod generate; +mod parse; + +#[cfg(test)] +pub(crate) mod prelude { + pub use proc_macro2::*; +} +#[cfg(not(test))] +pub(crate) mod prelude { + pub use proc_macro::*; +} + +use error::Error; +use prelude::TokenStream; + +type Result = std::result::Result; + +#[proc_macro_derive(Encode)] +pub fn derive_encode(input: proc_macro::TokenStream) -> proc_macro::TokenStream { + #[allow(clippy::useless_conversion)] + derive_encode_inner(input.into()) + .unwrap_or_else(|e| e.into_token_stream()) + .into() +} + +fn derive_encode_inner(input: TokenStream) -> Result { + let source = &mut input.into_iter().peekable(); + + let _attributes = parse::Attributes::try_take(source)?; + let _visibility = parse::Visibility::try_take(source)?; + let (datatype, name) = parse::DataType::take(source)?; + let generics = parse::Generics::try_take(source)?; + let generic_constraints = parse::GenericConstraints::try_take(source)?; + + let mut generator = generate::Generator::new(name.clone(), generics, generic_constraints); + + match datatype { + parse::DataType::Struct => { + let body = parse::StructBody::take(source)?; + derive_struct::DeriveStruct { + fields: body.fields, + } + .generate_encode(&mut generator)?; + } + parse::DataType::Enum => { + let body = parse::EnumBody::take(source)?; + derive_enum::DeriveEnum { + variants: body.variants, + } + .generate_encode(&mut generator)?; + } + } + + let stream = generator.take_stream(); + dump_output(name, "Encode", &stream); + 
Ok(stream) +} + +#[proc_macro_derive(Decode)] +pub fn derive_decode(input: proc_macro::TokenStream) -> proc_macro::TokenStream { + #[allow(clippy::useless_conversion)] + derive_decode_inner(input.into()) + .unwrap_or_else(|e| e.into_token_stream()) + .into() +} + +fn derive_decode_inner(input: TokenStream) -> Result { + let source = &mut input.into_iter().peekable(); + + let _attributes = parse::Attributes::try_take(source)?; + let _visibility = parse::Visibility::try_take(source)?; + let (datatype, name) = parse::DataType::take(source)?; + let generics = parse::Generics::try_take(source)?; + let generic_constraints = parse::GenericConstraints::try_take(source)?; + + let mut generator = generate::Generator::new(name.clone(), generics, generic_constraints); + + match datatype { + parse::DataType::Struct => { + let body = parse::StructBody::take(source)?; + derive_struct::DeriveStruct { + fields: body.fields, + } + .generate_decode(&mut generator)?; + } + parse::DataType::Enum => { + let body = parse::EnumBody::take(source)?; + derive_enum::DeriveEnum { + variants: body.variants, + } + .generate_decode(&mut generator)?; + } + } + + let stream = generator.take_stream(); + dump_output(name, "Decode", &stream); + Ok(stream) +} + +fn dump_output(name: crate::prelude::Ident, derive: &str, stream: &crate::prelude::TokenStream) { + use std::io::Write; + + if let Ok(var) = std::env::var("CARGO_MANIFEST_DIR") { + let mut path = std::path::PathBuf::from(var); + path.push("target"); + if path.exists() { + path.push(format!("{}_{}.rs", name, derive)); + if let Ok(mut file) = std::fs::File::create(path) { + let _ = file.write_all(stream.to_string().as_bytes()); + } + } + } +} + +#[cfg(test)] +pub(crate) fn token_stream( + s: &str, +) -> std::iter::Peekable> { + use std::str::FromStr; + + let stream = proc_macro2::TokenStream::from_str(s) + .unwrap_or_else(|e| panic!("Could not parse code: {:?}\n{:?}", s, e)); + stream.into_iter().peekable() +} diff --git 
a/derive/src/parse/attributes.rs b/derive/src/parse/attributes.rs new file mode 100644 index 000000000..74d194f53 --- /dev/null +++ b/derive/src/parse/attributes.rs @@ -0,0 +1,56 @@ +use super::assume_group; +use crate::parse::consume_punct_if; +use crate::prelude::{Delimiter, Group, Punct, TokenTree}; +use crate::{Error, Result}; +use std::iter::Peekable; + +#[derive(Debug)] +pub struct Attributes { + // we don't use these fields yet + #[allow(dead_code)] + punct: Punct, + #[allow(dead_code)] + tokens: Group, +} + +impl Attributes { + pub fn try_take(input: &mut Peekable>) -> Result> { + if let Some(punct) = consume_punct_if(input, '#') { + // found attributes, next token should be a [] group + if let Some(TokenTree::Group(g)) = input.peek() { + if g.delimiter() != Delimiter::Bracket { + return Err(Error::InvalidRustSyntax(g.span())); + } + return Ok(Some(Attributes { + punct, + tokens: assume_group(input.next()), + })); + } + // expected [] group, found something else + return Err(Error::InvalidRustSyntax(match input.peek() { + Some(next_token) => next_token.span(), + None => punct.span(), + })); + } + Ok(None) + } +} + +#[test] +fn test_attributes_try_take() { + use crate::token_stream; + + let stream = &mut token_stream("struct Foo;"); + assert!(Attributes::try_take(stream).unwrap().is_none()); + match stream.next().unwrap() { + TokenTree::Ident(i) => assert_eq!(i, "struct"), + x => panic!("Expected ident, found {:?}", x), + } + + let stream = &mut token_stream("#[cfg(test)] struct Foo;"); + assert!(Attributes::try_take(stream).unwrap().is_some()); + match stream.next().unwrap() { + TokenTree::Ident(i) => assert_eq!(i, "struct"), + x => panic!("Expected ident, found {:?}", x), + } +} diff --git a/derive/src/parse/body.rs b/derive/src/parse/body.rs new file mode 100644 index 000000000..ff9689d6e --- /dev/null +++ b/derive/src/parse/body.rs @@ -0,0 +1,421 @@ +use super::{assume_group, assume_ident, read_tokens_until_punct, Attributes, Visibility}; +use 
crate::parse::consume_punct_if; +use crate::prelude::{Delimiter, Ident, Span, TokenTree}; +use crate::{Error, Result}; +use std::iter::Peekable; + +#[derive(Debug)] +pub struct StructBody { + pub fields: Fields, +} + +impl StructBody { + pub fn take(input: &mut Peekable>) -> Result { + match input.peek() { + Some(TokenTree::Group(_)) => {} + Some(TokenTree::Punct(p)) if p.as_char() == ';' => { + return Ok(StructBody { + fields: Fields::Unit, + }) + } + Some(t) => { + return Err(Error::InvalidRustSyntax(t.span())); + } + _ => { + return Err(Error::InvalidRustSyntax(Span::call_site())); + } + } + let group = assume_group(input.next()); + let mut stream = group.stream().into_iter().peekable(); + let fields = match group.delimiter() { + Delimiter::Brace => Fields::Struct(UnnamedField::parse_with_name(&mut stream)?), + Delimiter::Parenthesis => Fields::Tuple(UnnamedField::parse(&mut stream)?), + _ => return Err(Error::InvalidRustSyntax(group.span())), + }; + Ok(StructBody { fields }) + } +} + +#[test] +fn test_struct_body_take() { + use crate::token_stream; + + let stream = &mut token_stream( + "struct Foo { pub bar: u8, pub(crate) baz: u32, bla: Vec>> }", + ); + let (data_type, ident) = super::DataType::take(stream).unwrap(); + assert_eq!(data_type, super::DataType::Struct); + assert_eq!(ident, "Foo"); + let body = StructBody::take(stream).unwrap(); + + assert_eq!(body.fields.len(), 3); + let (ident, field) = body.fields.get(0).unwrap(); + assert_eq!(ident.unwrap(), "bar"); + assert_eq!(field.vis, Visibility::Pub); + assert_eq!(field.type_string(), "u8"); + + let (ident, field) = body.fields.get(1).unwrap(); + assert_eq!(ident.unwrap(), "baz"); + assert_eq!(field.vis, Visibility::Pub); + assert_eq!(field.type_string(), "u32"); + + let (ident, field) = body.fields.get(2).unwrap(); + assert_eq!(ident.unwrap(), "bla"); + assert_eq!(field.vis, Visibility::Default); + assert_eq!(field.type_string(), "Vec>>"); + + let stream = &mut token_stream( + "struct Foo ( pub u8, 
pub(crate) u32, Vec>> )", + ); + let (data_type, ident) = super::DataType::take(stream).unwrap(); + assert_eq!(data_type, super::DataType::Struct); + assert_eq!(ident, "Foo"); + let body = StructBody::take(stream).unwrap(); + + assert_eq!(body.fields.len(), 3); + + let (ident, field) = body.fields.get(0).unwrap(); + assert!(ident.is_none()); + assert_eq!(field.vis, Visibility::Pub); + assert_eq!(field.type_string(), "u8"); + + let (ident, field) = body.fields.get(1).unwrap(); + assert!(ident.is_none()); + assert_eq!(field.vis, Visibility::Pub); + assert_eq!(field.type_string(), "u32"); + + let (ident, field) = body.fields.get(2).unwrap(); + assert!(ident.is_none()); + assert_eq!(field.vis, Visibility::Default); + assert_eq!(field.type_string(), "Vec>>"); + + let stream = &mut token_stream("struct Foo;"); + let (data_type, ident) = super::DataType::take(stream).unwrap(); + assert_eq!(data_type, super::DataType::Struct); + assert_eq!(ident, "Foo"); + let body = StructBody::take(stream).unwrap(); + assert_eq!(body.fields.len(), 0); + + let stream = &mut token_stream("struct Foo {}"); + let (data_type, ident) = super::DataType::take(stream).unwrap(); + assert_eq!(data_type, super::DataType::Struct); + assert_eq!(ident, "Foo"); + let body = StructBody::take(stream).unwrap(); + assert_eq!(body.fields.len(), 0); + + let stream = &mut token_stream("struct Foo ()"); + let (data_type, ident) = super::DataType::take(stream).unwrap(); + assert_eq!(data_type, super::DataType::Struct); + assert_eq!(ident, "Foo"); + assert_eq!(body.fields.len(), 0); +} + +#[derive(Debug)] +pub struct EnumBody { + pub variants: Vec, +} + +impl EnumBody { + pub fn take(input: &mut Peekable>) -> Result { + match input.peek() { + Some(TokenTree::Group(_)) => {} + Some(TokenTree::Punct(p)) if p.as_char() == ';' => { + return Ok(EnumBody { + variants: Vec::new(), + }) + } + Some(t) => { + return Err(Error::InvalidRustSyntax(t.span())); + } + _ => { + return 
Err(Error::InvalidRustSyntax(Span::call_site())); + } + } + let group = assume_group(input.next()); + let mut variants = Vec::new(); + let stream = &mut group.stream().into_iter().peekable(); + while stream.peek().is_some() { + let attributes = Attributes::try_take(stream)?; + let ident = match stream.peek() { + Some(TokenTree::Ident(_)) => assume_ident(stream.next()), + Some(x) => return Err(Error::InvalidRustSyntax(x.span())), + None => return Err(Error::InvalidRustSyntax(Span::call_site())), + }; + + let mut fields = Fields::Unit; + + if let Some(TokenTree::Group(_)) = stream.peek() { + let group = assume_group(stream.next()); + let stream = &mut group.stream().into_iter().peekable(); + match group.delimiter() { + Delimiter::Brace => { + fields = Fields::Struct(UnnamedField::parse_with_name(stream)?) + } + Delimiter::Parenthesis => fields = Fields::Tuple(UnnamedField::parse(stream)?), + _ => return Err(Error::InvalidRustSyntax(group.span())), + } + } + consume_punct_if(stream, ','); + + variants.push(EnumVariant { + name: ident, + fields, + attributes, + }); + } + + Ok(EnumBody { variants }) + } +} + +#[test] +fn test_enum_body_take() { + use crate::token_stream; + + let stream = &mut token_stream("enum Foo { }"); + let (data_type, ident) = super::DataType::take(stream).unwrap(); + assert_eq!(data_type, super::DataType::Enum); + assert_eq!(ident, "Foo"); + let body = EnumBody::take(stream).unwrap(); + assert_eq!(0, body.variants.len()); + + let stream = &mut token_stream("enum Foo { Bar, Baz(u8), Blah { a: u32, b: u128 } }"); + let (data_type, ident) = super::DataType::take(stream).unwrap(); + assert_eq!(data_type, super::DataType::Enum); + assert_eq!(ident, "Foo"); + let body = EnumBody::take(stream).unwrap(); + assert_eq!(3, body.variants.len()); + + assert_eq!(body.variants[0].name, "Bar"); + assert!(body.variants[0].fields.is_unit()); + + assert_eq!(body.variants[1].name, "Baz"); + assert_eq!(1, body.variants[1].fields.len()); + let (ident, field) = 
body.variants[1].fields.get(0).unwrap(); + assert!(ident.is_none()); + assert_eq!(field.type_string(), "u8"); + + assert_eq!(body.variants[2].name, "Blah"); + assert_eq!(2, body.variants[2].fields.len()); + let (ident, field) = body.variants[2].fields.get(0).unwrap(); + assert_eq!(ident.unwrap(), "a"); + assert_eq!(field.type_string(), "u32"); + let (ident, field) = body.variants[2].fields.get(1).unwrap(); + assert_eq!(ident.unwrap(), "b"); + assert_eq!(field.type_string(), "u128"); +} + +#[derive(Debug)] +pub struct EnumVariant { + pub name: Ident, + pub fields: Fields, + pub attributes: Option, +} + +#[derive(Debug)] +pub enum Fields { + /// Empty variant. + /// ```rs + /// enum Foo { + /// Baz, + /// } + /// struct Bar { } + /// ``` + Unit, + + /// Tuple-like variant + /// ```rs + /// enum Foo { + /// Baz(u32) + /// } + /// struct Bar(u32); + /// ``` + Tuple(Vec), + + /// Struct-like variant + /// ```rs + /// enum Foo { + /// Baz { + /// baz: u32 + /// } + /// } + /// struct Bar { + /// baz: u32 + /// } + /// ``` + Struct(Vec<(Ident, UnnamedField)>), +} + +impl Fields { + pub fn names(&self) -> Vec { + match self { + Self::Tuple(fields) => fields + .iter() + .enumerate() + .map(|(idx, field)| IdentOrIndex::Index(idx, field.span())) + .collect(), + Self::Struct(fields) => fields + .iter() + .map(|(ident, _)| IdentOrIndex::Ident(ident)) + .collect(), + Self::Unit => Vec::new(), + } + } + + pub fn delimiter(&self) -> Option { + match self { + Self::Tuple(_) => Some(Delimiter::Parenthesis), + Self::Struct(_) => Some(Delimiter::Brace), + Self::Unit => None, + } + } +} + +#[cfg(test)] +impl Fields { + pub fn is_unit(&self) -> bool { + matches!(self, Self::Unit) + } + + pub fn len(&self) -> usize { + match self { + Self::Tuple(fields) => fields.len(), + Self::Struct(fields) => fields.len(), + Self::Unit => 0, + } + } + + pub fn get(&self, index: usize) -> Option<(Option<&Ident>, &UnnamedField)> { + match self { + Self::Tuple(fields) => fields.get(index).map(|f| (None, 
f)), + Self::Struct(fields) => fields.get(index).map(|(ident, field)| (Some(ident), field)), + Self::Unit => None, + } + } +} + +#[derive(Debug)] +pub struct UnnamedField { + pub vis: Visibility, + pub r#type: Vec, + pub attributes: Option, +} + +impl UnnamedField { + pub fn parse_with_name( + input: &mut Peekable>, + ) -> Result> { + let mut result = Vec::new(); + loop { + let attributes = Attributes::try_take(input)?; + let vis = Visibility::try_take(input)?; + + let ident = match input.peek() { + Some(TokenTree::Ident(_)) => assume_ident(input.next()), + Some(x) => return Err(Error::InvalidRustSyntax(x.span())), + None => break, + }; + match input.peek() { + Some(TokenTree::Punct(p)) if p.as_char() == ':' => { + input.next(); + } + Some(x) => return Err(Error::InvalidRustSyntax(x.span())), + None => return Err(Error::InvalidRustSyntax(Span::call_site())), + } + let r#type = read_tokens_until_punct(input, &[','])?; + consume_punct_if(input, ','); + result.push(( + ident, + Self { + vis, + r#type, + attributes, + }, + )); + } + Ok(result) + } + + pub fn parse(input: &mut Peekable>) -> Result> { + let mut result = Vec::new(); + while input.peek().is_some() { + let attributes = Attributes::try_take(input)?; + let vis = Visibility::try_take(input)?; + + let r#type = read_tokens_until_punct(input, &[','])?; + consume_punct_if(input, ','); + result.push(Self { + vis, + r#type, + attributes, + }); + } + Ok(result) + } + + #[cfg(test)] + pub fn type_string(&self) -> String { + self.r#type.iter().map(|t| t.to_string()).collect() + } + + pub fn span(&self) -> Span { + // BlockedTODO: https://github.com/rust-lang/rust/issues/54725 + // Span::join is unstable + // if let Some(first) = self.r#type.first() { + // let mut span = first.span(); + // for token in self.r#type.iter().skip(1) { + // span = span.join(span).unwrap(); + // } + // span + // } else { + // Span::call_site() + // } + + match self.r#type.first() { + Some(first) => first.span(), + None => Span::call_site(), + 
} + } +} + +#[derive(Debug)] +pub enum IdentOrIndex<'a> { + Ident(&'a Ident), + Index(usize, Span), +} + +impl<'a> IdentOrIndex<'a> { + pub fn unwrap_ident(&self) -> &'a Ident { + match self { + Self::Ident(i) => i, + x => panic!("Expected ident, found {:?}", x), + } + } + + pub fn to_token_tree_with_prefix(&self, prefix: &str) -> TokenTree { + TokenTree::Ident(match self { + IdentOrIndex::Ident(i) => (*i).clone(), + IdentOrIndex::Index(idx, span) => { + let name = format!("{}{}", prefix, idx); + Ident::new(&name, *span) + } + }) + } + pub fn to_string_with_prefix(&self, prefix: &str) -> String { + match self { + IdentOrIndex::Ident(i) => i.to_string(), + IdentOrIndex::Index(idx, _) => { + format!("{}{}", prefix, idx) + } + } + } +} + +impl std::fmt::Display for IdentOrIndex<'_> { + fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + IdentOrIndex::Ident(i) => write!(fmt, "{}", i), + IdentOrIndex::Index(idx, _) => write!(fmt, "{}", idx), + } + } +} diff --git a/derive/src/parse/data_type.rs b/derive/src/parse/data_type.rs new file mode 100644 index 000000000..64d07603e --- /dev/null +++ b/derive/src/parse/data_type.rs @@ -0,0 +1,77 @@ +use crate::prelude::{Ident, Span, TokenTree}; +use crate::{Error, Result}; +use std::iter::Peekable; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum DataType { + Enum, + Struct, +} + +impl DataType { + pub fn take(input: &mut Peekable>) -> Result<(Self, Ident)> { + if let Some(TokenTree::Ident(ident)) = input.peek() { + let result = match ident.to_string().as_str() { + "struct" => DataType::Struct, + "enum" => DataType::Enum, + _ => return Err(Error::UnknownDataType(ident.span())), + }; + let ident = super::assume_ident(input.next()); + return match input.next() { + Some(TokenTree::Ident(ident)) => Ok((result, ident)), + Some(t) => Err(Error::InvalidRustSyntax(t.span())), + None => Err(Error::InvalidRustSyntax(ident.span())), + }; + } + let span = input + .peek() + .map(|t| t.span()) + 
.unwrap_or_else(Span::call_site); + Err(Error::InvalidRustSyntax(span)) + } +} + +#[test] +fn test_datatype_take() { + use crate::token_stream; + + fn validate_output_eq(input: &str, expected_dt: DataType, expected_ident: &str) { + let (dt, ident) = DataType::take(&mut token_stream(input)).unwrap_or_else(|e| { + panic!("Could not parse tokenstream {:?}: {:?}", input, e); + }); + if dt != expected_dt || ident != expected_ident { + println!("While parsing {:?}", input); + panic!( + "Expected {:?} {:?}, received {:?} {:?}", + dt, ident, expected_dt, expected_ident + ); + } + } + + assert!(DataType::take(&mut token_stream("enum")) + .unwrap_err() + .is_invalid_rust_syntax()); + validate_output_eq("enum Foo", DataType::Enum, "Foo"); + validate_output_eq("enum Foo { }", DataType::Enum, "Foo"); + validate_output_eq("enum Foo { bar, baz }", DataType::Enum, "Foo"); + validate_output_eq("enum Foo<'a, T> { bar, baz }", DataType::Enum, "Foo"); + + assert!(DataType::take(&mut token_stream("struct")) + .unwrap_err() + .is_invalid_rust_syntax()); + validate_output_eq("struct Foo { }", DataType::Struct, "Foo"); + validate_output_eq("struct Foo { bar: u32, baz: u32 }", DataType::Struct, "Foo"); + validate_output_eq("struct Foo<'a, T> { bar: &'a T }", DataType::Struct, "Foo"); + + assert!(DataType::take(&mut token_stream("fn foo() {}")) + .unwrap_err() + .is_unknown_data_type()); + + assert!(DataType::take(&mut token_stream("() {}")) + .unwrap_err() + .is_invalid_rust_syntax()); + + assert!(DataType::take(&mut token_stream("")) + .unwrap_err() + .is_invalid_rust_syntax()); +} diff --git a/derive/src/parse/generics.rs b/derive/src/parse/generics.rs new file mode 100644 index 000000000..9495c4c02 --- /dev/null +++ b/derive/src/parse/generics.rs @@ -0,0 +1,417 @@ +use super::assume_punct; +use crate::generate::StreamBuilder; +use crate::parse::{ident_eq, read_tokens_until_punct}; +use crate::prelude::{Ident, TokenTree}; +use crate::{Error, Result}; +use std::iter::Peekable; + 
+#[derive(Debug)] +pub struct Generics { + lifetimes_and_generics: Vec, +} + +impl Generics { + pub fn try_take(input: &mut Peekable>) -> Result> { + let maybe_punct = input.peek(); + if let Some(TokenTree::Punct(punct)) = maybe_punct { + if punct.as_char() == '<' { + let punct = super::assume_punct(input.next(), '<'); + let mut result = Generics { + lifetimes_and_generics: Vec::new(), + }; + loop { + match input.peek() { + Some(TokenTree::Punct(punct)) if punct.as_char() == '\'' => { + result + .lifetimes_and_generics + .push(Lifetime::take(input)?.into()); + super::consume_punct_if(input, ','); + } + Some(TokenTree::Punct(punct)) if punct.as_char() == '>' => { + assume_punct(input.next(), '>'); + break; + } + Some(TokenTree::Ident(_)) => { + result + .lifetimes_and_generics + .push(Generic::take(input)?.into()); + super::consume_punct_if(input, ','); + } + x => { + return Err(Error::InvalidRustSyntax( + x.map(|x| x.span()).unwrap_or_else(|| punct.span()), + )); + } + } + } + return Ok(Some(result)); + } + } + Ok(None) + } + + pub fn has_lifetime(&self) -> bool { + self.lifetimes_and_generics + .iter() + .any(|lt| lt.is_lifetime()) + } + + pub fn impl_generics(&self) -> StreamBuilder { + let mut result = StreamBuilder::new(); + result.punct('<'); + + for (idx, generic) in self.lifetimes_and_generics.iter().enumerate() { + if idx > 0 { + result.punct(','); + } + + if generic.is_lifetime() { + result.lifetime(generic.ident()); + } else { + result.ident(generic.ident()); + } + + if generic.has_constraints() { + result.punct(':'); + result.extend(generic.constraints()); + } + } + + result.punct('>'); + + result + } + + pub fn impl_generics_with_additional_lifetime(&self, lifetime: &str) -> StreamBuilder { + assert!(self.has_lifetime()); + + let mut result = StreamBuilder::new(); + result.punct('<'); + result.lifetime_str(lifetime); + + if self.has_lifetime() { + for (idx, lt) in self + .lifetimes_and_generics + .iter() + .filter_map(|lt| lt.as_lifetime()) + 
.enumerate() + { + result.punct(if idx == 0 { ':' } else { '+' }); + result.lifetime(lt.ident.clone()); + } + } + + for generic in &self.lifetimes_and_generics { + result.punct(','); + + if generic.is_lifetime() { + result.lifetime(generic.ident()); + } else { + result.ident(generic.ident()); + } + + if generic.has_constraints() { + result.punct(':'); + result.extend(generic.constraints()); + } + } + + result.punct('>'); + + result + } + + pub fn type_generics(&self) -> StreamBuilder { + let mut result = StreamBuilder::new(); + result.punct('<'); + + for (idx, generic) in self.lifetimes_and_generics.iter().enumerate() { + if idx > 0 { + result.punct(','); + } + if generic.is_lifetime() { + result.lifetime(generic.ident()); + } else { + result.ident(generic.ident()); + } + } + + result.punct('>'); + result + } +} + +#[derive(Debug)] +enum LifetimeOrGeneric { + Lifetime(Lifetime), + Generic(Generic), +} + +impl LifetimeOrGeneric { + fn is_lifetime(&self) -> bool { + matches!(self, LifetimeOrGeneric::Lifetime(_)) + } + + fn ident(&self) -> Ident { + match self { + Self::Lifetime(lt) => lt.ident.clone(), + Self::Generic(gen) => gen.ident.clone(), + } + } + + fn as_lifetime(&self) -> Option<&Lifetime> { + match self { + Self::Lifetime(lt) => Some(lt), + Self::Generic(_) => None, + } + } + + fn has_constraints(&self) -> bool { + match self { + Self::Lifetime(lt) => !lt.constraint.is_empty(), + Self::Generic(gen) => !gen.constraints.is_empty(), + } + } + + fn constraints(&self) -> Vec { + match self { + Self::Lifetime(lt) => lt.constraint.clone(), + Self::Generic(gen) => gen.constraints.clone(), + } + } +} + +impl From for LifetimeOrGeneric { + fn from(lt: Lifetime) -> Self { + Self::Lifetime(lt) + } +} + +impl From for LifetimeOrGeneric { + fn from(gen: Generic) -> Self { + Self::Generic(gen) + } +} + +#[test] +fn test_generics_try_take() { + use crate::token_stream; + + assert!(Generics::try_take(&mut token_stream("")).unwrap().is_none()); + 
assert!(Generics::try_take(&mut token_stream("foo")) + .unwrap() + .is_none()); + assert!(Generics::try_take(&mut token_stream("()")) + .unwrap() + .is_none()); + + let stream = &mut token_stream("struct Foo<'a, T>()"); + let (data_type, ident) = super::DataType::take(stream).unwrap(); + assert_eq!(data_type, super::DataType::Struct); + assert_eq!(ident, "Foo"); + let generics = Generics::try_take(stream).unwrap().unwrap(); + assert_eq!(generics.lifetimes_and_generics.len(), 2); + assert_eq!(generics.lifetimes_and_generics[0].ident(), "a"); + assert_eq!(generics.lifetimes_and_generics[1].ident(), "T"); + + let stream = &mut token_stream("struct Foo()"); + let (data_type, ident) = super::DataType::take(stream).unwrap(); + assert_eq!(data_type, super::DataType::Struct); + assert_eq!(ident, "Foo"); + let generics = Generics::try_take(stream).unwrap().unwrap(); + assert_eq!(generics.lifetimes_and_generics.len(), 2); + assert_eq!(generics.lifetimes_and_generics[0].ident(), "A"); + assert_eq!(generics.lifetimes_and_generics[1].ident(), "B"); + + let stream = &mut token_stream("struct Foo<'a, T: Display>()"); + let (data_type, ident) = super::DataType::take(stream).unwrap(); + assert_eq!(data_type, super::DataType::Struct); + assert_eq!(ident, "Foo"); + let generics = Generics::try_take(stream).unwrap().unwrap(); + dbg!(&generics); + assert_eq!(generics.lifetimes_and_generics.len(), 2); + assert_eq!(generics.lifetimes_and_generics[0].ident(), "a"); + assert_eq!(generics.lifetimes_and_generics[1].ident(), "T"); + + let stream = &mut token_stream("struct Foo<'a, T: for<'a> Bar<'a> + 'static>()"); + let (data_type, ident) = super::DataType::take(stream).unwrap(); + assert_eq!(data_type, super::DataType::Struct); + assert_eq!(ident, "Foo"); + dbg!(&generics); + assert_eq!(generics.lifetimes_and_generics.len(), 2); + assert_eq!(generics.lifetimes_and_generics[0].ident(), "a"); + assert_eq!(generics.lifetimes_and_generics[1].ident(), "T"); + + let stream = &mut token_stream( + 
"struct Baz Bar<'a, for<'b> Bar<'b, for<'c> Bar<'c, u32>>>> {}", + ); + let (data_type, ident) = super::DataType::take(stream).unwrap(); + assert_eq!(data_type, super::DataType::Struct); + assert_eq!(ident, "Baz"); + let generics = Generics::try_take(stream).unwrap().unwrap(); + dbg!(&generics); + assert_eq!(generics.lifetimes_and_generics.len(), 1); + assert_eq!(generics.lifetimes_and_generics[0].ident(), "T"); + + let stream = &mut token_stream("struct Baz<()> {}"); + let (data_type, ident) = super::DataType::take(stream).unwrap(); + assert_eq!(data_type, super::DataType::Struct); + assert_eq!(ident, "Baz"); + assert!(Generics::try_take(stream) + .unwrap_err() + .is_invalid_rust_syntax()); + + let stream = &mut token_stream("struct Bar SomeStruct, B>"); + let (data_type, ident) = super::DataType::take(stream).unwrap(); + assert_eq!(data_type, super::DataType::Struct); + assert_eq!(ident, "Bar"); + let generics = Generics::try_take(stream).unwrap().unwrap(); + dbg!(&generics); + assert_eq!(generics.lifetimes_and_generics.len(), 2); + assert_eq!(generics.lifetimes_and_generics[0].ident(), "A"); + assert_eq!(generics.lifetimes_and_generics[1].ident(), "B"); +} + +#[derive(Debug)] +pub struct Lifetime { + ident: Ident, + constraint: Vec, +} + +impl Lifetime { + pub fn take(input: &mut Peekable>) -> Result { + let start = super::assume_punct(input.next(), '\''); + let ident = match input.peek() { + Some(TokenTree::Ident(_)) => super::assume_ident(input.next()), + Some(t) => return Err(Error::ExpectedIdent(t.span())), + None => return Err(Error::ExpectedIdent(start.span())), + }; + + let mut constraint = Vec::new(); + if let Some(TokenTree::Punct(p)) = input.peek() { + if p.as_char() == ':' { + assume_punct(input.next(), ':'); + constraint = super::read_tokens_until_punct(input, &[',', '>'])?; + } + } + + Ok(Self { ident, constraint }) + } + + #[cfg(test)] + fn is_ident(&self, s: &str) -> bool { + self.ident.to_string() == s + } +} + +#[test] +fn test_lifetime_take() { 
+ use crate::token_stream; + use std::panic::catch_unwind; + assert!(Lifetime::take(&mut token_stream("'a")) + .unwrap() + .is_ident("a")); + assert!(catch_unwind(|| Lifetime::take(&mut token_stream("'0"))).is_err()); + assert!(catch_unwind(|| Lifetime::take(&mut token_stream("'("))).is_err()); + assert!(catch_unwind(|| Lifetime::take(&mut token_stream("')"))).is_err()); + assert!(catch_unwind(|| Lifetime::take(&mut token_stream("'0'"))).is_err()); + + let stream = &mut token_stream("'a: 'b>"); + let lifetime = Lifetime::take(stream).unwrap(); + assert_eq!(lifetime.ident, "a"); + assert_eq!(lifetime.constraint.len(), 2); + assume_punct(stream.next(), '>'); + assert!(stream.next().is_none()); +} + +#[derive(Debug)] +pub struct Generic { + ident: Ident, + constraints: Vec, +} + +impl Generic { + pub fn take(input: &mut Peekable>) -> Result { + let ident = super::assume_ident(input.next()); + let mut constraints = Vec::new(); + if let Some(TokenTree::Punct(punct)) = input.peek() { + if punct.as_char() == ':' { + super::assume_punct(input.next(), ':'); + constraints = super::read_tokens_until_punct(input, &['>', ','])?; + } + } + Ok(Generic { ident, constraints }) + } +} + +#[derive(Debug)] +pub struct GenericConstraints { + constraints: Vec, +} + +impl GenericConstraints { + pub fn try_take(input: &mut Peekable>) -> Result> { + match input.peek() { + Some(TokenTree::Ident(ident)) => { + if !ident_eq(ident, "where") { + return Ok(None); + } + } + _ => { + return Ok(None); + } + } + input.next(); + let constraints = read_tokens_until_punct(input, &['{', '('])?; + Ok(Some(Self { constraints })) + } + + pub fn where_clause(&self) -> StreamBuilder { + let mut result = StreamBuilder::new(); + result.ident_str("where"); + result.extend(self.constraints.clone()); + result + } +} + +#[test] +fn test_generic_constraints_try_take() { + use super::{DataType, StructBody, Visibility}; + use crate::token_stream; + + let stream = &mut token_stream("struct Foo where Foo: Bar { }"); + 
super::DataType::take(stream).unwrap(); + assert!(GenericConstraints::try_take(stream).unwrap().is_some()); + + let stream = &mut token_stream("struct Foo { }"); + super::DataType::take(stream).unwrap(); + assert!(GenericConstraints::try_take(stream).unwrap().is_none()); + + let stream = &mut token_stream("struct Foo where Foo: Bar(Foo)"); + super::DataType::take(stream).unwrap(); + assert!(GenericConstraints::try_take(stream).unwrap().is_some()); + + let stream = &mut token_stream("struct Foo()"); + super::DataType::take(stream).unwrap(); + assert!(GenericConstraints::try_take(stream).unwrap().is_none()); + + let stream = &mut token_stream("struct Foo()"); + assert!(GenericConstraints::try_take(stream).unwrap().is_none()); + + let stream = &mut token_stream("{}"); + assert!(GenericConstraints::try_take(stream).unwrap().is_none()); + + let stream = &mut token_stream(""); + assert!(GenericConstraints::try_take(stream).unwrap().is_none()); + + let stream = &mut token_stream("pub(crate) struct Test {}"); + assert_eq!(Visibility::Pub, Visibility::try_take(stream).unwrap()); + let (data_type, ident) = DataType::take(stream).unwrap(); + assert_eq!(data_type, DataType::Struct); + assert_eq!(ident, "Test"); + let constraints = Generics::try_take(stream).unwrap().unwrap(); + assert_eq!(constraints.lifetimes_and_generics.len(), 1); + assert_eq!(constraints.lifetimes_and_generics[0].ident(), "T"); + let body = StructBody::take(stream).unwrap(); + assert_eq!(body.fields.len(), 0); +} diff --git a/derive/src/parse/mod.rs b/derive/src/parse/mod.rs new file mode 100644 index 000000000..e888abb94 --- /dev/null +++ b/derive/src/parse/mod.rs @@ -0,0 +1,142 @@ +use crate::error::Error; +use crate::prelude::{Delimiter, Group, Ident, Punct, TokenTree}; +use std::iter::Peekable; + +mod attributes; +mod body; +mod data_type; +mod generics; +mod visibility; + +pub use self::attributes::Attributes; +pub use self::body::{EnumBody, EnumVariant, Fields, StructBody, UnnamedField}; +pub use 
self::data_type::DataType; +pub use self::generics::{Generic, GenericConstraints, Generics, Lifetime}; +pub use self::visibility::Visibility; + +pub(self) fn assume_group(t: Option) -> Group { + match t { + Some(TokenTree::Group(group)) => group, + _ => unreachable!(), + } +} +pub(self) fn assume_ident(t: Option) -> Ident { + match t { + Some(TokenTree::Ident(ident)) => ident, + _ => unreachable!(), + } +} +pub(self) fn assume_punct(t: Option, punct: char) -> Punct { + match t { + Some(TokenTree::Punct(p)) => { + debug_assert_eq!(punct, p.as_char()); + p + } + _ => unreachable!(), + } +} + +pub(self) fn consume_punct_if( + input: &mut Peekable>, + punct: char, +) -> Option { + if let Some(TokenTree::Punct(p)) = input.peek() { + if p.as_char() == punct { + match input.next() { + Some(TokenTree::Punct(p)) => return Some(p), + _ => unreachable!(), + } + } + } + None +} + +#[cfg(test)] +pub(self) fn ident_eq(ident: &Ident, text: &str) -> bool { + ident == text +} + +#[cfg(not(test))] +pub(self) fn ident_eq(ident: &Ident, text: &str) -> bool { + ident.to_string() == text +} + +fn check_if_arrow(tokens: &[TokenTree], punct: &Punct) -> bool { + if punct.as_char() == '>' { + if let Some(TokenTree::Punct(previous_punct)) = tokens.last() { + if previous_punct.as_char() == '-' { + return true; + } + } + } + false +} + +const OPEN_BRACKETS: &[char] = &['<', '(', '[', '{']; +const CLOSING_BRACKETS: &[char] = &['>', ')', ']', '}']; +const BRACKET_DELIMITER: &[Option] = &[ + None, + Some(Delimiter::Parenthesis), + Some(Delimiter::Bracket), + Some(Delimiter::Brace), +]; + +pub(self) fn read_tokens_until_punct( + input: &mut Peekable>, + expected_puncts: &[char], +) -> Result, Error> { + let mut result = Vec::new(); + let mut open_brackets = Vec::::new(); + 'outer: loop { + match input.peek() { + Some(TokenTree::Punct(punct)) => { + if check_if_arrow(&result, punct) { + // do nothing + } else if OPEN_BRACKETS.contains(&punct.as_char()) { + open_brackets.push(punct.as_char()); + } 
else if let Some(index) = + CLOSING_BRACKETS.iter().position(|c| c == &punct.as_char()) + { + let last_bracket = match open_brackets.pop() { + Some(bracket) => bracket, + None => { + if expected_puncts.contains(&punct.as_char()) { + break; + } + return Err(Error::InvalidRustSyntax(punct.span())); + } + }; + let expected = OPEN_BRACKETS[index]; + assert_eq!( + expected, + last_bracket, + "Unexpected closing bracket: found {}, expected {}", + punct.as_char(), + expected + ); + } else if expected_puncts.contains(&punct.as_char()) && open_brackets.is_empty() { + break; + } + result.push(input.next().unwrap()); + } + Some(TokenTree::Group(g)) if open_brackets.is_empty() => { + for punct in expected_puncts { + if let Some(idx) = OPEN_BRACKETS.iter().position(|c| c == punct) { + if let Some(delim) = BRACKET_DELIMITER[idx] { + if delim == g.delimiter() { + // we need to split on this delimiter + break 'outer; + } + } + } + } + result.push(input.next().unwrap()); + } + Some(_) => result.push(input.next().unwrap()), + None => { + break; + } + } + } + Ok(result) +} diff --git a/derive/src/parse/visibility.rs b/derive/src/parse/visibility.rs new file mode 100644 index 000000000..49a7ebf1a --- /dev/null +++ b/derive/src/parse/visibility.rs @@ -0,0 +1,68 @@ +use crate::prelude::TokenTree; +use crate::Result; +use std::iter::Peekable; + +#[derive(Debug, PartialEq, Clone)] +pub enum Visibility { + Default, + Pub, +} + +impl Visibility { + pub fn try_take(input: &mut Peekable>) -> Result { + if let Some(TokenTree::Ident(ident)) = input.peek() { + if super::ident_eq(ident, "pub") { + // Consume this token + super::assume_ident(input.next()); + + // check if the next token is `pub(...)` + if let Some(TokenTree::Group(_)) = input.peek() { + // we just consume the visibility, we're not actually using it for generation + super::assume_group(input.next()); + } + + return Ok(Visibility::Pub); + } + } + Ok(Visibility::Default) + } +} + +#[test] +fn test_visibility_try_take() { + use 
crate::token_stream; + + assert_eq!( + Visibility::Default, + Visibility::try_take(&mut token_stream("")).unwrap() + ); + assert_eq!( + Visibility::Pub, + Visibility::try_take(&mut token_stream("pub")).unwrap() + ); + assert_eq!( + Visibility::Pub, + Visibility::try_take(&mut token_stream(" pub ")).unwrap(), + ); + assert_eq!( + Visibility::Pub, + Visibility::try_take(&mut token_stream("\tpub\t")).unwrap() + ); + assert_eq!( + Visibility::Pub, + Visibility::try_take(&mut token_stream("pub(crate)")).unwrap() + ); + assert_eq!( + Visibility::Pub, + Visibility::try_take(&mut token_stream(" pub ( crate ) ")).unwrap() + ); + assert_eq!( + Visibility::Pub, + Visibility::try_take(&mut token_stream("\tpub\t(\tcrate\t)\t")).unwrap() + ); + + assert_eq!( + Visibility::Default, + Visibility::try_take(&mut token_stream("pb")).unwrap() + ); +} diff --git a/docs/spec.md b/docs/spec.md new file mode 100644 index 000000000..316477b52 --- /dev/null +++ b/docs/spec.md @@ -0,0 +1,178 @@ +# Serialization specification + +*NOTE*: Serialization is done by `bincode_derive` by default. If you enable the `serde` flag, serialization with `serde-derive` is supported as well. `serde-derive` has the same guarantees as `bincode_derive` for now. + +Related issue: + +## Endian + +By default `bincode` will serialize values in little endian encoding. This can be overwritten in the `Config`. + +## Basic types + +Boolean types are encoded with 1 byte for each boolean type, with `0` being `false`, `1` being true. Whilst deserilizing every other value will throw an error. + +All basic numeric types will be encoded based on the configured [IntEncoding](#IntEncoding). + +All floating point types will take up exactly 4 (for `f32`) or 8 (for `f64`) bytes. + +All tuples have no additional bytes, and are encoded in their specified order, e.g. 
+```rust +use bincode::config::Configuration; + +let tuple = (u32::min_value(), i32::max_value()); // 8 bytes +let encoded = bincode::encode_to_vec(tuple, Configuration::legacy()).unwrap(); +assert_eq!(encoded.as_slice(), &[ + 0, 0, 0, 0, // 4 bytes for first type: u32 + 255, 255, 255, 127 // 4 bytes for second type: i32 +]); +``` + +## IntEncoding +Bincode currently supports 2 different types of `IntEncoding`. With the default config, `VarintEncoding` is selected. + +### VarintEncoding +Encoding an unsigned integer `u` (of any type except u8/i8) works as follows: + +1. If `u < 251`, encode it as a single byte with that value. +1. If `251 <= u < 2**16`, encode it as a literal byte 251, followed by a u16 with value `u`. +1. If `2**16 <= u < 2**32`, encode it as a literal byte 252, followed by a u32 with value `u`. +1. If `2**32 <= u < 2**64`, encode it as a literal byte 253, followed by a u64 with value `u`. +1. If `2**64 <= u < 2**128`, encode it as a literal byte 254, followed by a u128 with value `u`. + +`usize` is being encoded/decoded as a `u64` and `isize` is being encoded/decoded as an `i64`. + +See the documentation of [VarintEncoding](https://docs.rs/bincode/latest/bincode/config/struct.VarintEncoding.html) for more information. + +### FixintEncoding + +- Fixed size integers are encoded directly +- Enum discriminants are encoded as u32 +- Lengths and usize are encoded as u64 + +See the documentation of [FixintEncoding](https://docs.rs/bincode/latest/bincode/config/struct.FixintEncoding.html) for more information. + +## Enums + +Enums are encoded with their variant first, followed by optionally the variant fields. The variant index is based on the `IntEncoding` during serialization. + +Both named and unnamed fields are serialized with their values only, and therefore encode to the same value. 
+ +```rust +use bincode::config::Configuration; + +#[derive(bincode::Encode)] +pub enum SomeEnum { + A, + B(u32), + C { value: u32 }, +} + +// SomeEnum::A +let encoded = bincode::encode_to_vec(SomeEnum::A, Configuration::legacy()).unwrap(); +assert_eq!(encoded.as_slice(), &[ + 0, 0, 0, 0, // first variant, A + // no extra bytes because A has no fields +]); + +// SomeEnum::B(0) +let encoded = bincode::encode_to_vec(SomeEnum::B(0), Configuration::legacy()).unwrap(); +assert_eq!(encoded.as_slice(), &[ + 1, 0, 0, 0, // second variant, B + 0, 0, 0, 0 // B has 1 unnamed field, which is a u32, so 4 bytes +]); + +// SomeEnum::C { value: 0u32 } +let encoded = bincode::encode_to_vec(SomeEnum::C { value: 0u32 }, Configuration::legacy()).unwrap(); +assert_eq!(encoded.as_slice(), &[ + 2, 0, 0, 0, // third variant, C + 0, 0, 0, 0 // C has 1 named field which is a u32, so 4 bytes +]); +``` + +# Collections + +Collections are encoded with their length value first, followed by each entry of the collection. The length value is based on your `IntEncoding`. + +**note**: fixed-size arrays do not necessarily have their `len` encoded. See [Arrays](#arrays) + +```rust +use bincode::config::Configuration; +let list = vec![ + 0u8, + 1u8, + 2u8 +]; + +let encoded = bincode::encode_to_vec(list, Configuration::legacy()).unwrap(); +assert_eq!(encoded.as_slice(), &[ + 3, 0, 0, 0, 0, 0, 0, 0, // length of 3u64 + 0, // entry 0 + 1, // entry 1 + 2, // entry 2 +]); +``` + +This also applies to e.g. `HashMap`, where each entry is a [tuple](#basic-types) of the key and value. + +# String and &str + +Both `String` and `&str` are treated as a `Vec<u8>`. See [Collections](#collections) for more information. 
+ +```rust +use bincode::config::Configuration; + +let str = "Hello"; // Could also be `String::new(...)` + +let encoded = bincode::encode_to_vec(str, Configuration::legacy()).unwrap(); +assert_eq!(encoded.as_slice(), &[ + 5, 0, 0, 0, 0, 0, 0, 0, // length of the string, 5 bytes + b'H', b'e', b'l', b'l', b'o' +]); +``` + +# Arrays + +Arrays are encoded *with* a length by default. + +```rust +use bincode::config::Configuration; + +let arr: [u8; 5] = [10, 20, 30, 40, 50]; +let encoded = bincode::encode_to_vec(arr, Configuration::legacy()).unwrap(); +assert_eq!(encoded.as_slice(), &[ + 5, 0, 0, 0, 0, 0, 0, 0, // The length, as a u64 + 10, 20, 30, 40, 50, // the bytes +]); +``` + +This applies to any type `T` that implements `Encodabl`/`Decodabl` + +```rust +use bincode::config::Configuration; + +#[derive(bincode::Encode)] +struct Foo { + first: u8, + second: u8 +}; + +let arr: [Foo; 2] = [ + Foo { + first: 10, + second: 20, + }, + Foo { + first: 30, + second: 40, + }, +]; + +let encoded = bincode::encode_to_vec(arr, Configuration::legacy()).unwrap(); +assert_eq!(encoded.as_slice(), &[ + 2, 0, 0, 0, 0, 0, 0, 0, // Length of the array + 10, 20, // First Foo + 30, 40, // Second Foo +]); +``` + diff --git a/examples/basic.rs b/examples/basic.rs deleted file mode 100644 index 4b4630757..000000000 --- a/examples/basic.rs +++ /dev/null @@ -1,27 +0,0 @@ -#[macro_use] -extern crate serde_derive; -extern crate bincode; - -use bincode::{deserialize, serialize}; - -#[derive(Serialize, Deserialize, PartialEq, Debug)] -struct Entity { - x: f32, - y: f32, -} - -#[derive(Serialize, Deserialize, PartialEq, Debug)] -struct World(Vec); - -fn main() { - let world = World(vec![Entity { x: 0.0, y: 4.0 }, Entity { x: 10.0, y: 20.5 }]); - - let encoded: Vec = serialize(&world).unwrap(); - - // 8 bytes for the length of the vector (usize), 4 bytes per float. 
- assert_eq!(encoded.len(), 8 + 4 * 4); - - let decoded: World = deserialize(&encoded[..]).unwrap(); - - assert_eq!(world, decoded); -} diff --git a/src/config.rs b/src/config.rs new file mode 100644 index 000000000..dd2d3b9e4 --- /dev/null +++ b/src/config.rs @@ -0,0 +1,249 @@ +//! The config module is used to change the behavior of bincode's encoding and decoding logic. +//! +//! *Important* make sure you use the same config for encoding and decoding, or else bincode will not work properly. +//! +//! To use a config, first create a type of [Configuration]. This type will implement trait [Config] for use with bincode. +//! +//! ``` +//! use bincode::config::{Config, Configuration}; +//! let config = Configuration::standard() +//! // pick one of: +//! .with_big_endian() +//! .with_little_endian() +//! // pick one of: +//! .with_variable_int_encoding() +//! .with_fixed_int_encoding() +//! // pick one of: +//! .skip_fixed_array_length() +//! .write_fixed_array_length(); +//! ``` +//! +//! See [Config] for more information on the configuration options. + +pub(crate) use self::internal::*; +use core::marker::PhantomData; + +/// The Configuration struct is used to build bincode configurations. The [Config] trait is implemented +/// by this struct when a valid configuration has been constructed. +/// +/// The following methods are mutually exclusive and will overwrite each other. 
The last call to one of these methods determines the behavior of the configuration: +/// +/// - [with_little_endian] and [with_big_endian] +/// - [with_fixed_int_encoding] and [with_variable_int_encoding] +/// - [skip_fixed_array_length] and [write_fixed_array_length] +/// +/// +/// [with_little_endian]: #method.with_little_endian +/// [with_big_endian]: #method.with_big_endian +/// [with_fixed_int_encoding]: #method.with_fixed_int_encoding +/// [with_variable_int_encoding]: #method.with_variable_int_encoding +/// [skip_fixed_array_length]: #method.skip_fixed_array_length +/// [write_fixed_array_length]: #method.write_fixed_array_length +#[derive(Copy, Clone)] +pub struct Configuration { + _e: PhantomData, + _i: PhantomData, + _a: PhantomData, +} + +impl Configuration { + /// The default config for bincode 2.0. By default this will be: + /// - Little endian + /// - Variable int encoding + /// - Skip fixed array length + pub fn standard() -> Self { + Self::generate() + } + + /// Creates the "legacy" default config. This is the default config that was present in bincode 1.0 + /// - Little endian + /// - Fixed int length encoding + /// - Write array lengths + pub fn legacy() -> Configuration { + Self::generate() + } +} + +impl Configuration { + fn generate<_E, _I, _A>() -> Configuration<_E, _I, _A> { + Configuration { + _e: PhantomData, + _i: PhantomData, + _a: PhantomData, + } + } + + /// Makes bincode encode all integer types in big endian. + pub fn with_big_endian(self) -> Configuration { + Self::generate() + } + + /// Makes bincode encode all integer types in little endian. + pub fn with_little_endian(self) -> Configuration { + Self::generate() + } + + /// Makes bincode encode all integer types with a variable integer encoding. + /// + /// Encoding an unsigned integer v (of any type excepting u8) works as follows: + /// + /// 1. If `u < 251`, encode it as a single byte with that value. + /// 2. 
If `251 <= u < 2**16`, encode it as a literal byte 251, followed by a u16 with value `u`. + /// 3. If `2**16 <= u < 2**32`, encode it as a literal byte 252, followed by a u32 with value `u`. + /// 4. If `2**32 <= u < 2**64`, encode it as a literal byte 253, followed by a u64 with value `u`. + /// 5. If `2**64 <= u < 2**128`, encode it as a literal byte 254, followed by a + /// u128 with value `u`. + /// + /// Then, for signed integers, we first convert to unsigned using the zigzag algorithm, + /// and then encode them as we do for unsigned integers generally. The reason we use this + /// algorithm is that it encodes those values which are close to zero in less bytes; the + /// obvious algorithm, where we encode the cast values, gives a very large encoding for all + /// negative values. + /// + /// The zigzag algorithm is defined as follows: + /// + /// ```ignore + /// fn zigzag(v: Signed) -> Unsigned { + /// match v { + /// 0 => 0, + /// v if v < 0 => |v| * 2 - 1 + /// v if v > 0 => v * 2 + /// } + /// } + /// ``` + /// + /// And works such that: + /// + /// ```ignore + /// assert_eq!(zigzag(0), 0); + /// assert_eq!(zigzag(-1), 1); + /// assert_eq!(zigzag(1), 2); + /// assert_eq!(zigzag(-2), 3); + /// assert_eq!(zigzag(2), 4); + /// assert_eq!(zigzag(i64::min_value()), u64::max_value()); + /// ``` + /// + /// Note that u256 and the like are unsupported by this format; if and when they are added to the + /// language, they may be supported via the extension point given by the 255 byte. + pub fn with_variable_int_encoding(self) -> Configuration { + Self::generate() + } + + /// Fixed-size integer encoding. 
+ /// + /// * Fixed size integers are encoded directly + /// * Enum discriminants are encoded as u32 + /// * Lengths and usize are encoded as u64 + pub fn with_fixed_int_encoding(self) -> Configuration { + Self::generate() + } + + /// Skip writing the length of fixed size arrays (`[u8; N]`) before writing the array + pub fn skip_fixed_array_length(self) -> Configuration { + Self::generate() + } + + /// Write the length of fixed size arrays (`[u8; N]`) before writing the array + pub fn write_fixed_array_length(self) -> Configuration { + Self::generate() + } +} + +/// Indicates a type is valid for controlling the bincode configuration +pub trait Config: + InternalEndianConfig + InternalArrayLengthConfig + InternalIntEncodingConfig + Copy + Clone +{ +} + +impl Config for T where + T: InternalEndianConfig + InternalArrayLengthConfig + InternalIntEncodingConfig + Copy + Clone +{ +} + +#[doc(hidden)] +#[derive(Copy, Clone)] +pub struct BigEndian {} + +impl InternalEndianConfig for BigEndian { + const ENDIAN: Endian = Endian::Big; +} + +#[doc(hidden)] +#[derive(Copy, Clone)] +pub struct LittleEndian {} + +impl InternalEndianConfig for LittleEndian { + const ENDIAN: Endian = Endian::Little; +} + +#[doc(hidden)] +#[derive(Copy, Clone)] +pub struct Fixint {} + +impl InternalIntEncodingConfig for Fixint { + const INT_ENCODING: IntEncoding = IntEncoding::Fixed; +} + +#[doc(hidden)] +#[derive(Copy, Clone)] +pub struct Varint {} + +impl InternalIntEncodingConfig for Varint { + const INT_ENCODING: IntEncoding = IntEncoding::Variable; +} + +#[doc(hidden)] +#[derive(Copy, Clone)] +pub struct SkipFixedArrayLength {} + +impl InternalArrayLengthConfig for SkipFixedArrayLength { + const SKIP_FIXED_ARRAY_LENGTH: bool = true; +} + +#[doc(hidden)] +#[derive(Copy, Clone)] +pub struct WriteFixedArrayLength {} + +impl InternalArrayLengthConfig for WriteFixedArrayLength { + const SKIP_FIXED_ARRAY_LENGTH: bool = false; +} + +mod internal { + use super::Configuration; + + pub trait 
InternalEndianConfig { + const ENDIAN: Endian; + } + + impl InternalEndianConfig for Configuration { + const ENDIAN: Endian = E::ENDIAN; + } + + #[derive(PartialEq, Eq)] + pub enum Endian { + Little, + Big, + } + + pub trait InternalIntEncodingConfig { + const INT_ENCODING: IntEncoding; + } + + impl InternalIntEncodingConfig for Configuration { + const INT_ENCODING: IntEncoding = I::INT_ENCODING; + } + + #[derive(PartialEq, Eq)] + pub enum IntEncoding { + Fixed, + Variable, + } + + pub trait InternalArrayLengthConfig { + const SKIP_FIXED_ARRAY_LENGTH: bool; + } + + impl InternalArrayLengthConfig for Configuration { + const SKIP_FIXED_ARRAY_LENGTH: bool = A::SKIP_FIXED_ARRAY_LENGTH; + } +} diff --git a/src/config/endian.rs b/src/config/endian.rs deleted file mode 100644 index 4d924bf0d..000000000 --- a/src/config/endian.rs +++ /dev/null @@ -1,29 +0,0 @@ -use byteorder::{self, ByteOrder}; - -pub trait BincodeByteOrder { - type Endian: ByteOrder + 'static; -} - -/// Little-endian byte ordering. -#[derive(Copy, Clone)] -pub struct LittleEndian; - -/// Big-endian byte ordering. -#[derive(Copy, Clone)] -pub struct BigEndian; - -/// The native byte ordering of the current system. -#[derive(Copy, Clone)] -pub struct NativeEndian; - -impl BincodeByteOrder for LittleEndian { - type Endian = byteorder::LittleEndian; -} - -impl BincodeByteOrder for BigEndian { - type Endian = byteorder::BigEndian; -} - -impl BincodeByteOrder for NativeEndian { - type Endian = byteorder::NativeEndian; -} diff --git a/src/config/int.rs b/src/config/int.rs deleted file mode 100644 index 4976b7430..000000000 --- a/src/config/int.rs +++ /dev/null @@ -1,755 +0,0 @@ -use std::io::Write; -use std::mem::size_of; - -use super::{BincodeByteOrder, Options}; -use crate::de::read::BincodeRead; -use crate::error::{ErrorKind, Result}; - -pub trait IntEncoding { - /// Gets the size (in bytes) that a value would be serialized to. 
- fn u16_size(n: u16) -> u64; - /// Gets the size (in bytes) that a value would be serialized to. - fn u32_size(n: u32) -> u64; - /// Gets the size (in bytes) that a value would be serialized to. - fn u64_size(n: u64) -> u64; - - /// Gets the size (in bytes) that a value would be serialized to. - fn i16_size(n: i16) -> u64; - /// Gets the size (in bytes) that a value would be serialized to. - fn i32_size(n: i32) -> u64; - /// Gets the size (in bytes) that a value would be serialized to. - fn i64_size(n: i64) -> u64; - - #[inline(always)] - fn len_size(len: usize) -> u64 { - Self::u64_size(len as u64) - } - - /// Serializes a sequence length. - #[inline(always)] - fn serialize_len( - ser: &mut crate::ser::Serializer, - len: usize, - ) -> Result<()> { - Self::serialize_u64(ser, len as u64) - } - - fn serialize_u16( - ser: &mut crate::ser::Serializer, - val: u16, - ) -> Result<()>; - - fn serialize_u32( - ser: &mut crate::ser::Serializer, - val: u32, - ) -> Result<()>; - - fn serialize_u64( - ser: &mut crate::ser::Serializer, - val: u64, - ) -> Result<()>; - - fn serialize_i16( - ser: &mut crate::ser::Serializer, - val: i16, - ) -> Result<()>; - - fn serialize_i32( - ser: &mut crate::ser::Serializer, - val: i32, - ) -> Result<()>; - - fn serialize_i64( - ser: &mut crate::ser::Serializer, - val: i64, - ) -> Result<()>; - - /// Deserializes a sequence length. 
- #[inline(always)] - fn deserialize_len<'de, R: BincodeRead<'de>, O: Options>( - de: &mut crate::de::Deserializer, - ) -> Result { - Self::deserialize_u64(de).and_then(cast_u64_to_usize) - } - - fn deserialize_u16<'de, R: BincodeRead<'de>, O: Options>( - de: &mut crate::de::Deserializer, - ) -> Result; - - fn deserialize_u32<'de, R: BincodeRead<'de>, O: Options>( - de: &mut crate::de::Deserializer, - ) -> Result; - - fn deserialize_u64<'de, R: BincodeRead<'de>, O: Options>( - de: &mut crate::de::Deserializer, - ) -> Result; - - fn deserialize_i16<'de, R: BincodeRead<'de>, O: Options>( - de: &mut crate::de::Deserializer, - ) -> Result; - - fn deserialize_i32<'de, R: BincodeRead<'de>, O: Options>( - de: &mut crate::de::Deserializer, - ) -> Result; - - fn deserialize_i64<'de, R: BincodeRead<'de>, O: Options>( - de: &mut crate::de::Deserializer, - ) -> Result; - - serde_if_integer128! { - fn u128_size(v: u128) -> u64; - fn i128_size(v: i128) -> u64; - fn serialize_u128( - ser: &mut crate::Serializer, - val: u128, - ) -> Result<()>; - fn deserialize_u128<'de, R: BincodeRead<'de>, O: Options>( - de: &mut crate::Deserializer, - ) -> Result; - fn serialize_i128( - ser: &mut crate::Serializer, - val: i128, - ) -> Result<()>; - fn deserialize_i128<'de, R: BincodeRead<'de>, O: Options>( - de: &mut crate::Deserializer, - ) -> Result; - } -} - -/// Fixed-size integer encoding. -/// -/// * Fixed size integers are encoded directly -/// * Enum discriminants are encoded as u32 -/// * Lengths and usize are encoded as u64 -#[derive(Copy, Clone)] -pub struct FixintEncoding; - -/// Variable-size integer encoding (excepting [ui]8). -/// -/// Encoding an unsigned integer v (of any type excepting u8) works as follows: -/// -/// 1. If `u < 251`, encode it as a single byte with that value. -/// 2. If `251 <= u < 2**16`, encode it as a literal byte 251, followed by a u16 with value `u`. -/// 3. If `2**16 <= u < 2**32`, encode it as a literal byte 252, followed by a u32 with value `u`. 
-/// 4. If `2**32 <= u < 2**64`, encode it as a literal byte 253, followed by a u64 with value `u`. -/// 5. If `2**64 <= u < 2**128`, encode it as a literal byte 254, followed by a -/// u128 with value `u`. -/// -/// Then, for signed integers, we first convert to unsigned using the zigzag algorithm, -/// and then encode them as we do for unsigned integers generally. The reason we use this -/// algorithm is that it encodes those values which are close to zero in less bytes; the -/// obvious algorithm, where we encode the cast values, gives a very large encoding for all -/// negative values. -/// -/// The zigzag algorithm is defined as follows: -/// -/// ```ignore -/// fn zigzag(v: Signed) -> Unsigned { -/// match v { -/// 0 => 0, -/// v if v < 0 => |v| * 2 - 1 -/// v if v > 0 => v * 2 -/// } -/// } -/// ``` -/// -/// And works such that: -/// -/// ```ignore -/// assert_eq!(zigzag(0), 0); -/// assert_eq!(zigzag(-1), 1); -/// assert_eq!(zigzag(1), 2); -/// assert_eq!(zigzag(-2), 3); -/// assert_eq!(zigzag(2), 4); -/// assert_eq!(zigzag(i64::min_value()), u64::max_value()); -/// ``` -/// -/// Note that u256 and the like are unsupported by this format; if and when they are added to the -/// language, they may be supported via the extension point given by the 255 byte. -#[derive(Copy, Clone)] -pub struct VarintEncoding; - -const SINGLE_BYTE_MAX: u8 = 250; -const U16_BYTE: u8 = 251; -const U32_BYTE: u8 = 252; -const U64_BYTE: u8 = 253; -const U128_BYTE: u8 = 254; -const DESERIALIZE_EXTENSION_POINT_ERR: &str = r#" -Byte 255 is treated as an extension point; it should not be encoding anything. -Do you have a mismatched bincode version or configuration? -"#; - -#[inline(never)] -#[cold] -fn deserialize_varint_cold<'a, O, R>(reader: &mut R) -> Result -where - O: byteorder::ByteOrder, - R: BincodeRead<'a>, -{ - use byteorder::ReadBytesExt; - #[allow(ellipsis_inclusive_range_patterns)] - match reader.read_u8()? 
{ - byte @ 0...crate::config::int::SINGLE_BYTE_MAX => Ok(byte as u64), - U16_BYTE => Ok(reader.read_u16::()? as u64), - U32_BYTE => Ok(reader.read_u32::()? as u64), - U64_BYTE => Ok(reader.read_u64::()? as u64), - other => invalid_varint_discriminant(other), - } -} - -#[inline(never)] -#[cold] -fn invalid_varint_discriminant(discriminant: u8) -> Result { - let msg = match discriminant { - U128_BYTE => { - "Invalid value (u128 range): you may have a version or configuration disagreement?" - } - _ => DESERIALIZE_EXTENSION_POINT_ERR, - }; - Err(Box::new(crate::ErrorKind::Custom(msg.to_string()))) -} - -impl VarintEncoding { - fn varint_size(n: u64) -> u64 { - if n <= SINGLE_BYTE_MAX as u64 { - 1 - } else if n <= u16::max_value() as u64 { - (1 + size_of::()) as u64 - } else if n <= u32::max_value() as u64 { - (1 + size_of::()) as u64 - } else { - (1 + size_of::()) as u64 - } - } - - #[inline(always)] - fn zigzag_encode(n: i64) -> u64 { - if n < 0 { - // let's avoid the edge case of i64::min_value() - // !n is equal to `-n - 1`, so this is: - // !n * 2 + 1 = 2(-n - 1) + 1 = -2n - 2 + 1 = -2n - 1 - !(n as u64) * 2 + 1 - } else { - (n as u64) * 2 - } - } - - #[inline(always)] - fn zigzag_decode(n: u64) -> i64 { - if n % 2 == 0 { - // positive number - (n / 2) as i64 - } else { - // negative number - // !m * 2 + 1 = n - // !m * 2 = n - 1 - // !m = (n - 1) / 2 - // m = !((n - 1) / 2) - // since we have n is odd, we have floor(n / 2) = floor((n - 1) / 2) - !(n / 2) as i64 - } - } - - fn serialize_varint( - ser: &mut crate::ser::Serializer, - n: u64, - ) -> Result<()> { - if n <= SINGLE_BYTE_MAX as u64 { - ser.serialize_byte(n as u8) - } else if n <= u16::max_value() as u64 { - ser.serialize_byte(U16_BYTE)?; - ser.serialize_literal_u16(n as u16) - } else if n <= u32::max_value() as u64 { - ser.serialize_byte(U32_BYTE)?; - ser.serialize_literal_u32(n as u32) - } else { - ser.serialize_byte(U64_BYTE)?; - ser.serialize_literal_u64(n as u64) - } - } - - #[inline] - fn 
deserialize_varint<'de, R: BincodeRead<'de>, O: Options>( - de: &mut crate::de::Deserializer, - ) -> Result { - let read_u16 = <::Endian as byteorder::ByteOrder>::read_u16; - let read_u32 = <::Endian as byteorder::ByteOrder>::read_u32; - let read_u64 = <::Endian as byteorder::ByteOrder>::read_u64; - if let Some(bytes) = de.reader.peek_read(9) { - let (discriminant, bytes) = bytes.split_at(1); - let (out, used) = match discriminant[0] { - byte @ 0..=crate::config::int::SINGLE_BYTE_MAX => (byte as u64, 1), - U16_BYTE => (read_u16(&bytes[..2]) as u64, 3), - U32_BYTE => (read_u32(&bytes[..4]) as u64, 5), - U64_BYTE => (read_u64(&bytes[..8]) as u64, 9), - other => return invalid_varint_discriminant(other), - }; - de.reader.consume(used); - Ok(out) - } else { - deserialize_varint_cold::<::Endian, R>(&mut de.reader) - } - } - - serde_if_integer128! { - // see zigzag_encode and zigzag_decode for implementation comments - #[inline(always)] - fn zigzag128_encode(n: i128) -> u128 { - if n < 0 { - !(n as u128) * 2 + 1 - } else { - (n as u128) * 2 - } - } - #[inline(always)] - fn zigzag128_decode(n: u128) -> i128 { - if n % 2 == 0 { - (n / 2) as i128 - } else { - !(n / 2) as i128 - } - } - - fn varint128_size(n: u128) -> u64 { - if n <= SINGLE_BYTE_MAX as u128 { - 1 - } else if n <= u16::max_value() as u128 { - (1 + size_of::()) as u64 - } else if n <= u32::max_value() as u128 { - (1 + size_of::()) as u64 - } else if n <= u64::max_value() as u128 { - (1 + size_of::()) as u64 - } else { - (1 + size_of::()) as u64 - } - } - - fn serialize_varint128( - ser: &mut crate::ser::Serializer, - n: u128, - ) -> Result<()> { - if n <= SINGLE_BYTE_MAX as u128 { - ser.serialize_byte(n as u8) - } else if n <= u16::max_value() as u128 { - ser.serialize_byte(U16_BYTE)?; - ser.serialize_literal_u16(n as u16) - } else if n <= u32::max_value() as u128 { - ser.serialize_byte(U32_BYTE)?; - ser.serialize_literal_u32(n as u32) - } else if n <= u64::max_value() as u128 { - 
ser.serialize_byte(U64_BYTE)?; - ser.serialize_literal_u64(n as u64) - } else { - ser.serialize_byte(U128_BYTE)?; - ser.serialize_literal_u128(n) - } - } - - fn deserialize_varint128<'de, R: BincodeRead<'de>, O: Options>( - de: &mut crate::de::Deserializer, - ) -> Result { - #[allow(ellipsis_inclusive_range_patterns)] - match de.deserialize_byte()? { - byte @ 0...SINGLE_BYTE_MAX => Ok(byte as u128), - U16_BYTE => Ok(de.deserialize_literal_u16()? as u128), - U32_BYTE => Ok(de.deserialize_literal_u32()? as u128), - U64_BYTE => Ok(de.deserialize_literal_u64()? as u128), - U128_BYTE => de.deserialize_literal_u128(), - _ => Err(Box::new(ErrorKind::Custom(DESERIALIZE_EXTENSION_POINT_ERR.to_string()))), - } - } - } -} - -impl IntEncoding for FixintEncoding { - #[inline(always)] - fn u16_size(_: u16) -> u64 { - size_of::() as u64 - } - #[inline(always)] - fn u32_size(_: u32) -> u64 { - size_of::() as u64 - } - #[inline(always)] - fn u64_size(_: u64) -> u64 { - size_of::() as u64 - } - - #[inline(always)] - fn i16_size(_: i16) -> u64 { - size_of::() as u64 - } - #[inline(always)] - fn i32_size(_: i32) -> u64 { - size_of::() as u64 - } - #[inline(always)] - fn i64_size(_: i64) -> u64 { - size_of::() as u64 - } - - #[inline(always)] - fn serialize_u16( - ser: &mut crate::Serializer, - val: u16, - ) -> Result<()> { - ser.serialize_literal_u16(val) - } - #[inline(always)] - fn serialize_u32( - ser: &mut crate::Serializer, - val: u32, - ) -> Result<()> { - ser.serialize_literal_u32(val) - } - #[inline(always)] - fn serialize_u64( - ser: &mut crate::Serializer, - val: u64, - ) -> Result<()> { - ser.serialize_literal_u64(val) - } - - #[inline(always)] - fn serialize_i16( - ser: &mut crate::Serializer, - val: i16, - ) -> Result<()> { - ser.serialize_literal_u16(val as u16) - } - #[inline(always)] - fn serialize_i32( - ser: &mut crate::Serializer, - val: i32, - ) -> Result<()> { - ser.serialize_literal_u32(val as u32) - } - #[inline(always)] - fn serialize_i64( - ser: &mut 
crate::Serializer, - val: i64, - ) -> Result<()> { - ser.serialize_literal_u64(val as u64) - } - - #[inline(always)] - fn deserialize_u16<'de, R: BincodeRead<'de>, O: Options>( - de: &mut crate::Deserializer, - ) -> Result { - de.deserialize_literal_u16() - } - #[inline(always)] - fn deserialize_u32<'de, R: BincodeRead<'de>, O: Options>( - de: &mut crate::Deserializer, - ) -> Result { - de.deserialize_literal_u32() - } - #[inline(always)] - fn deserialize_u64<'de, R: BincodeRead<'de>, O: Options>( - de: &mut crate::Deserializer, - ) -> Result { - de.deserialize_literal_u64() - } - - #[inline(always)] - fn deserialize_i16<'de, R: BincodeRead<'de>, O: Options>( - de: &mut crate::Deserializer, - ) -> Result { - Ok(de.deserialize_literal_u16()? as i16) - } - #[inline(always)] - fn deserialize_i32<'de, R: BincodeRead<'de>, O: Options>( - de: &mut crate::Deserializer, - ) -> Result { - Ok(de.deserialize_literal_u32()? as i32) - } - #[inline(always)] - fn deserialize_i64<'de, R: BincodeRead<'de>, O: Options>( - de: &mut crate::Deserializer, - ) -> Result { - Ok(de.deserialize_literal_u64()? as i64) - } - - serde_if_integer128! { - #[inline(always)] - fn u128_size(_: u128) -> u64{ - size_of::() as u64 - } - #[inline(always)] - fn i128_size(_: i128) -> u64{ - size_of::() as u64 - } - - #[inline(always)] - fn serialize_u128( - ser: &mut crate::Serializer, - val: u128, - ) -> Result<()> { - ser.serialize_literal_u128(val) - } - #[inline(always)] - fn serialize_i128( - ser: &mut crate::Serializer, - val: i128, - ) -> Result<()> { - ser.serialize_literal_u128(val as u128) - } - #[inline(always)] - fn deserialize_u128<'de, R: BincodeRead<'de>, O: Options>( - de: &mut crate::Deserializer, - ) -> Result { - de.deserialize_literal_u128() - } - #[inline(always)] - fn deserialize_i128<'de, R: BincodeRead<'de>, O: Options>( - de: &mut crate::Deserializer, - ) -> Result { - Ok(de.deserialize_literal_u128()? 
as i128) - } - } -} - -impl IntEncoding for VarintEncoding { - #[inline(always)] - fn u16_size(n: u16) -> u64 { - Self::varint_size(n as u64) - } - #[inline(always)] - fn u32_size(n: u32) -> u64 { - Self::varint_size(n as u64) - } - #[inline(always)] - fn u64_size(n: u64) -> u64 { - Self::varint_size(n) - } - - #[inline(always)] - fn i16_size(n: i16) -> u64 { - Self::varint_size(Self::zigzag_encode(n as i64)) - } - #[inline(always)] - fn i32_size(n: i32) -> u64 { - Self::varint_size(Self::zigzag_encode(n as i64)) - } - #[inline(always)] - fn i64_size(n: i64) -> u64 { - Self::varint_size(Self::zigzag_encode(n)) - } - - #[inline(always)] - fn serialize_u16( - ser: &mut crate::Serializer, - val: u16, - ) -> Result<()> { - Self::serialize_varint(ser, val as u64) - } - #[inline(always)] - fn serialize_u32( - ser: &mut crate::Serializer, - val: u32, - ) -> Result<()> { - Self::serialize_varint(ser, val as u64) - } - #[inline(always)] - fn serialize_u64( - ser: &mut crate::Serializer, - val: u64, - ) -> Result<()> { - Self::serialize_varint(ser, val) - } - - #[inline(always)] - fn serialize_i16( - ser: &mut crate::Serializer, - val: i16, - ) -> Result<()> { - Self::serialize_varint(ser, Self::zigzag_encode(val as i64)) - } - #[inline(always)] - fn serialize_i32( - ser: &mut crate::Serializer, - val: i32, - ) -> Result<()> { - Self::serialize_varint(ser, Self::zigzag_encode(val as i64)) - } - #[inline(always)] - fn serialize_i64( - ser: &mut crate::Serializer, - val: i64, - ) -> Result<()> { - Self::serialize_varint(ser, Self::zigzag_encode(val)) - } - - #[inline(always)] - fn deserialize_u16<'de, R: BincodeRead<'de>, O: Options>( - de: &mut crate::Deserializer, - ) -> Result { - Self::deserialize_varint(de).and_then(cast_u64_to_u16) - } - #[inline(always)] - fn deserialize_u32<'de, R: BincodeRead<'de>, O: Options>( - de: &mut crate::Deserializer, - ) -> Result { - Self::deserialize_varint(de).and_then(cast_u64_to_u32) - } - #[inline(always)] - fn deserialize_u64<'de, R: 
BincodeRead<'de>, O: Options>( - de: &mut crate::Deserializer, - ) -> Result { - Self::deserialize_varint(de) - } - - #[inline(always)] - fn deserialize_i16<'de, R: BincodeRead<'de>, O: Options>( - de: &mut crate::Deserializer, - ) -> Result { - Self::deserialize_varint(de) - .map(Self::zigzag_decode) - .and_then(cast_i64_to_i16) - } - #[inline(always)] - fn deserialize_i32<'de, R: BincodeRead<'de>, O: Options>( - de: &mut crate::Deserializer, - ) -> Result { - Self::deserialize_varint(de) - .map(Self::zigzag_decode) - .and_then(cast_i64_to_i32) - } - #[inline(always)] - fn deserialize_i64<'de, R: BincodeRead<'de>, O: Options>( - de: &mut crate::Deserializer, - ) -> Result { - Self::deserialize_varint(de).map(Self::zigzag_decode) - } - - serde_if_integer128! { - #[inline(always)] - fn u128_size(n: u128) -> u64 { - Self::varint128_size(n) - } - #[inline(always)] - fn i128_size(n: i128) -> u64 { - Self::varint128_size(Self::zigzag128_encode(n)) - } - #[inline(always)] - fn serialize_u128( - ser: &mut crate::Serializer, - val: u128, - ) -> Result<()> { - Self::serialize_varint128(ser, val) - } - #[inline(always)] - fn serialize_i128( - ser: &mut crate::Serializer, - val: i128, - ) -> Result<()> { - Self::serialize_varint128(ser, Self::zigzag128_encode(val)) - } - #[inline(always)] - fn deserialize_u128<'de, R: BincodeRead<'de>, O: Options>( - de: &mut crate::Deserializer, - ) -> Result { - Self::deserialize_varint128(de) - } - #[inline(always)] - fn deserialize_i128<'de, R: BincodeRead<'de>, O: Options>( - de: &mut crate::Deserializer, - ) -> Result { - Self::deserialize_varint128(de).map(Self::zigzag128_decode) - } - } -} - -fn cast_u64_to_usize(n: u64) -> Result { - if n <= usize::max_value() as u64 { - Ok(n as usize) - } else { - Err(Box::new(ErrorKind::Custom(format!( - "Invalid size {}: sizes must fit in a usize (0 to {})", - n, - usize::max_value() - )))) - } -} -#[inline] -fn cast_u64_to_u32(n: u64) -> Result { - if n <= u32::max_value() as u64 { - Ok(n as u32) 
- } else { - Err(Box::new(ErrorKind::Custom(format!( - "Invalid u32 {}: you may have a version disagreement?", - n, - )))) - } -} - -#[inline] -fn cast_u64_to_u16(n: u64) -> Result { - if n <= u16::max_value() as u64 { - Ok(n as u16) - } else { - Err(Box::new(ErrorKind::Custom(format!( - "Invalid u16 {}: you may have a version disagreement?", - n, - )))) - } -} - -fn cast_i64_to_i32(n: i64) -> Result { - if n <= i32::max_value() as i64 && n >= i32::min_value() as i64 { - Ok(n as i32) - } else { - Err(Box::new(ErrorKind::Custom(format!( - "Invalid i32 {}: you may have a version disagreement?", - n, - )))) - } -} - -fn cast_i64_to_i16(n: i64) -> Result { - if n <= i16::max_value() as i64 && n >= i16::min_value() as i64 { - Ok(n as i16) - } else { - Err(Box::new(ErrorKind::Custom(format!( - "Invalid i16 {}: you may have a version disagreement?", - n, - )))) - } -} - -#[cfg(test)] -mod test { - use super::VarintEncoding; - - #[test] - fn test_zigzag_encode() { - let zigzag = VarintEncoding::zigzag_encode; - - assert_eq!(zigzag(0), 0); - for x in 1..512 { - assert_eq!(zigzag(x), (x as u64) * 2); - assert_eq!(zigzag(-x), (x as u64) * 2 - 1); - } - } - - #[test] - fn test_zigzag_decode() { - // zigzag' - let zigzagp = VarintEncoding::zigzag_decode; - for x in (0..512).map(|x| x * 2) { - assert_eq!(zigzagp(x), x as i64 / 2); - assert_eq!(zigzagp(x + 1), -(x as i64) / 2 - 1); - } - } - - #[test] - fn test_zigzag_edge_cases() { - let (zigzag, zigzagp) = (VarintEncoding::zigzag_encode, VarintEncoding::zigzag_decode); - - assert_eq!(zigzag(i64::max_value()), u64::max_value() - 1); - assert_eq!(zigzag(i64::min_value()), u64::max_value()); - - assert_eq!(zigzagp(u64::max_value() - 1), i64::max_value()); - assert_eq!(zigzagp(u64::max_value()), i64::min_value()); - } -} diff --git a/src/config/limit.rs b/src/config/limit.rs deleted file mode 100644 index 93ad2517b..000000000 --- a/src/config/limit.rs +++ /dev/null @@ -1,49 +0,0 @@ -use crate::error::{ErrorKind, Result}; - -/// A 
trait for stopping serialization and deserialization when a certain limit has been reached. -pub trait SizeLimit { - /// Tells the SizeLimit that a certain number of bytes has been - /// read or written. Returns Err if the limit has been exceeded. - fn add(&mut self, n: u64) -> Result<()>; - /// Returns the hard limit (if one exists) - fn limit(&self) -> Option; -} - -/// A SizeLimit that restricts serialized or deserialized messages from -/// exceeding a certain byte length. -#[derive(Copy, Clone)] -pub struct Bounded(pub u64); - -/// A SizeLimit without a limit! -/// Use this if you don't care about the size of encoded or decoded messages. -#[derive(Copy, Clone)] -pub struct Infinite; - -impl SizeLimit for Bounded { - #[inline(always)] - fn add(&mut self, n: u64) -> Result<()> { - if self.0 >= n { - self.0 -= n; - Ok(()) - } else { - Err(Box::new(ErrorKind::SizeLimit)) - } - } - - #[inline(always)] - fn limit(&self) -> Option { - Some(self.0) - } -} - -impl SizeLimit for Infinite { - #[inline(always)] - fn add(&mut self, _: u64) -> Result<()> { - Ok(()) - } - - #[inline(always)] - fn limit(&self) -> Option { - None - } -} diff --git a/src/config/mod.rs b/src/config/mod.rs deleted file mode 100644 index e4bc4a5b6..000000000 --- a/src/config/mod.rs +++ /dev/null @@ -1,405 +0,0 @@ -//! `bincode` uses a Builder-pattern to configure the Serializers and Deserializers in this -//! crate. This means that if you need to customize the behavior of `bincode`, you should create an -//! instance of the `DefaultOptions` struct: -//! -//! ```rust -//! use bincode::Options; -//! let my_options = bincode::DefaultOptions::new(); -//! ``` -//! -//! # Options Struct vs bincode functions -//! -//! Due to historical reasons, the default options used by the `serialize()` and `deserialize()` -//! family of functions are different than the default options created by the `DefaultOptions` struct: -//! -//! | | Byte limit | Endianness | Int Encoding | Trailing Behavior | -//! 
|----------|------------|------------|--------------|-------------------| -//! | struct | Unlimited | Little | Varint | Reject | -//! | function | Unlimited | Little | Fixint | Allow | -//! -//! This means that if you want to use the `Serialize` / `Deserialize` structs with the same -//! settings as the functions, you should adjust the `DefaultOptions` struct like so: -//! -//! ```rust -//! use bincode::Options; -//! let my_options = bincode::DefaultOptions::new() -//! .with_fixint_encoding() -//! .allow_trailing_bytes(); -//! ``` - -use crate::de::read::BincodeRead; -use crate::error::Result; -use std::io::{Read, Write}; -use std::marker::PhantomData; - -pub(crate) use self::endian::BincodeByteOrder; -pub(crate) use self::int::IntEncoding; -pub(crate) use self::internal::*; -pub(crate) use self::limit::SizeLimit; -pub(crate) use self::trailing::TrailingBytes; - -pub use self::endian::{BigEndian, LittleEndian, NativeEndian}; -pub use self::int::{FixintEncoding, VarintEncoding}; -pub use self::limit::{Bounded, Infinite}; -pub use self::trailing::{AllowTrailing, RejectTrailing}; - -mod endian; -mod int; -mod limit; -mod trailing; - -/// The default options for bincode serialization/deserialization. -/// -/// ### Defaults -/// By default bincode will use little-endian encoding for multi-byte integers, and will not -/// limit the number of serialized/deserialized bytes. -/// -/// ### Configuring `DefaultOptions` -/// -/// `DefaultOptions` implements the [Options] trait, which means it exposes functions to change the behavior of bincode. -/// -/// For example, if you wanted to limit the bincode deserializer to 1 kilobyte of user input: -/// -/// ```rust -/// use bincode::Options; -/// let my_options = bincode::DefaultOptions::new().with_limit(1024); -/// ``` -/// -/// ### DefaultOptions struct vs. functions -/// -/// The default configuration used by this struct is not the same as that used by the bincode -/// helper functions in the root of this crate. 
See the -/// [config](index.html#options-struct-vs-bincode-functions) module for more details -#[derive(Copy, Clone)] -pub struct DefaultOptions(Infinite); - -impl DefaultOptions { - /// Get a default configuration object. - /// - /// ### Default Configuration: - /// - /// | Byte limit | Endianness | Int Encoding | Trailing Behavior | - /// |------------|------------|--------------|-------------------| - /// | Unlimited | Little | Varint | Reject | - pub fn new() -> DefaultOptions { - DefaultOptions(Infinite) - } -} - -impl Default for DefaultOptions { - fn default() -> Self { - Self::new() - } -} - -impl InternalOptions for DefaultOptions { - type Limit = Infinite; - type Endian = LittleEndian; - type IntEncoding = VarintEncoding; - type Trailing = RejectTrailing; - - #[inline(always)] - fn limit(&mut self) -> &mut Infinite { - &mut self.0 - } -} - -/// A configuration builder trait whose options Bincode will use -/// while serializing and deserializing. -/// -/// ### Options -/// Endianness: The endianness with which multi-byte integers will be read/written. *default: little endian* -/// -/// Limit: The maximum number of bytes that will be read/written in a bincode serialize/deserialize. *default: unlimited* -/// -/// Int Encoding: The encoding used for numbers, enum discriminants, and lengths. *default: varint* -/// -/// Trailing Behavior: The behavior when there are trailing bytes left over in a slice after deserialization. *default: reject* -/// -/// ### Byte Limit Details -/// The purpose of byte-limiting is to prevent Denial-Of-Service attacks whereby malicious attackers get bincode -/// deserialization to crash your process by allocating too much memory or keeping a connection open for too long. -/// -/// When a byte limit is set, bincode will return `Err` on any deserialization that goes over the limit, or any -/// serialization that goes over the limit. -pub trait Options: InternalOptions + Sized { - /// Sets the byte limit to be unlimited. 
- /// This is the default. - fn with_no_limit(self) -> WithOtherLimit { - WithOtherLimit::new(self, Infinite) - } - - /// Sets the byte limit to `limit`. - fn with_limit(self, limit: u64) -> WithOtherLimit { - WithOtherLimit::new(self, Bounded(limit)) - } - - /// Sets the endianness to little-endian - /// This is the default. - fn with_little_endian(self) -> WithOtherEndian { - WithOtherEndian::new(self) - } - - /// Sets the endianness to big-endian - fn with_big_endian(self) -> WithOtherEndian { - WithOtherEndian::new(self) - } - - /// Sets the endianness to the the machine-native endianness - fn with_native_endian(self) -> WithOtherEndian { - WithOtherEndian::new(self) - } - - /// Sets the integer encoding to varint - fn with_varint_encoding(self) -> WithOtherIntEncoding { - WithOtherIntEncoding::new(self) - } - - /// Sets the integer encoding to be fixed - fn with_fixint_encoding(self) -> WithOtherIntEncoding { - WithOtherIntEncoding::new(self) - } - - /// Sets the deserializer to reject trailing bytes - fn reject_trailing_bytes(self) -> WithOtherTrailing { - WithOtherTrailing::new(self) - } - - /// Sets the deserializer to allow trailing bytes - fn allow_trailing_bytes(self) -> WithOtherTrailing { - WithOtherTrailing::new(self) - } - - /// Serializes a serializable object into a `Vec` of bytes using this configuration - #[inline(always)] - fn serialize(self, t: &S) -> Result> { - crate::internal::serialize(t, self) - } - - /// Returns the size that an object would be if serialized using Bincode with this configuration - #[inline(always)] - fn serialized_size(self, t: &T) -> Result { - crate::internal::serialized_size(t, self) - } - - /// Serializes an object directly into a `Writer` using this configuration - /// - /// If the serialization would take more bytes than allowed by the size limit, an error - /// is returned and *no bytes* will be written into the `Writer` - #[inline(always)] - fn serialize_into(self, w: W, t: &T) -> Result<()> { - 
crate::internal::serialize_into(w, t, self) - } - - /// Deserializes a slice of bytes into an instance of `T` using this configuration - #[inline(always)] - fn deserialize<'a, T: serde::Deserialize<'a>>(self, bytes: &'a [u8]) -> Result { - crate::internal::deserialize(bytes, self) - } - - /// TODO: document - #[doc(hidden)] - #[inline(always)] - fn deserialize_in_place<'a, R, T>(self, reader: R, place: &mut T) -> Result<()> - where - R: BincodeRead<'a>, - T: serde::de::Deserialize<'a>, - { - crate::internal::deserialize_in_place(reader, self, place) - } - - /// Deserializes a slice of bytes with state `seed` using this configuration. - #[inline(always)] - fn deserialize_seed<'a, T: serde::de::DeserializeSeed<'a>>( - self, - seed: T, - bytes: &'a [u8], - ) -> Result { - crate::internal::deserialize_seed(seed, bytes, self) - } - - /// Deserializes an object directly from a `Read`er using this configuration - /// - /// If this returns an `Error`, `reader` may be in an invalid state. - #[inline(always)] - fn deserialize_from(self, reader: R) -> Result { - crate::internal::deserialize_from(reader, self) - } - - /// Deserializes an object directly from a `Read`er with state `seed` using this configuration - /// - /// If this returns an `Error`, `reader` may be in an invalid state. - #[inline(always)] - fn deserialize_from_seed<'a, R: Read, T: serde::de::DeserializeSeed<'a>>( - self, - seed: T, - reader: R, - ) -> Result { - crate::internal::deserialize_from_seed(seed, reader, self) - } - - /// Deserializes an object from a custom `BincodeRead`er using the default configuration. - /// It is highly recommended to use `deserialize_from` unless you need to implement - /// `BincodeRead` for performance reasons. - /// - /// If this returns an `Error`, `reader` may be in an invalid state. 
- #[inline(always)] - fn deserialize_from_custom<'a, R: BincodeRead<'a>, T: serde::de::DeserializeOwned>( - self, - reader: R, - ) -> Result { - crate::internal::deserialize_from_custom(reader, self) - } - - /// Deserializes an object from a custom `BincodeRead`er with state `seed` using the default - /// configuration. It is highly recommended to use `deserialize_from` unless you need to - /// implement `BincodeRead` for performance reasons. - /// - /// If this returns an `Error`, `reader` may be in an invalid state. - #[inline(always)] - fn deserialize_from_custom_seed<'a, R: BincodeRead<'a>, T: serde::de::DeserializeSeed<'a>>( - self, - seed: T, - reader: R, - ) -> Result { - crate::internal::deserialize_from_custom_seed(seed, reader, self) - } -} - -impl Options for T {} - -/// A configuration struct with a user-specified byte limit -#[derive(Clone, Copy)] -pub struct WithOtherLimit { - _options: O, - pub(crate) new_limit: L, -} - -/// A configuration struct with a user-specified endian order -#[derive(Clone, Copy)] -pub struct WithOtherEndian { - options: O, - _endian: PhantomData, -} - -/// A configuration struct with a user-specified length encoding -#[derive(Clone, Copy)] -pub struct WithOtherIntEncoding { - options: O, - _length: PhantomData, -} - -/// A configuration struct with a user-specified trailing bytes behavior. 
-#[derive(Clone, Copy)] -pub struct WithOtherTrailing { - options: O, - _trailing: PhantomData, -} - -impl WithOtherLimit { - #[inline(always)] - pub(crate) fn new(options: O, limit: L) -> WithOtherLimit { - WithOtherLimit { - _options: options, - new_limit: limit, - } - } -} - -impl WithOtherEndian { - #[inline(always)] - pub(crate) fn new(options: O) -> WithOtherEndian { - WithOtherEndian { - options, - _endian: PhantomData, - } - } -} - -impl WithOtherIntEncoding { - #[inline(always)] - pub(crate) fn new(options: O) -> WithOtherIntEncoding { - WithOtherIntEncoding { - options, - _length: PhantomData, - } - } -} - -impl WithOtherTrailing { - #[inline(always)] - pub(crate) fn new(options: O) -> WithOtherTrailing { - WithOtherTrailing { - options, - _trailing: PhantomData, - } - } -} - -impl InternalOptions for WithOtherEndian { - type Limit = O::Limit; - type Endian = E; - type IntEncoding = O::IntEncoding; - type Trailing = O::Trailing; - #[inline(always)] - fn limit(&mut self) -> &mut O::Limit { - self.options.limit() - } -} - -impl InternalOptions for WithOtherLimit { - type Limit = L; - type Endian = O::Endian; - type IntEncoding = O::IntEncoding; - type Trailing = O::Trailing; - fn limit(&mut self) -> &mut L { - &mut self.new_limit - } -} - -impl InternalOptions for WithOtherIntEncoding { - type Limit = O::Limit; - type Endian = O::Endian; - type IntEncoding = I; - type Trailing = O::Trailing; - - fn limit(&mut self) -> &mut O::Limit { - self.options.limit() - } -} - -impl InternalOptions for WithOtherTrailing { - type Limit = O::Limit; - type Endian = O::Endian; - type IntEncoding = O::IntEncoding; - type Trailing = T; - - fn limit(&mut self) -> &mut O::Limit { - self.options.limit() - } -} - -mod internal { - use super::*; - - pub trait InternalOptions { - type Limit: SizeLimit + 'static; - type Endian: BincodeByteOrder + 'static; - type IntEncoding: IntEncoding + 'static; - type Trailing: TrailingBytes + 'static; - - fn limit(&mut self) -> &mut 
Self::Limit; - } - - impl<'a, O: InternalOptions> InternalOptions for &'a mut O { - type Limit = O::Limit; - type Endian = O::Endian; - type IntEncoding = O::IntEncoding; - type Trailing = O::Trailing; - - #[inline(always)] - fn limit(&mut self) -> &mut Self::Limit { - (*self).limit() - } - } -} diff --git a/src/config/trailing.rs b/src/config/trailing.rs deleted file mode 100644 index e78dc19eb..000000000 --- a/src/config/trailing.rs +++ /dev/null @@ -1,37 +0,0 @@ -use crate::de::read::SliceReader; -use crate::{ErrorKind, Result}; - -/// A trait for erroring deserialization if not all bytes were read. -pub trait TrailingBytes { - /// Checks a given slice reader to determine if deserialization used all bytes in the slice. - fn check_end(reader: &SliceReader) -> Result<()>; -} - -/// A TrailingBytes config that will allow trailing bytes in slices after deserialization. -#[derive(Copy, Clone)] -pub struct AllowTrailing; - -/// A TrailingBytes config that will cause bincode to produce an error if bytes are left over in the slice when deserialization is complete. - -#[derive(Copy, Clone)] -pub struct RejectTrailing; - -impl TrailingBytes for AllowTrailing { - #[inline(always)] - fn check_end(_reader: &SliceReader) -> Result<()> { - Ok(()) - } -} - -impl TrailingBytes for RejectTrailing { - #[inline(always)] - fn check_end(reader: &SliceReader) -> Result<()> { - if reader.is_finished() { - Ok(()) - } else { - Err(Box::new(ErrorKind::Custom( - "Slice had bytes remaining after deserialization".to_string(), - ))) - } - } -} diff --git a/src/de/decoder.rs b/src/de/decoder.rs new file mode 100644 index 000000000..df9756a40 --- /dev/null +++ b/src/de/decoder.rs @@ -0,0 +1,58 @@ +use super::{ + read::{BorrowReader, Reader}, + BorrowDecoder, Decoder, +}; +use crate::{config::Config, utils::Sealed}; + +/// A Decoder that reads bytes from a given reader `R`. +/// +/// This struct should rarely be used. +/// In most cases, prefer any of the `decode` functions. 
+/// +/// The ByteOrder that is chosen will impact the endianness that +/// is used to read integers out of the reader. +/// +/// ``` +/// # let slice: &[u8] = &[0, 0, 0, 0]; +/// # let some_reader = bincode::de::read::SliceReader::new(slice); +/// use bincode::de::{DecoderImpl, Decode}; +/// use bincode::config; +/// let mut decoder = DecoderImpl::new(some_reader, config::Configuration::standard()); +/// // this u32 can be any Decode +/// let value = u32::decode(&mut decoder).unwrap(); +/// ``` +pub struct DecoderImpl { + reader: R, + config: C, +} + +impl DecoderImpl { + /// Construct a new Decoder + pub fn new(reader: R, config: C) -> DecoderImpl { + DecoderImpl { reader, config } + } +} + +impl<'a, R, C: Config> Sealed for &'a mut DecoderImpl {} + +impl<'a, 'de, R: BorrowReader<'de>, C: Config> BorrowDecoder<'de> for &'a mut DecoderImpl { + type BR = R; + + fn borrow_reader(&mut self) -> &mut Self::BR { + &mut self.reader + } +} + +impl<'a, R: Reader, C: Config> Decoder for &'a mut DecoderImpl { + type R = R; + + type C = C; + + fn reader(&mut self) -> &mut Self::R { + &mut self.reader + } + + fn config(&self) -> &Self::C { + &self.config + } +} diff --git a/src/de/impl_core.rs b/src/de/impl_core.rs new file mode 100644 index 000000000..a5dc4c495 --- /dev/null +++ b/src/de/impl_core.rs @@ -0,0 +1,186 @@ +#![allow(unused_unsafe)] + +//! Contains implementations for rust core that have not been stabilized +//! +//! Functions in this are expected to be properly peer reviewed by the community +//! +//! Any modifications done are purely to make the code compatible with bincode + +use core::mem::{self, MaybeUninit}; + +/// Pulls `N` items from `iter` and returns them as an array. If the iterator +/// yields fewer than `N` items, `None` is returned and all already yielded +/// items are dropped. 
+/// +/// Since the iterator is passed as a mutable reference and this function calls +/// `next` at most `N` times, the iterator can still be used afterwards to +/// retrieve the remaining items. +/// +/// If `iter.next()` panics, all items already yielded by the iterator are +/// dropped. +#[allow(clippy::while_let_on_iterator)] +pub fn collect_into_array(iter: &mut I) -> Option> +where + I: Iterator>, +{ + if N == 0 { + // SAFETY: An empty array is always inhabited and has no validity invariants. + return unsafe { Some(Ok(mem::zeroed())) }; + } + + struct Guard<'a, T, const N: usize> { + array_mut: &'a mut [MaybeUninit; N], + initialized: usize, + } + + impl Drop for Guard<'_, T, N> { + fn drop(&mut self) { + debug_assert!(self.initialized <= N); + + // SAFETY: this slice will contain only initialized objects. + unsafe { + core::ptr::drop_in_place(slice_assume_init_mut( + &mut self.array_mut.get_unchecked_mut(..self.initialized), + )); + } + } + } + + let mut array = uninit_array::(); + let mut guard = Guard { + array_mut: &mut array, + initialized: 0, + }; + + while let Some(item_rslt) = iter.next() { + let item = match item_rslt { + Err(err) => { + return Some(Err(err)); + } + Ok(elem) => elem, + }; + + // SAFETY: `guard.initialized` starts at 0, is increased by one in the + // loop and the loop is aborted once it reaches N (which is + // `array.len()`). + unsafe { + guard + .array_mut + .get_unchecked_mut(guard.initialized) + .write(item); + } + guard.initialized += 1; + + // Check if the whole array was initialized. + if guard.initialized == N { + mem::forget(guard); + + // SAFETY: the condition above asserts that all elements are + // initialized. + let out = unsafe { array_assume_init(array) }; + return Some(Ok(out)); + } + } + + // This is only reached if the iterator is exhausted before + // `guard.initialized` reaches `N`. Also note that `guard` is dropped here, + // dropping all already initialized elements.
+ None +} + +/// Assuming all the elements are initialized, get a mutable slice to them. +/// +/// # Safety +/// +/// It is up to the caller to guarantee that the `MaybeUninit` elements +/// really are in an initialized state. +/// Calling this when the content is not yet fully initialized causes undefined behavior. +/// +/// See [`assume_init_mut`] for more details and examples. +/// +/// [`assume_init_mut`]: MaybeUninit::assume_init_mut +// #[unstable(feature = "maybe_uninit_slice", issue = "63569")] +// #[rustc_const_unstable(feature = "const_maybe_uninit_assume_init", issue = "none")] +#[inline(always)] +pub unsafe fn slice_assume_init_mut(slice: &mut [MaybeUninit]) -> &mut [T] { + // SAFETY: similar to safety notes for `slice_get_ref`, but we have a + // mutable reference which is also guaranteed to be valid for writes. + unsafe { &mut *(slice as *mut [MaybeUninit] as *mut [T]) } +} + +/// Create a new array of `MaybeUninit` items, in an uninitialized state. +/// +/// Note: in a future Rust version this method may become unnecessary +/// when Rust allows +/// [inline const expressions](https://github.com/rust-lang/rust/issues/76001). +/// The example below could then use `let mut buf = [const { MaybeUninit::::uninit() }; 32];`. 
+/// +/// # Examples +/// +/// ```ignore +/// #![feature(maybe_uninit_uninit_array, maybe_uninit_extra, maybe_uninit_slice)] +/// +/// use std::mem::MaybeUninit; +/// +/// extern "C" { +/// fn read_into_buffer(ptr: *mut u8, max_len: usize) -> usize; +/// } +/// +/// /// Returns a (possibly smaller) slice of data that was actually read +/// fn read(buf: &mut [MaybeUninit]) -> &[u8] { +/// unsafe { +/// let len = read_into_buffer(buf.as_mut_ptr() as *mut u8, buf.len()); +/// MaybeUninit::slice_assume_init_ref(&buf[..len]) +/// } +/// } +/// +/// let mut buf: [MaybeUninit; 32] = MaybeUninit::uninit_array(); +/// let data = read(&mut buf); +/// ``` +// #[unstable(feature = "maybe_uninit_uninit_array", issue = "none")] +// #[rustc_const_unstable(feature = "maybe_uninit_uninit_array", issue = "none")] +#[inline(always)] +fn uninit_array() -> [MaybeUninit; LEN] { + // SAFETY: An uninitialized `[MaybeUninit<_>; LEN]` is valid. + unsafe { MaybeUninit::<[MaybeUninit; LEN]>::uninit().assume_init() } +} + +/// Extracts the values from an array of `MaybeUninit` containers. +/// +/// # Safety +/// +/// It is up to the caller to guarantee that all elements of the array are +/// in an initialized state. 
+/// +/// # Examples +/// +/// ```ignore +/// #![feature(maybe_uninit_uninit_array)] +/// #![feature(maybe_uninit_array_assume_init)] +/// use std::mem::MaybeUninit; +/// +/// let mut array: [MaybeUninit; 3] = MaybeUninit::uninit_array(); +/// array[0].write(0); +/// array[1].write(1); +/// array[2].write(2); +/// +/// // SAFETY: Now safe as we initialised all elements +/// let array = unsafe { +/// MaybeUninit::array_assume_init(array) +/// }; +/// +/// assert_eq!(array, [0, 1, 2]); +/// ``` +// #[unstable(feature = "maybe_uninit_array_assume_init", issue = "80908")] +#[inline(always)] +pub unsafe fn array_assume_init(array: [MaybeUninit; N]) -> [T; N] { + // SAFETY: + // * The caller guarantees that all elements of the array are initialized + // * `MaybeUninit` and T are guaranteed to have the same layout + // * `MaybeUninit` does not drop, so there are no double-frees + // And thus the conversion is safe + unsafe { + // intrinsics::assert_inhabited::<[T; N]>(); + (&array as *const _ as *const [T; N]).read() + } +} diff --git a/src/de/impl_tuples.rs b/src/de/impl_tuples.rs new file mode 100644 index 000000000..47ab21ebe --- /dev/null +++ b/src/de/impl_tuples.rs @@ -0,0 +1,141 @@ +use super::{Decode, Decoder}; +use crate::error::DecodeError; + +impl Decode for (A,) +where + A: Decode, +{ + fn decode<_D: Decoder>(mut decoder: _D) -> Result { + Ok((A::decode(&mut decoder)?,)) + } +} + +impl Decode for (A, B) +where + A: Decode, + B: Decode, +{ + fn decode<_D: Decoder>(mut decoder: _D) -> Result { + Ok((A::decode(&mut decoder)?, B::decode(&mut decoder)?)) + } +} + +impl Decode for (A, B, C) +where + A: Decode, + B: Decode, + C: Decode, +{ + fn decode<_D: Decoder>(mut decoder: _D) -> Result { + Ok(( + A::decode(&mut decoder)?, + B::decode(&mut decoder)?, + C::decode(&mut decoder)?, + )) + } +} + +impl Decode for (A, B, C, D) +where + A: Decode, + B: Decode, + C: Decode, + D: Decode, +{ + fn decode<_D: Decoder>(mut decoder: _D) -> Result { + Ok(( + A::decode(&mut 
decoder)?, + B::decode(&mut decoder)?, + C::decode(&mut decoder)?, + D::decode(&mut decoder)?, + )) + } +} + +impl Decode for (A, B, C, D, E) +where + A: Decode, + B: Decode, + C: Decode, + D: Decode, + E: Decode, +{ + fn decode<_D: Decoder>(mut decoder: _D) -> Result { + Ok(( + A::decode(&mut decoder)?, + B::decode(&mut decoder)?, + C::decode(&mut decoder)?, + D::decode(&mut decoder)?, + E::decode(&mut decoder)?, + )) + } +} + +impl Decode for (A, B, C, D, E, F) +where + A: Decode, + B: Decode, + C: Decode, + D: Decode, + E: Decode, + F: Decode, +{ + fn decode<_D: Decoder>(mut decoder: _D) -> Result { + Ok(( + A::decode(&mut decoder)?, + B::decode(&mut decoder)?, + C::decode(&mut decoder)?, + D::decode(&mut decoder)?, + E::decode(&mut decoder)?, + F::decode(&mut decoder)?, + )) + } +} + +impl Decode for (A, B, C, D, E, F, G) +where + A: Decode, + B: Decode, + C: Decode, + D: Decode, + E: Decode, + F: Decode, + G: Decode, +{ + fn decode<_D: Decoder>(mut decoder: _D) -> Result { + Ok(( + A::decode(&mut decoder)?, + B::decode(&mut decoder)?, + C::decode(&mut decoder)?, + D::decode(&mut decoder)?, + E::decode(&mut decoder)?, + F::decode(&mut decoder)?, + G::decode(&mut decoder)?, + )) + } +} + +impl Decode for (A, B, C, D, E, F, G, H) +where + A: Decode, + B: Decode, + C: Decode, + D: Decode, + E: Decode, + F: Decode, + G: Decode, + H: Decode, +{ + fn decode<_D: Decoder>(mut decoder: _D) -> Result { + Ok(( + A::decode(&mut decoder)?, + B::decode(&mut decoder)?, + C::decode(&mut decoder)?, + D::decode(&mut decoder)?, + E::decode(&mut decoder)?, + F::decode(&mut decoder)?, + G::decode(&mut decoder)?, + H::decode(&mut decoder)?, + )) + } +} diff --git a/src/de/impls.rs b/src/de/impls.rs new file mode 100644 index 000000000..4efff0469 --- /dev/null +++ b/src/de/impls.rs @@ -0,0 +1,573 @@ +use super::{ + read::{BorrowReader, Reader}, + BorrowDecode, BorrowDecoder, Decode, Decoder, +}; +use crate::{ + config::{ + Endian, IntEncoding, InternalArrayLengthConfig, 
InternalEndianConfig, + InternalIntEncodingConfig, + }, + error::{DecodeError, IntegerType}, +}; +use core::{ + any::TypeId, + cell::{Cell, RefCell}, + num::{ + NonZeroI128, NonZeroI16, NonZeroI32, NonZeroI64, NonZeroI8, NonZeroIsize, NonZeroU128, + NonZeroU16, NonZeroU32, NonZeroU64, NonZeroU8, NonZeroUsize, + }, + ops::{Bound, Range, RangeInclusive}, + time::Duration, +}; + +impl Decode for bool { + fn decode(decoder: D) -> Result { + match u8::decode(decoder)? { + 0 => Ok(false), + 1 => Ok(true), + x => Err(DecodeError::InvalidBooleanValue(x)), + } + } +} + +impl Decode for u8 { + #[inline] + fn decode(mut decoder: D) -> Result { + if let Some(buf) = decoder.reader().peek_read(1) { + let byte = buf[0]; + decoder.reader().consume(1); + Ok(byte) + } else { + let mut bytes = [0u8; 1]; + decoder.reader().read(&mut bytes)?; + Ok(bytes[0]) + } + } +} + +impl Decode for NonZeroU8 { + fn decode(decoder: D) -> Result { + NonZeroU8::new(u8::decode(decoder)?).ok_or(DecodeError::NonZeroTypeIsZero { + non_zero_type: IntegerType::U8, + }) + } +} + +impl Decode for u16 { + fn decode(mut decoder: D) -> Result { + match D::C::INT_ENCODING { + IntEncoding::Variable => { + crate::varint::varint_decode_u16(decoder.reader(), D::C::ENDIAN) + } + IntEncoding::Fixed => { + let mut bytes = [0u8; 2]; + decoder.reader().read(&mut bytes)?; + Ok(match D::C::ENDIAN { + Endian::Little => u16::from_le_bytes(bytes), + Endian::Big => u16::from_be_bytes(bytes), + }) + } + } + } +} + +impl Decode for NonZeroU16 { + fn decode(decoder: D) -> Result { + NonZeroU16::new(u16::decode(decoder)?).ok_or(DecodeError::NonZeroTypeIsZero { + non_zero_type: IntegerType::U16, + }) + } +} + +impl Decode for u32 { + fn decode(mut decoder: D) -> Result { + match D::C::INT_ENCODING { + IntEncoding::Variable => { + crate::varint::varint_decode_u32(decoder.reader(), D::C::ENDIAN) + } + IntEncoding::Fixed => { + let mut bytes = [0u8; 4]; + decoder.reader().read(&mut bytes)?; + Ok(match D::C::ENDIAN { + Endian::Little 
=> u32::from_le_bytes(bytes), + Endian::Big => u32::from_be_bytes(bytes), + }) + } + } + } +} + +impl Decode for NonZeroU32 { + fn decode(decoder: D) -> Result { + NonZeroU32::new(u32::decode(decoder)?).ok_or(DecodeError::NonZeroTypeIsZero { + non_zero_type: IntegerType::U32, + }) + } +} + +impl Decode for u64 { + fn decode(mut decoder: D) -> Result { + match D::C::INT_ENCODING { + IntEncoding::Variable => { + crate::varint::varint_decode_u64(decoder.reader(), D::C::ENDIAN) + } + IntEncoding::Fixed => { + let mut bytes = [0u8; 8]; + decoder.reader().read(&mut bytes)?; + Ok(match D::C::ENDIAN { + Endian::Little => u64::from_le_bytes(bytes), + Endian::Big => u64::from_be_bytes(bytes), + }) + } + } + } +} + +impl Decode for NonZeroU64 { + fn decode(decoder: D) -> Result { + NonZeroU64::new(u64::decode(decoder)?).ok_or(DecodeError::NonZeroTypeIsZero { + non_zero_type: IntegerType::U64, + }) + } +} + +impl Decode for u128 { + fn decode(mut decoder: D) -> Result { + match D::C::INT_ENCODING { + IntEncoding::Variable => { + crate::varint::varint_decode_u128(decoder.reader(), D::C::ENDIAN) + } + IntEncoding::Fixed => { + let mut bytes = [0u8; 16]; + decoder.reader().read(&mut bytes)?; + Ok(match D::C::ENDIAN { + Endian::Little => u128::from_le_bytes(bytes), + Endian::Big => u128::from_be_bytes(bytes), + }) + } + } + } +} + +impl Decode for NonZeroU128 { + fn decode(decoder: D) -> Result { + NonZeroU128::new(u128::decode(decoder)?).ok_or(DecodeError::NonZeroTypeIsZero { + non_zero_type: IntegerType::U128, + }) + } +} + +impl Decode for usize { + fn decode(mut decoder: D) -> Result { + match D::C::INT_ENCODING { + IntEncoding::Variable => { + crate::varint::varint_decode_usize(decoder.reader(), D::C::ENDIAN) + } + IntEncoding::Fixed => { + let mut bytes = [0u8; 8]; + decoder.reader().read(&mut bytes)?; + Ok(match D::C::ENDIAN { + Endian::Little => u64::from_le_bytes(bytes), + Endian::Big => u64::from_be_bytes(bytes), + } as usize) + } + } + } +} + +impl Decode for 
NonZeroUsize { + fn decode(decoder: D) -> Result { + NonZeroUsize::new(usize::decode(decoder)?).ok_or(DecodeError::NonZeroTypeIsZero { + non_zero_type: IntegerType::Usize, + }) + } +} + +impl Decode for i8 { + fn decode(mut decoder: D) -> Result { + let mut bytes = [0u8; 1]; + decoder.reader().read(&mut bytes)?; + Ok(bytes[0] as i8) + } +} + +impl Decode for NonZeroI8 { + fn decode(decoder: D) -> Result { + NonZeroI8::new(i8::decode(decoder)?).ok_or(DecodeError::NonZeroTypeIsZero { + non_zero_type: IntegerType::I8, + }) + } +} + +impl Decode for i16 { + fn decode(mut decoder: D) -> Result { + match D::C::INT_ENCODING { + IntEncoding::Variable => { + crate::varint::varint_decode_i16(decoder.reader(), D::C::ENDIAN) + } + IntEncoding::Fixed => { + let mut bytes = [0u8; 2]; + decoder.reader().read(&mut bytes)?; + Ok(match D::C::ENDIAN { + Endian::Little => i16::from_le_bytes(bytes), + Endian::Big => i16::from_be_bytes(bytes), + }) + } + } + } +} + +impl Decode for NonZeroI16 { + fn decode(decoder: D) -> Result { + NonZeroI16::new(i16::decode(decoder)?).ok_or(DecodeError::NonZeroTypeIsZero { + non_zero_type: IntegerType::I16, + }) + } +} + +impl Decode for i32 { + fn decode(mut decoder: D) -> Result { + match D::C::INT_ENCODING { + IntEncoding::Variable => { + crate::varint::varint_decode_i32(decoder.reader(), D::C::ENDIAN) + } + IntEncoding::Fixed => { + let mut bytes = [0u8; 4]; + decoder.reader().read(&mut bytes)?; + Ok(match D::C::ENDIAN { + Endian::Little => i32::from_le_bytes(bytes), + Endian::Big => i32::from_be_bytes(bytes), + }) + } + } + } +} + +impl Decode for NonZeroI32 { + fn decode(decoder: D) -> Result { + NonZeroI32::new(i32::decode(decoder)?).ok_or(DecodeError::NonZeroTypeIsZero { + non_zero_type: IntegerType::I32, + }) + } +} + +impl Decode for i64 { + fn decode(mut decoder: D) -> Result { + match D::C::INT_ENCODING { + IntEncoding::Variable => { + crate::varint::varint_decode_i64(decoder.reader(), D::C::ENDIAN) + } + IntEncoding::Fixed => { + let mut 
bytes = [0u8; 8]; + decoder.reader().read(&mut bytes)?; + Ok(match D::C::ENDIAN { + Endian::Little => i64::from_le_bytes(bytes), + Endian::Big => i64::from_be_bytes(bytes), + }) + } + } + } +} + +impl Decode for NonZeroI64 { + fn decode(decoder: D) -> Result { + NonZeroI64::new(i64::decode(decoder)?).ok_or(DecodeError::NonZeroTypeIsZero { + non_zero_type: IntegerType::I64, + }) + } +} + +impl Decode for i128 { + fn decode(mut decoder: D) -> Result { + match D::C::INT_ENCODING { + IntEncoding::Variable => { + crate::varint::varint_decode_i128(decoder.reader(), D::C::ENDIAN) + } + IntEncoding::Fixed => { + let mut bytes = [0u8; 16]; + decoder.reader().read(&mut bytes)?; + Ok(match D::C::ENDIAN { + Endian::Little => i128::from_le_bytes(bytes), + Endian::Big => i128::from_be_bytes(bytes), + }) + } + } + } +} + +impl Decode for NonZeroI128 { + fn decode(decoder: D) -> Result { + NonZeroI128::new(i128::decode(decoder)?).ok_or(DecodeError::NonZeroTypeIsZero { + non_zero_type: IntegerType::I128, + }) + } +} + +impl Decode for isize { + fn decode(mut decoder: D) -> Result { + match D::C::INT_ENCODING { + IntEncoding::Variable => { + crate::varint::varint_decode_isize(decoder.reader(), D::C::ENDIAN) + } + IntEncoding::Fixed => { + let mut bytes = [0u8; 8]; + decoder.reader().read(&mut bytes)?; + Ok(match D::C::ENDIAN { + Endian::Little => i64::from_le_bytes(bytes), + Endian::Big => i64::from_be_bytes(bytes), + } as isize) + } + } + } +} + +impl Decode for NonZeroIsize { + fn decode(decoder: D) -> Result { + NonZeroIsize::new(isize::decode(decoder)?).ok_or(DecodeError::NonZeroTypeIsZero { + non_zero_type: IntegerType::Isize, + }) + } +} + +impl Decode for f32 { + fn decode(mut decoder: D) -> Result { + let mut bytes = [0u8; 4]; + decoder.reader().read(&mut bytes)?; + Ok(match D::C::ENDIAN { + Endian::Little => f32::from_le_bytes(bytes), + Endian::Big => f32::from_be_bytes(bytes), + }) + } +} + +impl Decode for f64 { + fn decode(mut decoder: D) -> Result { + let mut bytes = 
[0u8; 8]; + decoder.reader().read(&mut bytes)?; + Ok(match D::C::ENDIAN { + Endian::Little => f64::from_le_bytes(bytes), + Endian::Big => f64::from_be_bytes(bytes), + }) + } +} + +impl Decode for char { + fn decode(mut decoder: D) -> Result { + let mut array = [0u8; 4]; + + // Look at the first byte to see how many bytes must be read + decoder.reader().read(&mut array[..1])?; + + let width = utf8_char_width(array[0]); + if width == 0 { + return Err(DecodeError::InvalidCharEncoding(array)); + } + if width == 1 { + return Ok(array[0] as char); + } + + // read the remaining bytes + decoder.reader().read(&mut array[1..width])?; + let res = core::str::from_utf8(&array[..width]) + .ok() + .and_then(|s| s.chars().next()) + .ok_or(DecodeError::InvalidCharEncoding(array))?; + Ok(res) + } +} + +impl<'a, 'de: 'a> BorrowDecode<'de> for &'a [u8] { + fn borrow_decode>(mut decoder: D) -> Result { + let len = usize::decode(&mut decoder)?; + decoder.borrow_reader().take_bytes(len) + } +} + +impl<'a, 'de: 'a> BorrowDecode<'de> for &'a str { + fn borrow_decode>(decoder: D) -> Result { + let slice: &[u8] = BorrowDecode::borrow_decode(decoder)?; + core::str::from_utf8(slice).map_err(DecodeError::Utf8) + } +} + +impl Decode for [T; N] +where + T: Decode + Sized + 'static, +{ + fn decode(mut decoder: D) -> Result { + if !D::C::SKIP_FIXED_ARRAY_LENGTH { + let length = usize::decode(&mut decoder)?; + if length != N { + return Err(DecodeError::ArrayLengthMismatch { + found: length, + required: N, + }); + } + } + + if TypeId::of::() == TypeId::of::() { + let mut buf = [0u8; N]; + decoder.reader().read(&mut buf)?; + let ptr = &mut buf as *mut _ as *mut [T; N]; + + // Safety: we know that T is a u8, so it is perfectly safe to + // translate an array of u8 into an array of T + let res = unsafe { ptr.read() }; + Ok(res) + } else { + let result = + super::impl_core::collect_into_array(&mut (0..N).map(|_| T::decode(&mut decoder))); + + // result is only None if N does not match the values of
`(0..N)`, which it always should + // So this unsafe should never occur + result.unwrap() + } + } +} + +impl Decode for core::marker::PhantomData { + fn decode(_: D) -> Result { + Ok(core::marker::PhantomData) + } +} + +impl Decode for Option +where + T: Decode, +{ + fn decode(mut decoder: D) -> Result { + let is_some = u8::decode(&mut decoder)?; + match is_some { + 0 => Ok(None), + 1 => { + let val = T::decode(decoder)?; + Ok(Some(val)) + } + x => Err(DecodeError::UnexpectedVariant { + found: x as u32, + max: 1, + min: 0, + type_name: core::any::type_name::>(), + }), + } + } +} + +impl Decode for Result +where + T: Decode, + U: Decode, +{ + fn decode(mut decoder: D) -> Result { + let is_ok = u8::decode(&mut decoder)?; + match is_ok { + 0 => { + let t = T::decode(decoder)?; + Ok(Ok(t)) + } + 1 => { + let u = U::decode(decoder)?; + Ok(Err(u)) + } + x => Err(DecodeError::UnexpectedVariant { + found: x as u32, + max: 1, + min: 0, + type_name: core::any::type_name::>(), + }), + } + } +} + +impl Decode for Cell +where + T: Decode, +{ + fn decode(decoder: D) -> Result { + let t = T::decode(decoder)?; + Ok(Cell::new(t)) + } +} + +impl Decode for RefCell +where + T: Decode, +{ + fn decode(decoder: D) -> Result { + let t = T::decode(decoder)?; + Ok(RefCell::new(t)) + } +} + +impl Decode for Duration { + fn decode(mut decoder: D) -> Result { + let secs = Decode::decode(&mut decoder)?; + let nanos = Decode::decode(&mut decoder)?; + Ok(Duration::new(secs, nanos)) + } +} + +impl Decode for Range +where + T: Decode, +{ + fn decode(mut decoder: D) -> Result { + let min = T::decode(&mut decoder)?; + let max = T::decode(&mut decoder)?; + Ok(min..max) + } +} + +impl Decode for RangeInclusive +where + T: Decode, +{ + fn decode(mut decoder: D) -> Result { + let min = T::decode(&mut decoder)?; + let max = T::decode(&mut decoder)?; + Ok(RangeInclusive::new(min, max)) + } +} + +impl Decode for Bound +where + T: Decode, +{ + fn decode(mut decoder: D) -> Result { + match u32::decode(&mut 
decoder)? { + 0 => Ok(Bound::Unbounded), + 1 => Ok(Bound::Included(T::decode(decoder)?)), + 2 => Ok(Bound::Excluded(T::decode(decoder)?)), + x => Err(DecodeError::UnexpectedVariant { + min: 0, + max: 2, + found: x, + type_name: core::any::type_name::>(), + }), + } + } +} + +const UTF8_CHAR_WIDTH: [u8; 256] = [ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, // 0x1F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, // 0x3F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, // 0x5F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, // 0x7F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, // 0x9F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, // 0xBF + 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, // 0xDF + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xEF + 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xFF +]; + +// This function is a copy of core::str::utf8_char_width +const fn utf8_char_width(b: u8) -> usize { + UTF8_CHAR_WIDTH[b as usize] as usize +} diff --git a/src/de/mod.rs b/src/de/mod.rs index 6b2208a2f..05d47477f 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -1,539 +1,95 @@ -use crate::config::{BincodeByteOrder, Options}; -use std::io::Read; +//! Decoder-based structs and traits. 
-use self::read::{BincodeRead, IoReader, SliceReader}; -use crate::config::{IntEncoding, SizeLimit}; -use crate::{Error, ErrorKind, Result}; -use byteorder::ReadBytesExt; -use serde::de::Error as DeError; -use serde::de::IntoDeserializer; +mod decoder; +mod impl_core; +mod impl_tuples; +mod impls; + +use self::read::{BorrowReader, Reader}; +use crate::{config::Config, error::DecodeError, utils::Sealed}; -/// Specialized ways to read data into bincode. pub mod read; -/// A Deserializer that reads bytes from a buffer. +pub use self::decoder::DecoderImpl; + +/// Trait that makes a type able to be decoded, akin to serde's `DeserializeOwned` trait. /// -/// This struct should rarely be used. -/// In most cases, prefer the `deserialize_from` function. +/// This trait should be implemented for types which do not have references to data in the reader. For types that contain e.g. `&str` and `&[u8]`, implement [BorrowDecode] instead. /// -/// The ByteOrder that is chosen will impact the endianness that -/// is used to read integers out of the reader. +/// Whenever you implement `Decode` for your type, the base trait `BorrowDecode` is automatically implemented. /// -/// ```ignore -/// let d = Deserializer::new(&mut some_reader, SizeLimit::new()); -/// serde::Deserialize::deserialize(&mut deserializer); -/// let bytes_read = d.bytes_read(); -/// ``` -pub struct Deserializer { - pub(crate) reader: R, - options: O, -} - -#[inline(never)] -#[cold] -fn bincode_read_cold(reader: &mut R) -> Result<[u8; N]> -where - R: std::io::Read, -{ - let mut buf = [0u8; N]; - reader.read_exact(&mut buf)?; - Ok(buf) +/// This trait will be automatically implemented if you enable the `derive` feature and add `#[derive(bincode::Decode)]` to your type. Note that if the type contains any lifetimes, `BorrowDecode` will be implemented instead. +pub trait Decode: for<'de> BorrowDecode<'de> { + /// Attempt to decode this type with the given [Decoder]. + fn decode(decoder: D) -> Result; } -macro_rules!
impl_deserialize_literal { - ($name:ident : $ty:ty = $read:ident()) => { - #[inline] - pub(crate) fn $name(&mut self) -> Result<$ty> { - const SIZE: usize = core::mem::size_of::<$ty>(); - let read = <::Endian as byteorder::ByteOrder>::$read; - self.read_literal_type::<$ty>()?; - if let Some(buf) = self.reader.peek_read(SIZE) { - let v = read(buf); - self.reader.consume(SIZE); - Ok(v) - } else { - let bytes = bincode_read_cold::<_, SIZE>(&mut self.reader)?; - Ok(read(&bytes)) - } - } - }; -} - -impl<'de, IR: Read, O: Options> Deserializer, O> { - /// Creates a new Deserializer with a given `Read`er and options. - pub fn with_reader(r: IR, options: O) -> Self { - Deserializer { - reader: IoReader::new(r), - options, - } - } +/// Trait that makes a type able to be decoded, akin to serde's `Deserialize` trait. +/// +/// This trait should be implemented for types that contain borrowed data, like `&str` and `&[u8]`. If your type does not have borrowed data, consider implementing [Decode] instead. +/// +/// This trait will be automatically implemented if you enable the `derive` feature and add `#[derive(bincode::Decode)]` to a type with a lifetime. +pub trait BorrowDecode<'de>: Sized { + /// Attempt to decode this type with the given [BorrowDecoder]. + fn borrow_decode>(decoder: D) -> Result; } -impl<'de, O: Options> Deserializer, O> { - /// Creates a new Deserializer that will read from the given slice.
- pub fn from_slice(slice: &'de [u8], options: O) -> Self { - Deserializer { - reader: SliceReader::new(slice), - options, - } +impl<'de, T: Decode> BorrowDecode<'de> for T { + fn borrow_decode(decoder: D) -> Result { + Decode::decode(decoder) } } -impl<'de, R: BincodeRead<'de>, O: Options> Deserializer { - /// Creates a new Deserializer with the given `BincodeRead`er - pub fn with_bincode_read(r: R, options: O) -> Deserializer { - Deserializer { reader: r, options } - } - - #[inline] - pub(crate) fn deserialize_byte(&mut self) -> Result { - self.read_literal_type::()?; - if let Some(buf) = self.reader.peek_read(1) { - let byte = buf[0]; - self.reader.consume(1); - Ok(byte) - } else { - self.reader.read_u8().map_err(Into::into) - } - } - - impl_deserialize_literal! { deserialize_literal_u16 : u16 = read_u16() } - impl_deserialize_literal! { deserialize_literal_u32 : u32 = read_u32() } - impl_deserialize_literal! { deserialize_literal_u64 : u64 = read_u64() } - - serde_if_integer128! { - impl_deserialize_literal! { deserialize_literal_u128 : u128 = read_u128() } - } - - fn read_bytes(&mut self, count: u64) -> Result<()> { - self.options.limit().add(count) - } +/// Any source that can decode basic types. This type is most notably implemented for [Decoder]. 
+pub trait Decoder: Sealed { + /// The concrete [Reader] type + type R: Reader; - fn read_literal_type(&mut self) -> Result<()> { - use std::mem::size_of; - self.read_bytes(size_of::() as u64) - } + /// The concrete [Config] type + type C: Config; - fn read_vec(&mut self) -> Result> { - let len = O::IntEncoding::deserialize_len(self)?; - self.read_bytes(len as u64)?; - self.reader.get_byte_buffer(len) - } + /// Returns a mutable reference to the reader + fn reader(&mut self) -> &mut Self::R; - fn read_string(&mut self) -> Result { - let vec = self.read_vec()?; - String::from_utf8(vec).map_err(|e| ErrorKind::InvalidUtf8Encoding(e.utf8_error()).into()) - } + /// Returns a mutable reference to the config + fn config(&self) -> &Self::C; } -macro_rules! impl_deserialize_int { - ($name:ident = $visitor_method:ident ($dser_method:ident)) => { - #[inline] - fn $name(self, visitor: V) -> Result - where - V: serde::de::Visitor<'de>, - { - visitor.$visitor_method(O::IntEncoding::$dser_method(self)?) - } - }; +/// Any source that can decode basic types. This type is most notably implemented for [Decoder]. +/// +/// This is an extension of [Decode] that can also return borrowed data. +pub trait BorrowDecoder<'de>: Decoder { + /// The concrete [BorrowReader] type + type BR: BorrowReader<'de>; + + /// Rerturns a mutable reference to the borrow reader + fn borrow_reader(&mut self) -> &mut Self::BR; } -impl<'de, 'a, R, O> serde::Deserializer<'de> for &'a mut Deserializer +impl<'a, T> Decoder for &'a mut T where - R: BincodeRead<'de>, - O: Options, + T: Decoder, { - type Error = Error; - - #[inline] - fn deserialize_any(self, _visitor: V) -> Result - where - V: serde::de::Visitor<'de>, - { - Err(Box::new(ErrorKind::DeserializeAnyNotSupported)) - } - - fn deserialize_bool(self, visitor: V) -> Result - where - V: serde::de::Visitor<'de>, - { - match self.deserialize_byte()? 
{ - 1 => visitor.visit_bool(true), - 0 => visitor.visit_bool(false), - value => Err(ErrorKind::InvalidBoolEncoding(value).into()), - } - } - - impl_deserialize_int!(deserialize_u16 = visit_u16(deserialize_u16)); - impl_deserialize_int!(deserialize_u32 = visit_u32(deserialize_u32)); - impl_deserialize_int!(deserialize_u64 = visit_u64(deserialize_u64)); - impl_deserialize_int!(deserialize_i16 = visit_i16(deserialize_i16)); - impl_deserialize_int!(deserialize_i32 = visit_i32(deserialize_i32)); - impl_deserialize_int!(deserialize_i64 = visit_i64(deserialize_i64)); - - fn deserialize_f32(self, visitor: V) -> Result - where - V: serde::de::Visitor<'de>, - { - self.read_literal_type::()?; - let value = self - .reader - .read_f32::<::Endian>()?; - visitor.visit_f32(value) - } - - fn deserialize_f64(self, visitor: V) -> Result - where - V: serde::de::Visitor<'de>, - { - self.read_literal_type::()?; - let value = self - .reader - .read_f64::<::Endian>()?; - visitor.visit_f64(value) - } - - serde_if_integer128! { - impl_deserialize_int!(deserialize_u128 = visit_u128(deserialize_u128)); - impl_deserialize_int!(deserialize_i128 = visit_i128(deserialize_i128)); - } - - #[inline] - fn deserialize_u8(self, visitor: V) -> Result - where - V: serde::de::Visitor<'de>, - { - visitor.visit_u8(self.deserialize_byte()? as u8) - } - - #[inline] - fn deserialize_i8(self, visitor: V) -> Result - where - V: serde::de::Visitor<'de>, - { - visitor.visit_i8(self.deserialize_byte()? 
as i8) - } - - fn deserialize_unit(self, visitor: V) -> Result - where - V: serde::de::Visitor<'de>, - { - visitor.visit_unit() - } - - fn deserialize_char(self, visitor: V) -> Result - where - V: serde::de::Visitor<'de>, - { - use std::str; - - let error = || ErrorKind::InvalidCharEncoding.into(); - - let mut buf = [0u8; 4]; - - // Look at the first byte to see how many bytes must be read - self.reader.read_exact(&mut buf[..1])?; - let width = utf8_char_width(buf[0]); - if width == 1 { - return visitor.visit_char(buf[0] as char); - } - if width == 0 { - return Err(error()); - } - - if self.reader.read_exact(&mut buf[1..width]).is_err() { - return Err(error()); - } - - let res = str::from_utf8(&buf[..width]) - .ok() - .and_then(|s| s.chars().next()) - .ok_or_else(error)?; - visitor.visit_char(res) - } + type R = T::R; - fn deserialize_str(self, visitor: V) -> Result - where - V: serde::de::Visitor<'de>, - { - let len = O::IntEncoding::deserialize_len(self)?; - self.read_bytes(len as u64)?; - self.reader.forward_read_str(len, visitor) - } - - fn deserialize_string(self, visitor: V) -> Result - where - V: serde::de::Visitor<'de>, - { - visitor.visit_string(self.read_string()?) - } - - fn deserialize_bytes(self, visitor: V) -> Result - where - V: serde::de::Visitor<'de>, - { - let len = O::IntEncoding::deserialize_len(self)?; - self.read_bytes(len as u64)?; - self.reader.forward_read_bytes(len, visitor) - } - - fn deserialize_byte_buf(self, visitor: V) -> Result - where - V: serde::de::Visitor<'de>, - { - visitor.visit_byte_buf(self.read_vec()?) 
- } - - fn deserialize_enum( - self, - _enum: &'static str, - _variants: &'static [&'static str], - visitor: V, - ) -> Result - where - V: serde::de::Visitor<'de>, - { - impl<'de, 'a, R: 'a, O> serde::de::EnumAccess<'de> for &'a mut Deserializer - where - R: BincodeRead<'de>, - O: Options, - { - type Error = Error; - type Variant = Self; - - fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant)> - where - V: serde::de::DeserializeSeed<'de>, - { - let idx: u32 = O::IntEncoding::deserialize_u32(self)?; - let val: Result<_> = seed.deserialize(idx.into_deserializer()); - Ok((val?, self)) - } - } - - visitor.visit_enum(self) - } - - fn deserialize_tuple(self, len: usize, visitor: V) -> Result - where - V: serde::de::Visitor<'de>, - { - struct Access<'a, R: Read + 'a, O: Options + 'a> { - deserializer: &'a mut Deserializer, - len: usize, - } - - impl<'de, 'a, 'b: 'a, R: BincodeRead<'de> + 'b, O: Options> serde::de::SeqAccess<'de> - for Access<'a, R, O> - { - type Error = Error; - - fn next_element_seed(&mut self, seed: T) -> Result> - where - T: serde::de::DeserializeSeed<'de>, - { - if self.len > 0 { - self.len -= 1; - let value = - serde::de::DeserializeSeed::deserialize(seed, &mut *self.deserializer)?; - Ok(Some(value)) - } else { - Ok(None) - } - } - - fn size_hint(&self) -> Option { - Some(self.len) - } - } - - visitor.visit_seq(Access { - deserializer: self, - len, - }) - } - - fn deserialize_option(self, visitor: V) -> Result - where - V: serde::de::Visitor<'de>, - { - let value: u8 = serde::de::Deserialize::deserialize(&mut *self)?; - match value { - 0 => visitor.visit_none(), - 1 => visitor.visit_some(&mut *self), - v => Err(ErrorKind::InvalidTagEncoding(v as usize).into()), - } - } - - fn deserialize_seq(self, visitor: V) -> Result - where - V: serde::de::Visitor<'de>, - { - let len = O::IntEncoding::deserialize_len(self)?; - - self.deserialize_tuple(len, visitor) - } - - fn deserialize_map(self, visitor: V) -> Result - where - V: 
serde::de::Visitor<'de>, - { - struct Access<'a, R: Read + 'a, O: Options + 'a> { - deserializer: &'a mut Deserializer, - len: usize, - } - - impl<'de, 'a, 'b: 'a, R: BincodeRead<'de> + 'b, O: Options> serde::de::MapAccess<'de> - for Access<'a, R, O> - { - type Error = Error; + type C = T::C; - fn next_key_seed(&mut self, seed: K) -> Result> - where - K: serde::de::DeserializeSeed<'de>, - { - if self.len > 0 { - self.len -= 1; - let key = - serde::de::DeserializeSeed::deserialize(seed, &mut *self.deserializer)?; - Ok(Some(key)) - } else { - Ok(None) - } - } - - fn next_value_seed(&mut self, seed: V) -> Result - where - V: serde::de::DeserializeSeed<'de>, - { - let value = serde::de::DeserializeSeed::deserialize(seed, &mut *self.deserializer)?; - Ok(value) - } - - fn size_hint(&self) -> Option { - Some(self.len) - } - } - - let len = O::IntEncoding::deserialize_len(self)?; - - visitor.visit_map(Access { - deserializer: self, - len, - }) - } - - fn deserialize_struct( - self, - _name: &str, - fields: &'static [&'static str], - visitor: V, - ) -> Result - where - V: serde::de::Visitor<'de>, - { - self.deserialize_tuple(fields.len(), visitor) - } - - fn deserialize_identifier(self, _visitor: V) -> Result - where - V: serde::de::Visitor<'de>, - { - let message = "Bincode does not support Deserializer::deserialize_identifier"; - Err(Error::custom(message)) - } - - fn deserialize_newtype_struct(self, _name: &str, visitor: V) -> Result - where - V: serde::de::Visitor<'de>, - { - visitor.visit_newtype_struct(self) - } - - fn deserialize_unit_struct(self, _name: &'static str, visitor: V) -> Result - where - V: serde::de::Visitor<'de>, - { - visitor.visit_unit() + fn reader(&mut self) -> &mut Self::R { + T::reader(self) } - fn deserialize_tuple_struct( - self, - _name: &'static str, - len: usize, - visitor: V, - ) -> Result - where - V: serde::de::Visitor<'de>, - { - self.deserialize_tuple(len, visitor) - } - - fn deserialize_ignored_any(self, _visitor: V) -> Result - where - 
V: serde::de::Visitor<'de>, - { - let message = "Bincode does not support Deserializer::deserialize_ignored_any"; - Err(Error::custom(message)) - } - - fn is_human_readable(&self) -> bool { - false + fn config(&self) -> &Self::C { + T::config(self) } } -impl<'de, 'a, R, O> serde::de::VariantAccess<'de> for &'a mut Deserializer +impl<'a, 'de, T> BorrowDecoder<'de> for &'a mut T where - R: BincodeRead<'de>, - O: Options, + T: BorrowDecoder<'de>, { - type Error = Error; - - fn unit_variant(self) -> Result<()> { - Ok(()) - } + type BR = T::BR; - fn newtype_variant_seed(self, seed: T) -> Result - where - T: serde::de::DeserializeSeed<'de>, - { - serde::de::DeserializeSeed::deserialize(seed, self) + fn borrow_reader(&mut self) -> &mut Self::BR { + T::borrow_reader(self) } - - fn tuple_variant(self, len: usize, visitor: V) -> Result - where - V: serde::de::Visitor<'de>, - { - serde::de::Deserializer::deserialize_tuple(self, len, visitor) - } - - fn struct_variant(self, fields: &'static [&'static str], visitor: V) -> Result - where - V: serde::de::Visitor<'de>, - { - serde::de::Deserializer::deserialize_tuple(self, fields.len(), visitor) - } -} -static UTF8_CHAR_WIDTH: [u8; 256] = [ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, // 0x1F - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, // 0x3F - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, // 0x5F - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, // 0x7F - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, // 0x9F - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, // 0xBF - 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, // 0xDF - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 
// 0xEF - 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xFF -]; - -// This function is a copy of core::str::utf8_char_width -fn utf8_char_width(b: u8) -> usize { - UTF8_CHAR_WIDTH[b as usize] as usize } diff --git a/src/de/read.rs b/src/de/read.rs index 5d3a92a31..b0f6cc386 100644 --- a/src/de/read.rs +++ b/src/de/read.rs @@ -1,31 +1,25 @@ -use crate::error::Result; -use std::io; - -/// An optional Read trait for advanced Bincode usage. -/// -/// It is highly recommended to use bincode with `io::Read` or `&[u8]` before -/// implementing a custom `BincodeRead`. -/// -/// The forward_read_* methods are necessary because some byte sources want -/// to pass a long-lived borrow to the visitor and others want to pass a -/// transient slice. -pub trait BincodeRead<'storage>: io::Read { - /// Check that the next `length` bytes are a valid string and pass - /// it on to the serde reader. - fn forward_read_str(&mut self, length: usize, visitor: V) -> Result - where - V: serde::de::Visitor<'storage>; - - /// Transfer ownership of the next `length` bytes to the caller. - fn get_byte_buffer(&mut self, length: usize) -> Result>; - - /// Pass a slice of the next `length` bytes on to the serde reader. - fn forward_read_bytes(&mut self, length: usize, visitor: V) -> Result - where - V: serde::de::Visitor<'storage>; +//! This module contains reader-based structs and traits. +//! +//! Because `std::io::Read` is only limited to `std` and not `core`, we provide 2 alternative readers. +//! +//! [Reader] is a reader for sources that do not own their data. It is assumed that the reader's data is dropped after the `read` method is called. This reader is incapable of reading borrowed data, like `&str` and `&[u8]`. +//! +//! [BorrowReader] is an extension of `Reader` that also allows returning borrowed data. A `BorrowReader` allows reading `&str` and `&[u8]`. +//! +//! Specifically the `Reader` trait is used by [Decode] and the `BorrowReader` trait is used by `[BorrowDecode]`. +//! 
+//! [Decode]: ../trait.Decode.html +//! [BorrowDecode]: ../trait.BorrowDecode.html + +use crate::error::DecodeError; + +/// A reader for owned data. See the module documentation for more information. +pub trait Reader { + /// Fill the given `bytes` argument with values. Exactly the length of the given slice must be filled, or else an error must be returned. + fn read(&mut self, bytes: &mut [u8]) -> Result<(), DecodeError>; /// If this reader wraps a buffer of any kind, this function lets callers access contents of - /// the buffer without passing data through a buffer first via the `std::io::Read` interface + /// the buffer without passing data through a buffer first. #[inline] fn peek_read(&self, _: usize) -> Option<&[u8]> { None @@ -37,26 +31,13 @@ pub trait BincodeRead<'storage>: io::Read { fn consume(&mut self, _: usize) {} } -impl<'a, 'storage, T> BincodeRead<'storage> for &'a mut T +impl<'a, T> Reader for &'a mut T where - T: BincodeRead<'storage>, + T: Reader, { - fn forward_read_str(&mut self, length: usize, visitor: V) -> Result - where - V: serde::de::Visitor<'storage>, - { - (*self).forward_read_str(length, visitor) - } - - fn get_byte_buffer(&mut self, length: usize) -> Result> { - (*self).get_byte_buffer(length) - } - - fn forward_read_bytes(&mut self, length: usize, visitor: V) -> Result - where - V: serde::de::Visitor<'storage>, - { - (*self).forward_read_bytes(length, visitor) + #[inline] + fn read(&mut self, bytes: &mut [u8]) -> Result<(), DecodeError> { + (**self).read(bytes) } #[inline] @@ -70,115 +51,37 @@ where } } -/// A BincodeRead implementation for byte slices -pub struct SliceReader<'storage> { - slice: &'storage [u8], +/// A reader for borrowed data. Implementors of this must also implement the [Reader] trait. See the module documentation for more information. +pub trait BorrowReader<'storage>: Reader { + /// Read exactly `length` bytes and return a slice to this data. If not enough bytes could be read, an error should be returned. 
+ /// + /// *note*: Exactly `length` bytes must be returned. If less bytes are returned, bincode may panic. If more bytes are returned, the excess bytes may be discarded. + fn take_bytes(&mut self, length: usize) -> Result<&'storage [u8], DecodeError>; } -/// A BincodeRead implementation for `io::Read`ers -pub struct IoReader { - reader: R, - temp_buffer: Vec, +/// A reader type for `&[u8]` slices. Implements both [Reader] and [BorrowReader], and thus can be used for borrowed data. +pub struct SliceReader<'storage> { + slice: &'storage [u8], } impl<'storage> SliceReader<'storage> { /// Constructs a slice reader - pub(crate) fn new(bytes: &'storage [u8]) -> SliceReader<'storage> { + pub fn new(bytes: &'storage [u8]) -> SliceReader<'storage> { SliceReader { slice: bytes } } - - #[inline(always)] - fn get_byte_slice(&mut self, length: usize) -> Result<&'storage [u8]> { - if length > self.slice.len() { - return Err(SliceReader::unexpected_eof()); - } - let (read_slice, remaining) = self.slice.split_at(length); - self.slice = remaining; - Ok(read_slice) - } - - #[inline] - pub(crate) fn is_finished(&self) -> bool { - self.slice.is_empty() - } -} - -impl IoReader { - /// Constructs an IoReadReader - pub(crate) fn new(r: R) -> IoReader { - IoReader { - reader: r, - temp_buffer: vec![], - } - } } -impl<'storage> io::Read for SliceReader<'storage> { +impl<'storage> Reader for SliceReader<'storage> { #[inline(always)] - fn read(&mut self, out: &mut [u8]) -> io::Result { - if out.len() > self.slice.len() { - return Err(io::ErrorKind::UnexpectedEof.into()); + fn read(&mut self, bytes: &mut [u8]) -> Result<(), DecodeError> { + if bytes.len() > self.slice.len() { + return Err(DecodeError::UnexpectedEnd); } - let (read_slice, remaining) = self.slice.split_at(out.len()); - out.copy_from_slice(read_slice); + let (read_slice, remaining) = self.slice.split_at(bytes.len()); + bytes.copy_from_slice(read_slice); self.slice = remaining; - Ok(out.len()) - } - - #[inline(always)] - fn 
read_exact(&mut self, out: &mut [u8]) -> io::Result<()> { - self.read(out).map(|_| ()) - } -} - -impl io::Read for IoReader { - #[inline(always)] - fn read(&mut self, out: &mut [u8]) -> io::Result { - self.reader.read(out) - } - #[inline(always)] - fn read_exact(&mut self, out: &mut [u8]) -> io::Result<()> { - self.reader.read_exact(out) - } -} - -impl<'storage> SliceReader<'storage> { - #[inline(never)] - #[cold] - fn unexpected_eof() -> Box { - Box::new(crate::ErrorKind::Io(io::Error::new( - io::ErrorKind::UnexpectedEof, - "", - ))) - } -} - -impl<'storage> BincodeRead<'storage> for SliceReader<'storage> { - #[inline(always)] - fn forward_read_str(&mut self, length: usize, visitor: V) -> Result - where - V: serde::de::Visitor<'storage>, - { - use crate::ErrorKind; - let string = match ::std::str::from_utf8(self.get_byte_slice(length)?) { - Ok(s) => s, - Err(e) => return Err(ErrorKind::InvalidUtf8Encoding(e).into()), - }; - visitor.visit_borrowed_str(string) - } - - #[inline(always)] - fn forward_read_bytes(&mut self, length: usize, visitor: V) -> Result - where - V: serde::de::Visitor<'storage>, - { - visitor.visit_borrowed_bytes(self.get_byte_slice(length)?) 
- } - - #[inline(always)] - fn get_byte_buffer(&mut self, length: usize) -> Result> { - self.get_byte_slice(length).map(|x| x.to_vec()) + Ok(()) } #[inline] @@ -192,136 +95,14 @@ impl<'storage> BincodeRead<'storage> for SliceReader<'storage> { } } -impl IoReader -where - R: io::Read, -{ - fn fill_buffer(&mut self, length: usize) -> Result<()> { - self.temp_buffer.resize(length, 0); - - self.reader.read_exact(&mut self.temp_buffer)?; - - Ok(()) - } -} - -impl<'a, R> BincodeRead<'a> for IoReader -where - R: io::Read, -{ - fn forward_read_str(&mut self, length: usize, visitor: V) -> Result - where - V: serde::de::Visitor<'a>, - { - self.fill_buffer(length)?; - - let string = match ::std::str::from_utf8(&self.temp_buffer[..]) { - Ok(s) => s, - Err(e) => return Err(crate::ErrorKind::InvalidUtf8Encoding(e).into()), - }; - - visitor.visit_str(string) - } - - fn get_byte_buffer(&mut self, length: usize) -> Result> { - self.fill_buffer(length)?; - Ok(::std::mem::take(&mut self.temp_buffer)) - } - - fn forward_read_bytes(&mut self, length: usize, visitor: V) -> Result - where - V: serde::de::Visitor<'a>, - { - self.fill_buffer(length)?; - visitor.visit_bytes(&self.temp_buffer[..]) - } -} - -impl<'storage, R> BincodeRead<'storage> for std::io::BufReader -where - R: io::Read, -{ - fn forward_read_str(&mut self, length: usize, visitor: V) -> Result - where - V: serde::de::Visitor<'storage>, - { - let mut consume = false; - let mut temp_buf = Vec::new(); - let buf = if let Some(buf) = self.peek_read(length) { - consume = true; - buf - } else { - temp_buf.resize(length, 0); - ::read_exact(self, &mut temp_buf)?; - &temp_buf - }; - let string = match ::std::str::from_utf8(buf) { - Ok(s) => s, - Err(e) => return Err(crate::ErrorKind::InvalidUtf8Encoding(e).into()), - }; - - let res = visitor.visit_str::(string); - if consume { - self.consume(length); - } - res - } - - fn forward_read_bytes(&mut self, length: usize, visitor: V) -> Result - where - V: serde::de::Visitor<'storage>, - { 
- let mut consume = false; - let mut temp_buf = Vec::new(); - let buf = if let Some(buf) = self.peek_read(length) { - consume = true; - buf - } else { - temp_buf.resize(length, 0); - ::read_exact(self, &mut temp_buf)?; - &temp_buf - }; - - let res = visitor.visit_bytes::(buf); - if consume { - self.consume(length); +impl<'storage> BorrowReader<'storage> for SliceReader<'storage> { + #[inline(always)] + fn take_bytes(&mut self, length: usize) -> Result<&'storage [u8], DecodeError> { + if length > self.slice.len() { + return Err(DecodeError::UnexpectedEnd); } - res - } - - fn get_byte_buffer(&mut self, length: usize) -> Result> { - let mut buf = vec![0; length]; - ::read_exact(self, &mut buf)?; - Ok(buf) - } - - #[inline] - fn peek_read(&self, n: usize) -> Option<&[u8]> { - self.buffer().get(..n) - } - - #[inline] - fn consume(&mut self, n: usize) { - ::consume(self, n); - } -} - -#[cfg(test)] -mod test { - use super::IoReader; - - #[test] - fn test_fill_buffer() { - let buffer = vec![0u8; 64]; - let mut reader = IoReader::new(buffer.as_slice()); - - reader.fill_buffer(20).unwrap(); - assert_eq!(20, reader.temp_buffer.len()); - - reader.fill_buffer(30).unwrap(); - assert_eq!(30, reader.temp_buffer.len()); - - reader.fill_buffer(5).unwrap(); - assert_eq!(5, reader.temp_buffer.len()); + let (read_slice, remaining) = self.slice.split_at(length); + self.slice = remaining; + Ok(read_slice) } } diff --git a/src/enc/encoder.rs b/src/enc/encoder.rs new file mode 100644 index 000000000..f2c83f9f1 --- /dev/null +++ b/src/enc/encoder.rs @@ -0,0 +1,54 @@ +use super::{write::Writer, Encoder}; +use crate::{config::Config, utils::Sealed}; + +/// An Encoder that writes bytes into a given writer `W`. +/// +/// This struct should rarely be used. +/// In most cases, prefer any of the `encode` functions. +/// +/// The ByteOrder that is chosen will impact the endianness that +/// is used to write integers to the writer. 
+/// +/// ``` +/// # use bincode::enc::{write::SliceWriter, EncoderImpl, Encode}; +/// # use bincode::config::{self, Config}; +/// # let config = config::Configuration::standard().with_fixed_int_encoding().with_big_endian(); +/// let slice: &mut [u8] = &mut [0, 0, 0, 0]; +/// let mut encoder = EncoderImpl::new(SliceWriter::new(slice), config); +/// // this u32 can be any Encodable +/// 5u32.encode(&mut encoder).unwrap(); +/// assert_eq!(encoder.into_writer().bytes_written(), 4); +/// assert_eq!(slice, [0, 0, 0, 5]); +/// ``` +pub struct EncoderImpl { + writer: W, + config: C, +} + +impl EncoderImpl { + /// Create a new Encoder + pub fn new(writer: W, config: C) -> EncoderImpl { + EncoderImpl { writer, config } + } + + /// Return the underlying writer + pub fn into_writer(self) -> W { + self.writer + } +} + +impl<'a, W: Writer, C: Config> Encoder for &'a mut EncoderImpl { + type W = W; + + type C = C; + + fn writer(&mut self) -> &mut Self::W { + &mut self.writer + } + + fn config(&self) -> &Self::C { + &self.config + } +} + +impl<'a, W: Writer, C: Config> Sealed for &'a mut EncoderImpl {} diff --git a/src/enc/impl_tuples.rs b/src/enc/impl_tuples.rs new file mode 100644 index 000000000..3ab46b10d --- /dev/null +++ b/src/enc/impl_tuples.rs @@ -0,0 +1,138 @@ +use super::{Encode, Encoder}; +use crate::error::EncodeError; + +impl Encode for (A,) +where + A: Encode, +{ + fn encode<_E: Encoder>(&self, mut encoder: _E) -> Result<(), EncodeError> { + self.0.encode(&mut encoder)?; + Ok(()) + } +} + +impl Encode for (A, B) +where + A: Encode, + B: Encode, +{ + fn encode<_E: Encoder>(&self, mut encoder: _E) -> Result<(), EncodeError> { + self.0.encode(&mut encoder)?; + self.1.encode(&mut encoder)?; + Ok(()) + } +} + +impl Encode for (A, B, C) +where + A: Encode, + B: Encode, + C: Encode, +{ + fn encode<_E: Encoder>(&self, mut encoder: _E) -> Result<(), EncodeError> { + self.0.encode(&mut encoder)?; + self.1.encode(&mut encoder)?; + self.2.encode(&mut encoder)?; + Ok(()) + } +} 
+ +impl Encode for (A, B, C, D) +where + A: Encode, + B: Encode, + C: Encode, + D: Encode, +{ + fn encode<_E: Encoder>(&self, mut encoder: _E) -> Result<(), EncodeError> { + self.0.encode(&mut encoder)?; + self.1.encode(&mut encoder)?; + self.2.encode(&mut encoder)?; + self.3.encode(&mut encoder)?; + Ok(()) + } +} + +impl Encode for (A, B, C, D, E) +where + A: Encode, + B: Encode, + C: Encode, + D: Encode, + E: Encode, +{ + fn encode<_E: Encoder>(&self, mut encoder: _E) -> Result<(), EncodeError> { + self.0.encode(&mut encoder)?; + self.1.encode(&mut encoder)?; + self.2.encode(&mut encoder)?; + self.3.encode(&mut encoder)?; + self.4.encode(&mut encoder)?; + Ok(()) + } +} + +impl Encode for (A, B, C, D, E, F) +where + A: Encode, + B: Encode, + C: Encode, + D: Encode, + E: Encode, + F: Encode, +{ + fn encode<_E: Encoder>(&self, mut encoder: _E) -> Result<(), EncodeError> { + self.0.encode(&mut encoder)?; + self.1.encode(&mut encoder)?; + self.2.encode(&mut encoder)?; + self.3.encode(&mut encoder)?; + self.4.encode(&mut encoder)?; + self.5.encode(&mut encoder)?; + Ok(()) + } +} + +impl Encode for (A, B, C, D, E, F, G) +where + A: Encode, + B: Encode, + C: Encode, + D: Encode, + E: Encode, + F: Encode, + G: Encode, +{ + fn encode<_E: Encoder>(&self, mut encoder: _E) -> Result<(), EncodeError> { + self.0.encode(&mut encoder)?; + self.1.encode(&mut encoder)?; + self.2.encode(&mut encoder)?; + self.3.encode(&mut encoder)?; + self.4.encode(&mut encoder)?; + self.5.encode(&mut encoder)?; + self.6.encode(&mut encoder)?; + Ok(()) + } +} + +impl Encode for (A, B, C, D, E, F, G, H) +where + A: Encode, + B: Encode, + C: Encode, + D: Encode, + E: Encode, + F: Encode, + G: Encode, + H: Encode, +{ + fn encode<_E: Encoder>(&self, mut encoder: _E) -> Result<(), EncodeError> { + self.0.encode(&mut encoder)?; + self.1.encode(&mut encoder)?; + self.2.encode(&mut encoder)?; + self.3.encode(&mut encoder)?; + self.4.encode(&mut encoder)?; + self.5.encode(&mut encoder)?; + 
self.6.encode(&mut encoder)?; + self.7.encode(&mut encoder)?; + Ok(()) + } +} diff --git a/src/enc/impls.rs b/src/enc/impls.rs new file mode 100644 index 000000000..6685839c7 --- /dev/null +++ b/src/enc/impls.rs @@ -0,0 +1,472 @@ +use super::{write::Writer, Encode, Encoder}; +use crate::{ + config::{ + Endian, IntEncoding, InternalArrayLengthConfig, InternalEndianConfig, + InternalIntEncodingConfig, + }, + error::EncodeError, +}; +use core::{ + cell::{Cell, RefCell}, + num::{ + NonZeroI128, NonZeroI16, NonZeroI32, NonZeroI64, NonZeroI8, NonZeroIsize, NonZeroU128, + NonZeroU16, NonZeroU32, NonZeroU64, NonZeroU8, NonZeroUsize, + }, + ops::{Bound, Range, RangeInclusive}, + time::Duration, +}; + +impl Encode for bool { + fn encode(&self, encoder: E) -> Result<(), EncodeError> { + if *self { 1u8 } else { 0u8 }.encode(encoder) + } +} + +impl Encode for u8 { + fn encode(&self, mut encoder: E) -> Result<(), EncodeError> { + encoder.writer().write(&[*self]) + } +} + +impl Encode for NonZeroU8 { + fn encode(&self, encoder: E) -> Result<(), EncodeError> { + self.get().encode(encoder) + } +} + +impl Encode for u16 { + fn encode(&self, mut encoder: E) -> Result<(), EncodeError> { + match E::C::INT_ENCODING { + IntEncoding::Variable => { + crate::varint::varint_encode_u16(encoder.writer(), E::C::ENDIAN, *self) + } + IntEncoding::Fixed => match E::C::ENDIAN { + Endian::Big => encoder.writer().write(&self.to_be_bytes()), + Endian::Little => encoder.writer().write(&self.to_le_bytes()), + }, + } + } +} + +impl Encode for NonZeroU16 { + fn encode(&self, encoder: E) -> Result<(), EncodeError> { + self.get().encode(encoder) + } +} + +impl Encode for u32 { + fn encode(&self, mut encoder: E) -> Result<(), EncodeError> { + match E::C::INT_ENCODING { + IntEncoding::Variable => { + crate::varint::varint_encode_u32(encoder.writer(), E::C::ENDIAN, *self) + } + IntEncoding::Fixed => match E::C::ENDIAN { + Endian::Big => encoder.writer().write(&self.to_be_bytes()), + Endian::Little => 
encoder.writer().write(&self.to_le_bytes()), + }, + } + } +} + +impl Encode for NonZeroU32 { + fn encode(&self, encoder: E) -> Result<(), EncodeError> { + self.get().encode(encoder) + } +} + +impl Encode for u64 { + fn encode(&self, mut encoder: E) -> Result<(), EncodeError> { + match E::C::INT_ENCODING { + IntEncoding::Variable => { + crate::varint::varint_encode_u64(encoder.writer(), E::C::ENDIAN, *self) + } + IntEncoding::Fixed => match E::C::ENDIAN { + Endian::Big => encoder.writer().write(&self.to_be_bytes()), + Endian::Little => encoder.writer().write(&self.to_le_bytes()), + }, + } + } +} + +impl Encode for NonZeroU64 { + fn encode(&self, encoder: E) -> Result<(), EncodeError> { + self.get().encode(encoder) + } +} + +impl Encode for u128 { + fn encode(&self, mut encoder: E) -> Result<(), EncodeError> { + match E::C::INT_ENCODING { + IntEncoding::Variable => { + crate::varint::varint_encode_u128(encoder.writer(), E::C::ENDIAN, *self) + } + IntEncoding::Fixed => match E::C::ENDIAN { + Endian::Big => encoder.writer().write(&self.to_be_bytes()), + Endian::Little => encoder.writer().write(&self.to_le_bytes()), + }, + } + } +} + +impl Encode for NonZeroU128 { + fn encode(&self, encoder: E) -> Result<(), EncodeError> { + self.get().encode(encoder) + } +} + +impl Encode for usize { + fn encode(&self, mut encoder: E) -> Result<(), EncodeError> { + match E::C::INT_ENCODING { + IntEncoding::Variable => { + crate::varint::varint_encode_usize(encoder.writer(), E::C::ENDIAN, *self) + } + IntEncoding::Fixed => match E::C::ENDIAN { + Endian::Big => encoder.writer().write(&self.to_be_bytes()), + Endian::Little => encoder.writer().write(&self.to_le_bytes()), + }, + } + } +} + +impl Encode for NonZeroUsize { + fn encode(&self, encoder: E) -> Result<(), EncodeError> { + self.get().encode(encoder) + } +} + +impl Encode for i8 { + fn encode(&self, mut encoder: E) -> Result<(), EncodeError> { + encoder.writer().write(&[*self as u8]) + } +} + +impl Encode for NonZeroI8 { + fn 
encode(&self, encoder: E) -> Result<(), EncodeError> { + self.get().encode(encoder) + } +} + +impl Encode for i16 { + fn encode(&self, mut encoder: E) -> Result<(), EncodeError> { + match E::C::INT_ENCODING { + IntEncoding::Variable => { + crate::varint::varint_encode_i16(encoder.writer(), E::C::ENDIAN, *self) + } + IntEncoding::Fixed => match E::C::ENDIAN { + Endian::Big => encoder.writer().write(&self.to_be_bytes()), + Endian::Little => encoder.writer().write(&self.to_le_bytes()), + }, + } + } +} + +impl Encode for NonZeroI16 { + fn encode(&self, encoder: E) -> Result<(), EncodeError> { + self.get().encode(encoder) + } +} + +impl Encode for i32 { + fn encode(&self, mut encoder: E) -> Result<(), EncodeError> { + match E::C::INT_ENCODING { + IntEncoding::Variable => { + crate::varint::varint_encode_i32(encoder.writer(), E::C::ENDIAN, *self) + } + IntEncoding::Fixed => match E::C::ENDIAN { + Endian::Big => encoder.writer().write(&self.to_be_bytes()), + Endian::Little => encoder.writer().write(&self.to_le_bytes()), + }, + } + } +} + +impl Encode for NonZeroI32 { + fn encode(&self, encoder: E) -> Result<(), EncodeError> { + self.get().encode(encoder) + } +} + +impl Encode for i64 { + fn encode(&self, mut encoder: E) -> Result<(), EncodeError> { + match E::C::INT_ENCODING { + IntEncoding::Variable => { + crate::varint::varint_encode_i64(encoder.writer(), E::C::ENDIAN, *self) + } + IntEncoding::Fixed => match E::C::ENDIAN { + Endian::Big => encoder.writer().write(&self.to_be_bytes()), + Endian::Little => encoder.writer().write(&self.to_le_bytes()), + }, + } + } +} + +impl Encode for NonZeroI64 { + fn encode(&self, encoder: E) -> Result<(), EncodeError> { + self.get().encode(encoder) + } +} + +impl Encode for i128 { + fn encode(&self, mut encoder: E) -> Result<(), EncodeError> { + match E::C::INT_ENCODING { + IntEncoding::Variable => { + crate::varint::varint_encode_i128(encoder.writer(), E::C::ENDIAN, *self) + } + IntEncoding::Fixed => match E::C::ENDIAN { + Endian::Big 
=> encoder.writer().write(&self.to_be_bytes()), + Endian::Little => encoder.writer().write(&self.to_le_bytes()), + }, + } + } +} + +impl Encode for NonZeroI128 { + fn encode(&self, encoder: E) -> Result<(), EncodeError> { + self.get().encode(encoder) + } +} + +impl Encode for isize { + fn encode(&self, mut encoder: E) -> Result<(), EncodeError> { + match E::C::INT_ENCODING { + IntEncoding::Variable => { + crate::varint::varint_encode_isize(encoder.writer(), E::C::ENDIAN, *self) + } + IntEncoding::Fixed => match E::C::ENDIAN { + Endian::Big => encoder.writer().write(&self.to_be_bytes()), + Endian::Little => encoder.writer().write(&self.to_le_bytes()), + }, + } + } +} + +impl Encode for NonZeroIsize { + fn encode(&self, encoder: E) -> Result<(), EncodeError> { + self.get().encode(encoder) + } +} + +impl Encode for f32 { + fn encode(&self, mut encoder: E) -> Result<(), EncodeError> { + match E::C::ENDIAN { + Endian::Big => encoder.writer().write(&self.to_be_bytes()), + Endian::Little => encoder.writer().write(&self.to_le_bytes()), + } + } +} + +impl Encode for f64 { + fn encode(&self, mut encoder: E) -> Result<(), EncodeError> { + match E::C::ENDIAN { + Endian::Big => encoder.writer().write(&self.to_be_bytes()), + Endian::Little => encoder.writer().write(&self.to_le_bytes()), + } + } +} + +impl Encode for char { + fn encode(&self, mut encoder: E) -> Result<(), EncodeError> { + encode_utf8(encoder.writer(), *self) + } +} + +impl Encode for &'_ [u8] { + fn encode(&self, mut encoder: E) -> Result<(), EncodeError> { + self.len().encode(&mut encoder)?; + encoder.writer().write(self) + } +} + +const TAG_CONT: u8 = 0b1000_0000; +const TAG_TWO_B: u8 = 0b1100_0000; +const TAG_THREE_B: u8 = 0b1110_0000; +const TAG_FOUR_B: u8 = 0b1111_0000; +const MAX_ONE_B: u32 = 0x80; +const MAX_TWO_B: u32 = 0x800; +const MAX_THREE_B: u32 = 0x10000; + +fn encode_utf8(writer: &mut impl Writer, c: char) -> Result<(), EncodeError> { + let code = c as u32; + + if code < MAX_ONE_B { + 
writer.write(&[c as u8]) + } else if code < MAX_TWO_B { + let mut buf = [0u8; 2]; + buf[0] = (code >> 6 & 0x1F) as u8 | TAG_TWO_B; + buf[1] = (code & 0x3F) as u8 | TAG_CONT; + writer.write(&buf) + } else if code < MAX_THREE_B { + let mut buf = [0u8; 3]; + buf[0] = (code >> 12 & 0x0F) as u8 | TAG_THREE_B; + buf[1] = (code >> 6 & 0x3F) as u8 | TAG_CONT; + buf[2] = (code & 0x3F) as u8 | TAG_CONT; + writer.write(&buf) + } else { + let mut buf = [0u8; 4]; + buf[0] = (code >> 18 & 0x07) as u8 | TAG_FOUR_B; + buf[1] = (code >> 12 & 0x3F) as u8 | TAG_CONT; + buf[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT; + buf[3] = (code & 0x3F) as u8 | TAG_CONT; + writer.write(&buf) + } +} + +// BlockedTODO: https://github.com/rust-lang/rust/issues/37653 +// +// We'll want to implement encoding for both &[u8] and &[T: Encode], +// but those implementations overlap because u8 also implements Encodeabl +// +// default impl Encode for &'_ [u8] { +// fn encode(&self, mut encoder: E) -> Result<(), EncodeError> { +// encoder.encode_slice(*self) +// } +// } +// +// impl Encode for &'_ [T] { +// fn encode(&self, mut encoder: E) -> Result<(), EncodeError> { +// self.len().encode(&mut encoder)?; +// for item in self.iter() { +// item.encode(&mut encoder)?; +// } +// Ok(()) +// } +// } + +impl Encode for &'_ str { + fn encode(&self, encoder: E) -> Result<(), EncodeError> { + self.as_bytes().encode(encoder) + } +} + +impl Encode for [T; N] +where + T: Encode, +{ + fn encode(&self, mut encoder: E) -> Result<(), EncodeError> { + if !E::C::SKIP_FIXED_ARRAY_LENGTH { + N.encode(&mut encoder)?; + } + for item in self.iter() { + item.encode(&mut encoder)?; + } + Ok(()) + } +} + +impl Encode for Option +where + T: Encode, +{ + fn encode(&self, mut encoder: E) -> Result<(), EncodeError> { + if let Some(val) = self { + 1u8.encode(&mut encoder)?; + val.encode(encoder) + } else { + 0u8.encode(encoder) + } + } +} + +impl Encode for Result +where + T: Encode, + U: Encode, +{ + fn encode(&self, mut encoder: E) -> 
Result<(), EncodeError> { + match self { + Ok(val) => { + 0u8.encode(&mut encoder)?; + val.encode(encoder) + } + Err(err) => { + 1u8.encode(&mut encoder)?; + err.encode(encoder) + } + } + } +} + +impl Encode for Cell +where + T: Encode + Copy, +{ + fn encode(&self, encoder: E) -> Result<(), EncodeError> { + T::encode(&self.get(), encoder) + } +} + +impl Encode for RefCell +where + T: Encode, +{ + fn encode(&self, encoder: E) -> Result<(), EncodeError> { + let borrow_guard = self + .try_borrow() + .map_err(|e| EncodeError::RefCellAlreadyBorrowed { + inner: e, + type_name: core::any::type_name::>(), + })?; + T::encode(&borrow_guard, encoder) + } +} + +impl Encode for Duration { + fn encode(&self, mut encoder: E) -> Result<(), EncodeError> { + self.as_secs().encode(&mut encoder)?; + self.subsec_nanos().encode(&mut encoder)?; + Ok(()) + } +} + +impl Encode for Range +where + T: Encode, +{ + fn encode(&self, mut encoder: E) -> Result<(), EncodeError> { + self.start.encode(&mut encoder)?; + self.end.encode(&mut encoder)?; + Ok(()) + } +} + +impl Encode for RangeInclusive +where + T: Encode, +{ + fn encode(&self, mut encoder: E) -> Result<(), EncodeError> { + self.start().encode(&mut encoder)?; + self.end().encode(&mut encoder)?; + Ok(()) + } +} + +impl Encode for Bound +where + T: Encode, +{ + fn encode(&self, mut encoder: E) -> Result<(), EncodeError> { + match self { + Self::Unbounded => { + 0u32.encode(encoder)?; + } + Self::Included(val) => { + 1u32.encode(&mut encoder)?; + val.encode(encoder)?; + } + Self::Excluded(val) => { + 2u32.encode(&mut encoder)?; + val.encode(encoder)?; + } + } + Ok(()) + } +} + +impl<'a, T> Encode for &'a T +where + T: Encode, +{ + fn encode(&self, encoder: E) -> Result<(), EncodeError> { + T::encode(self, encoder) + } +} diff --git a/src/enc/mod.rs b/src/enc/mod.rs new file mode 100644 index 000000000..6f5801218 --- /dev/null +++ b/src/enc/mod.rs @@ -0,0 +1,52 @@ +//! Encoder-based structs and traits. 
+ +mod encoder; +mod impl_tuples; +mod impls; + +use self::write::Writer; +use crate::{config::Config, error::EncodeError, utils::Sealed}; + +pub mod write; + +pub use self::encoder::EncoderImpl; + +/// Any source that can be encoded. This trait should be implemented for all types that you want to be able to use with any of the `encode_with` methods. +/// +/// This trait will be automatically implemented if you enable the `derive` feature and add `#[derive(bincode::Encode)]` to your trait. +pub trait Encode { + /// Encode a given type. + fn encode(&self, encoder: E) -> Result<(), EncodeError>; +} + +/// Helper trait to encode basic types into. +pub trait Encoder: Sealed { + /// The concrete [Writer] type + type W: Writer; + + /// The concrete [Config] type + type C: Config; + + /// Returns a mutable reference to the writer + fn writer(&mut self) -> &mut Self::W; + + /// Returns a reference to the config + fn config(&self) -> &Self::C; +} + +impl<'a, T> Encoder for &'a mut T +where + T: Encoder, +{ + type W = T::W; + + type C = T::C; + + fn writer(&mut self) -> &mut Self::W { + T::writer(self) + } + + fn config(&self) -> &Self::C { + T::config(self) + } +} diff --git a/src/enc/write.rs b/src/enc/write.rs new file mode 100644 index 000000000..76188303f --- /dev/null +++ b/src/enc/write.rs @@ -0,0 +1,60 @@ +//! This module contains writer-based structs and traits. +//! +//! Because `std::io::Write` is only limited to `std` and not `core`, we provide our own [Writer]. + +use crate::error::EncodeError; + +/// Trait that indicates that a struct can be used as a destination to encode data too. This is used by [Encode] +/// +/// [Encode]: ../trait.Encode.html +pub trait Writer { + /// Write `bytes` to the underlying writer. Exactly `bytes.len()` bytes must be written, or else an error should be returned. + fn write(&mut self, bytes: &[u8]) -> Result<(), EncodeError>; +} + +/// A helper struct that implements `Writer` for a `&[u8]` slice. 
+/// +/// ``` +/// use bincode::enc::write::{Writer, SliceWriter}; +/// +/// let destination = &mut [0u8; 100]; +/// let mut writer = SliceWriter::new(destination); +/// writer.write(&[1, 2, 3, 4, 5]).unwrap(); +/// +/// assert_eq!(writer.bytes_written(), 5); +/// assert_eq!(destination[0..6], [1, 2, 3, 4, 5, 0]); +/// ``` +pub struct SliceWriter<'storage> { + slice: &'storage mut [u8], + original_length: usize, +} + +impl<'storage> SliceWriter<'storage> { + /// Create a new instance of `SliceWriter` with the given byte array. + pub fn new(bytes: &'storage mut [u8]) -> SliceWriter<'storage> { + let original = bytes.len(); + SliceWriter { + slice: bytes, + original_length: original, + } + } + + /// Return the amount of bytes written so far. + pub fn bytes_written(&self) -> usize { + self.original_length - self.slice.len() + } +} + +impl<'storage> Writer for SliceWriter<'storage> { + #[inline(always)] + fn write(&mut self, bytes: &[u8]) -> Result<(), EncodeError> { + if bytes.len() > self.slice.len() { + return Err(EncodeError::UnexpectedEnd); + } + let (a, b) = core::mem::replace(&mut self.slice, &mut []).split_at_mut(bytes.len()); + a.copy_from_slice(bytes); + self.slice = b; + + Ok(()) + } +} diff --git a/src/error.rs b/src/error.rs index 868492466..856dc8a08 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,96 +1,166 @@ -use std::error::Error as StdError; -use std::fmt; -use std::io; -use std::str::Utf8Error; +//! Errors that can be encounting by Encoding and Decoding. -/// The result of a serialization or deserialization operation. -pub type Result = ::std::result::Result; +/// Errors that can be encountered by encoding a type +#[non_exhaustive] +#[derive(Debug)] +pub enum EncodeError { + /// The writer ran out of storage. + UnexpectedEnd, -/// An error that can be produced during (de)serializing. 
-pub type Error = Box; + /// The RefCell is already borrowed + RefCellAlreadyBorrowed { + /// The inner borrow error + inner: core::cell::BorrowError, + /// the type name of the RefCell being encoded that is currently borrowed. + type_name: &'static str, + }, -/// The kind of error that can be produced during a serialization or deserialization. -#[derive(Debug)] -#[non_exhaustive] -pub enum ErrorKind { - /// If the error stems from the reader/writer that is being used - /// during (de)serialization, that error will be stored and returned here. - Io(io::Error), - /// Returned if the deserializer attempts to deserialize a string that is not valid utf8 - InvalidUtf8Encoding(Utf8Error), - /// Returned if the deserializer attempts to deserialize a bool that was - /// not encoded as either a 1 or a 0 - InvalidBoolEncoding(u8), - /// Returned if the deserializer attempts to deserialize a char that is not in the correct format. - InvalidCharEncoding, - /// Returned if the deserializer attempts to deserialize the tag of an enum that is - /// not in the expected ranges - InvalidTagEncoding(usize), - /// Serde has a deserialize_any method that lets the format hint to the - /// object which route to take in deserializing. - DeserializeAnyNotSupported, - /// If (de)serializing a message takes more than the provided size limit, this - /// error is returned. - SizeLimit, - /// Bincode can not encode sequences of unknown length (like iterators). - SequenceMustHaveLength, - /// A custom error message from Serde. 
- Custom(String), -} + /// An uncommon error occured, see the inner text for more information + Other(&'static str), -impl StdError for ErrorKind { - fn source(&self) -> Option<&(dyn StdError + 'static)> { - match *self { - ErrorKind::Io(ref err) => Some(err), - ErrorKind::InvalidUtf8Encoding(_) => None, - ErrorKind::InvalidBoolEncoding(_) => None, - ErrorKind::InvalidCharEncoding => None, - ErrorKind::InvalidTagEncoding(_) => None, - ErrorKind::SequenceMustHaveLength => None, - ErrorKind::DeserializeAnyNotSupported => None, - ErrorKind::SizeLimit => None, - ErrorKind::Custom(_) => None, - } - } + /// A `std::path::Path` was being encoded but did not contain a valid `&str` representation + #[cfg(feature = "std")] + InvalidPathCharacters, + + /// The targetted writer encountered an `std::io::Error` + #[cfg(feature = "std")] + Io { + /// The encountered error + error: std::io::Error, + /// The amount of bytes that were written before the error occured + index: usize, + }, + + /// The encoder tried to encode a `Mutex` or `RwLock`, but the locking failed + #[cfg(feature = "std")] + LockFailed { + /// The type name of the mutex for debugging purposes + type_name: &'static str, + }, + + /// The encoder tried to encode a `SystemTime`, but it was before `SystemTime::UNIX_EPOCH` + #[cfg(feature = "std")] + InvalidSystemTime { + /// The error that was thrown by the SystemTime + inner: std::time::SystemTimeError, + /// The SystemTime that caused the error + time: std::time::SystemTime, + }, } -impl From for Error { - fn from(err: io::Error) -> Error { - ErrorKind::Io(err).into() - } +/// Errors that can be encounted by decoding a type +#[non_exhaustive] +#[derive(Debug)] +pub enum DecodeError { + /// The reader reached its end but more bytes were expected. + UnexpectedEnd, + + /// Invalid type was found. The decoder tried to read type `expected`, but found type `found` instead. 
+ InvalidIntegerType { + /// The type that was being read from the reader + expected: IntegerType, + /// The type that was encoded in the data + found: IntegerType, + }, + + /// The decoder tried to decode any of the `NonZero*` types but the value is zero + NonZeroTypeIsZero { + /// The type that was being read from the reader + non_zero_type: IntegerType, + }, + + /// Invalid enum variant was found. The decoder tried to decode variant index `found`, but the variant index should be between `min` and `max`. + UnexpectedVariant { + /// The type name that was being decoded. + type_name: &'static str, + + /// The min index of the enum. Usually this is `0`. + min: u32, + + /// the max index of the enum. + max: u32, + + /// The index of the enum that the decoder encountered + found: u32, + }, + + /// The decoder tried to decode a `str`, but an utf8 error was encountered. + Utf8(core::str::Utf8Error), + + /// The decoder tried to decode a `char` and failed. The given buffer contains the bytes that are read at the moment of failure. + InvalidCharEncoding([u8; 4]), + + /// The decoder tried to decode a `bool` and failed. The given value is what is actually read. + InvalidBooleanValue(u8), + + /// The decoder tried to decode an array of length `required`, but the binary data contained an array of length `found`. + ArrayLengthMismatch { + /// The length of the array required by the rust type. + required: usize, + /// The length of the array found in the binary format. 
+ found: usize, + }, + + /// The decoder tried to decode a `CStr` or `CString`, but the incoming data contained a 0 byte + #[cfg(feature = "std")] + CStrNulError { + /// The inner exception + inner: std::ffi::FromBytesWithNulError, + }, } -impl fmt::Display for ErrorKind { - fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - match *self { - ErrorKind::Io(ref ioerr) => write!(fmt, "io error: {}", ioerr), - ErrorKind::InvalidUtf8Encoding(ref e) => write!(fmt, "string is not valid utf8: {}", e), - ErrorKind::InvalidBoolEncoding(b) => { - write!(fmt, "invalid u8 while decoding bool, expected 0 or 1, found {}", b) - } - ErrorKind::InvalidCharEncoding => write!(fmt, "char is not valid"), - ErrorKind::InvalidTagEncoding(tag) => { - write!(fmt, "tag for enum is not valid, found {}", tag) - } - ErrorKind::SequenceMustHaveLength => write!(fmt, "Bincode can only encode sequences and maps that have a knowable size ahead of time"), - ErrorKind::SizeLimit => write!(fmt, "the size limit has been reached"), - ErrorKind::DeserializeAnyNotSupported => write!( - fmt, - "Bincode does not support the serde::Deserializer::deserialize_any method" - ), - ErrorKind::Custom(ref s) => s.fmt(fmt), +impl DecodeError { + /// If the current error is `InvalidIntegerType`, change the `expected` and + /// `found` values from `Ux` to `Ix`. This is needed to have correct error + /// reporting in src/varint/decode_signed.rs since this calls + /// src/varint/decode_unsigned.rs and needs to correct the `expected` and + /// `found` types. + pub(crate) fn change_integer_type_to_signed(self) -> DecodeError { + match self { + Self::InvalidIntegerType { expected, found } => Self::InvalidIntegerType { + expected: expected.into_signed(), + found: found.into_signed(), + }, + other => other, } } } -impl serde::de::Error for Error { - fn custom(desc: T) -> Error { - ErrorKind::Custom(desc.to_string()).into() - } +/// Integer types. Used by [DecodeError]. 
These types have no purpose other than being shown in errors. +#[non_exhaustive] +#[derive(Debug)] +#[allow(missing_docs)] +pub enum IntegerType { + U8, + U16, + U32, + U64, + U128, + Usize, + + I8, + I16, + I32, + I64, + I128, + Isize, + + Reserved, } -impl serde::ser::Error for Error { - fn custom(msg: T) -> Self { - ErrorKind::Custom(msg.to_string()).into() +impl IntegerType { + /// Change the `Ux` value to the associated `Ix` value. + /// Returns the old value if `self` is already `Ix`. + pub(crate) fn into_signed(self) -> Self { + match self { + Self::U8 => Self::I8, + Self::U16 => Self::I16, + Self::U32 => Self::I32, + Self::U64 => Self::I64, + Self::U128 => Self::I128, + Self::Usize => Self::Isize, + + other => other, + } } } diff --git a/src/features/atomic.rs b/src/features/atomic.rs new file mode 100644 index 000000000..2f2e85a67 --- /dev/null +++ b/src/features/atomic.rs @@ -0,0 +1,137 @@ +use crate::{de::Decode, enc::Encode}; +use core::sync::atomic::{ + AtomicBool, AtomicI16, AtomicI32, AtomicI64, AtomicI8, AtomicIsize, AtomicU16, AtomicU32, + AtomicU64, AtomicU8, AtomicUsize, Ordering, +}; + +impl Encode for AtomicBool { + fn encode(&self, encoder: E) -> Result<(), crate::error::EncodeError> { + self.load(Ordering::SeqCst).encode(encoder) + } +} + +impl Decode for AtomicBool { + fn decode(decoder: D) -> Result { + Ok(AtomicBool::new(Decode::decode(decoder)?)) + } +} + +impl Encode for AtomicU8 { + fn encode(&self, encoder: E) -> Result<(), crate::error::EncodeError> { + self.load(Ordering::SeqCst).encode(encoder) + } +} + +impl Decode for AtomicU8 { + fn decode(decoder: D) -> Result { + Ok(AtomicU8::new(Decode::decode(decoder)?)) + } +} + +impl Encode for AtomicU16 { + fn encode(&self, encoder: E) -> Result<(), crate::error::EncodeError> { + self.load(Ordering::SeqCst).encode(encoder) + } +} + +impl Decode for AtomicU16 { + fn decode(decoder: D) -> Result { + Ok(AtomicU16::new(Decode::decode(decoder)?)) + } +} + +impl Encode for AtomicU32 { + fn 
encode(&self, encoder: E) -> Result<(), crate::error::EncodeError> { + self.load(Ordering::SeqCst).encode(encoder) + } +} + +impl Decode for AtomicU32 { + fn decode(decoder: D) -> Result { + Ok(AtomicU32::new(Decode::decode(decoder)?)) + } +} + +impl Encode for AtomicU64 { + fn encode(&self, encoder: E) -> Result<(), crate::error::EncodeError> { + self.load(Ordering::SeqCst).encode(encoder) + } +} + +impl Decode for AtomicU64 { + fn decode(decoder: D) -> Result { + Ok(AtomicU64::new(Decode::decode(decoder)?)) + } +} + +impl Encode for AtomicUsize { + fn encode(&self, encoder: E) -> Result<(), crate::error::EncodeError> { + self.load(Ordering::SeqCst).encode(encoder) + } +} + +impl Decode for AtomicUsize { + fn decode(decoder: D) -> Result { + Ok(AtomicUsize::new(Decode::decode(decoder)?)) + } +} + +impl Encode for AtomicI8 { + fn encode(&self, encoder: E) -> Result<(), crate::error::EncodeError> { + self.load(Ordering::SeqCst).encode(encoder) + } +} + +impl Decode for AtomicI8 { + fn decode(decoder: D) -> Result { + Ok(AtomicI8::new(Decode::decode(decoder)?)) + } +} + +impl Encode for AtomicI16 { + fn encode(&self, encoder: E) -> Result<(), crate::error::EncodeError> { + self.load(Ordering::SeqCst).encode(encoder) + } +} + +impl Decode for AtomicI16 { + fn decode(decoder: D) -> Result { + Ok(AtomicI16::new(Decode::decode(decoder)?)) + } +} + +impl Encode for AtomicI32 { + fn encode(&self, encoder: E) -> Result<(), crate::error::EncodeError> { + self.load(Ordering::SeqCst).encode(encoder) + } +} + +impl Decode for AtomicI32 { + fn decode(decoder: D) -> Result { + Ok(AtomicI32::new(Decode::decode(decoder)?)) + } +} + +impl Encode for AtomicI64 { + fn encode(&self, encoder: E) -> Result<(), crate::error::EncodeError> { + self.load(Ordering::SeqCst).encode(encoder) + } +} + +impl Decode for AtomicI64 { + fn decode(decoder: D) -> Result { + Ok(AtomicI64::new(Decode::decode(decoder)?)) + } +} + +impl Encode for AtomicIsize { + fn encode(&self, encoder: E) -> Result<(), 
crate::error::EncodeError> { + self.load(Ordering::SeqCst).encode(encoder) + } +} + +impl Decode for AtomicIsize { + fn decode(decoder: D) -> Result { + Ok(AtomicIsize::new(Decode::decode(decoder)?)) + } +} diff --git a/src/features/derive.rs b/src/features/derive.rs new file mode 100644 index 000000000..57b3a5824 --- /dev/null +++ b/src/features/derive.rs @@ -0,0 +1,2 @@ +#[cfg_attr(docsrs, doc(cfg(feature = "derive")))] +pub use bincode_derive::{Decode, Encode}; diff --git a/src/features/impl_alloc.rs b/src/features/impl_alloc.rs new file mode 100644 index 000000000..f33a786fe --- /dev/null +++ b/src/features/impl_alloc.rs @@ -0,0 +1,289 @@ +use crate::{ + de::{Decode, Decoder}, + enc::{self, Encode, Encoder}, + error::{DecodeError, EncodeError}, + Config, +}; +#[cfg(feature = "atomic")] +use alloc::sync::Arc; +use alloc::{borrow::Cow, boxed::Box, collections::*, rc::Rc, string::String, vec::Vec}; + +#[derive(Default)] +struct VecWriter { + inner: Vec, +} + +impl enc::write::Writer for VecWriter { + fn write(&mut self, bytes: &[u8]) -> Result<(), EncodeError> { + self.inner.extend_from_slice(bytes); + Ok(()) + } +} + +/// Encode the given value into a `Vec` with the given `Config`. See the [config] module for more information. 
+/// +/// [config]: config/index.html +#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] +pub fn encode_to_vec(val: E, config: C) -> Result, EncodeError> { + let writer = VecWriter::default(); + let mut encoder = enc::EncoderImpl::<_, C>::new(writer, config); + val.encode(&mut encoder)?; + Ok(encoder.into_writer().inner) +} + +impl Decode for BinaryHeap +where + T: Decode + Ord, +{ + fn decode(mut decoder: D) -> Result { + let len = usize::decode(&mut decoder)?; + let mut map = BinaryHeap::with_capacity(len); + for _ in 0..len { + let key = T::decode(&mut decoder)?; + map.push(key); + } + Ok(map) + } +} + +impl Encode for BinaryHeap +where + T: Encode + Ord, +{ + fn encode(&self, mut encoder: E) -> Result<(), EncodeError> { + self.len().encode(&mut encoder)?; + for val in self.iter() { + val.encode(&mut encoder)?; + } + Ok(()) + } +} + +impl Decode for BTreeMap +where + K: Decode + Ord, + V: Decode, +{ + fn decode(mut decoder: D) -> Result { + let len = usize::decode(&mut decoder)?; + let mut map = BTreeMap::new(); + for _ in 0..len { + let key = K::decode(&mut decoder)?; + let value = V::decode(&mut decoder)?; + map.insert(key, value); + } + Ok(map) + } +} + +impl Encode for BTreeMap +where + K: Encode + Ord, + V: Encode, +{ + fn encode(&self, mut encoder: E) -> Result<(), EncodeError> { + self.len().encode(&mut encoder)?; + for (key, val) in self.iter() { + key.encode(&mut encoder)?; + val.encode(&mut encoder)?; + } + Ok(()) + } +} + +impl Decode for BTreeSet +where + T: Decode + Ord, +{ + fn decode(mut decoder: D) -> Result { + let len = usize::decode(&mut decoder)?; + let mut map = BTreeSet::new(); + for _ in 0..len { + let key = T::decode(&mut decoder)?; + map.insert(key); + } + Ok(map) + } +} + +impl Encode for BTreeSet +where + T: Encode + Ord, +{ + fn encode(&self, mut encoder: E) -> Result<(), EncodeError> { + self.len().encode(&mut encoder)?; + for item in self.iter() { + item.encode(&mut encoder)?; + } + Ok(()) + } +} + +impl Decode for VecDeque +where + 
T: Decode, +{ + fn decode(mut decoder: D) -> Result { + let len = usize::decode(&mut decoder)?; + let mut map = VecDeque::with_capacity(len); + for _ in 0..len { + let key = T::decode(&mut decoder)?; + map.push_back(key); + } + Ok(map) + } +} + +impl Encode for VecDeque +where + T: Encode, +{ + fn encode(&self, mut encoder: E) -> Result<(), EncodeError> { + self.len().encode(&mut encoder)?; + for item in self.iter() { + item.encode(&mut encoder)?; + } + Ok(()) + } +} + +impl Decode for Vec +where + T: Decode, +{ + fn decode(mut decoder: D) -> Result { + let len = usize::decode(&mut decoder)?; + let mut vec = Vec::with_capacity(len); + for _ in 0..len { + vec.push(T::decode(&mut decoder)?); + } + Ok(vec) + } +} + +impl Encode for Vec +where + T: Encode, +{ + fn encode(&self, mut encoder: E) -> Result<(), EncodeError> { + self.len().encode(&mut encoder)?; + for item in self.iter() { + item.encode(&mut encoder)?; + } + Ok(()) + } +} + +impl Decode for String { + fn decode(decoder: D) -> Result { + let bytes = Vec::::decode(decoder)?; + String::from_utf8(bytes).map_err(|e| DecodeError::Utf8(e.utf8_error())) + } +} + +impl Encode for String { + fn encode(&self, encoder: E) -> Result<(), EncodeError> { + self.as_bytes().encode(encoder) + } +} + +impl Decode for Box +where + T: Decode, +{ + fn decode(decoder: D) -> Result { + let t = T::decode(decoder)?; + Ok(Box::new(t)) + } +} + +impl Encode for Box +where + T: Encode, +{ + fn encode(&self, encoder: E) -> Result<(), EncodeError> { + T::encode(self, encoder) + } +} + +impl Decode for Box<[T]> +where + T: Decode, +{ + fn decode(decoder: D) -> Result { + let vec = Vec::decode(decoder)?; + Ok(vec.into_boxed_slice()) + } +} + +impl Encode for Box<[T]> +where + T: Encode, +{ + fn encode(&self, mut encoder: E) -> Result<(), EncodeError> { + self.len().encode(&mut encoder)?; + for item in self.iter() { + item.encode(&mut encoder)?; + } + Ok(()) + } +} + +impl<'cow, T> Decode for Cow<'cow, T> +where + T: Decode + Clone, +{ + fn 
decode(decoder: D) -> Result { + let t = T::decode(decoder)?; + Ok(Cow::Owned(t)) + } +} + +impl<'cow, T> Encode for Cow<'cow, T> +where + T: Encode + Clone, +{ + fn encode(&self, encoder: E) -> Result<(), EncodeError> { + self.as_ref().encode(encoder) + } +} + +impl Decode for Rc +where + T: Decode, +{ + fn decode(decoder: D) -> Result { + let t = T::decode(decoder)?; + Ok(Rc::new(t)) + } +} + +impl Encode for Rc +where + T: Encode, +{ + fn encode(&self, encoder: E) -> Result<(), EncodeError> { + T::encode(self, encoder) + } +} + +#[cfg(feature = "atomic")] +impl Decode for Arc +where + T: Decode, +{ + fn decode(decoder: D) -> Result { + let t = T::decode(decoder)?; + Ok(Arc::new(t)) + } +} + +#[cfg(feature = "atomic")] +impl Encode for Arc +where + T: Encode, +{ + fn encode(&self, encoder: E) -> Result<(), EncodeError> { + T::encode(self, encoder) + } +} diff --git a/src/features/impl_std.rs b/src/features/impl_std.rs new file mode 100644 index 000000000..649c27cc4 --- /dev/null +++ b/src/features/impl_std.rs @@ -0,0 +1,346 @@ +use crate::{ + config::Config, + de::{read::Reader, BorrowDecode, BorrowDecoder, Decode, Decoder, DecoderImpl}, + enc::{write::Writer, Encode, Encoder, EncoderImpl}, + error::{DecodeError, EncodeError}, +}; +use core::time::Duration; +use std::{ + ffi::{CStr, CString}, + io::Read, + net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV4, SocketAddrV6}, + path::{Path, PathBuf}, + sync::{Mutex, RwLock}, + time::SystemTime, +}; + +/// Decode type `D` from the given reader with the given `Config`. The reader can be any type that implements `std::io::Read`, e.g. `std::fs::File`. +/// +/// See the [config] module for more information about config options. 
+/// +/// [config]: config/index.html +#[cfg_attr(docsrs, doc(cfg(feature = "std")))] +pub fn decode_from_std_read( + src: &mut R, + _config: C, +) -> Result { + let reader = IoReader { reader: src }; + let mut decoder = DecoderImpl::<_, C>::new(reader, _config); + D::decode(&mut decoder) +} + +struct IoReader { + reader: R, +} + +impl Reader for IoReader +where + R: std::io::Read, +{ + #[inline(always)] + fn read(&mut self, bytes: &mut [u8]) -> Result<(), DecodeError> { + match self.reader.read_exact(bytes) { + Ok(_) => Ok(()), + Err(_) => Err(DecodeError::UnexpectedEnd), + } + } +} + +impl Reader for std::io::BufReader +where + R: std::io::Read, +{ + fn read(&mut self, bytes: &mut [u8]) -> Result<(), DecodeError> { + match self.read_exact(bytes) { + Ok(_) => Ok(()), + Err(_) => Err(DecodeError::UnexpectedEnd), + } + } + + #[inline] + fn peek_read(&self, n: usize) -> Option<&[u8]> { + self.buffer().get(..n) + } + + #[inline] + fn consume(&mut self, n: usize) { + ::consume(self, n); + } +} + +/// Encode the given value into any type that implements `std::io::Write`, e.g. `std::fs::File`, with the given `Config`. +/// See the [config] module for more information. 
+/// +/// [config]: config/index.html +#[cfg_attr(docsrs, doc(cfg(feature = "std")))] +pub fn encode_into_std_write( + val: E, + dst: &mut W, + config: C, +) -> Result { + let writer = IoWriter { + writer: dst, + bytes_written: 0, + }; + let mut encoder = EncoderImpl::<_, C>::new(writer, config); + val.encode(&mut encoder)?; + Ok(encoder.into_writer().bytes_written) +} + +struct IoWriter<'a, W: std::io::Write> { + writer: &'a mut W, + bytes_written: usize, +} + +impl<'storage, W: std::io::Write> Writer for IoWriter<'storage, W> { + #[inline(always)] + fn write(&mut self, bytes: &[u8]) -> Result<(), EncodeError> { + self.writer + .write_all(bytes) + .map_err(|error| EncodeError::Io { + error, + index: self.bytes_written, + })?; + self.bytes_written += bytes.len(); + Ok(()) + } +} + +impl<'a> Encode for &'a CStr { + fn encode(&self, encoder: E) -> Result<(), EncodeError> { + self.to_bytes_with_nul().encode(encoder) + } +} + +impl<'de> BorrowDecode<'de> for &'de CStr { + fn borrow_decode>(decoder: D) -> Result { + let bytes = <&[u8]>::borrow_decode(decoder)?; + CStr::from_bytes_with_nul(bytes).map_err(|e| DecodeError::CStrNulError { inner: e }) + } +} + +impl Encode for CString { + fn encode(&self, encoder: E) -> Result<(), EncodeError> { + self.as_bytes_with_nul().encode(encoder) + } +} + +impl Decode for CString { + fn decode(decoder: D) -> Result { + // BlockedTODO: https://github.com/rust-lang/rust/issues/73179 + // use `from_vec_with_nul` instead, combined with: + // let bytes = std::vec::Vec::::decode(decoder)?; + + // now we have to allocate twice unfortunately + let vec: std::vec::Vec = std::vec::Vec::decode(decoder)?; + let cstr = + CStr::from_bytes_with_nul(&vec).map_err(|e| DecodeError::CStrNulError { inner: e })?; + Ok(cstr.into()) + } +} + +impl Encode for Mutex +where + T: Encode, +{ + fn encode(&self, encoder: E) -> Result<(), EncodeError> { + let t = self.lock().map_err(|_| EncodeError::LockFailed { + type_name: core::any::type_name::>(), + })?; + 
t.encode(encoder) + } +} + +impl Decode for Mutex +where + T: Decode, +{ + fn decode(decoder: D) -> Result { + let t = T::decode(decoder)?; + Ok(Mutex::new(t)) + } +} + +impl Encode for RwLock +where + T: Encode, +{ + fn encode(&self, encoder: E) -> Result<(), EncodeError> { + let t = self.read().map_err(|_| EncodeError::LockFailed { + type_name: core::any::type_name::>(), + })?; + t.encode(encoder) + } +} + +impl Decode for RwLock +where + T: Decode, +{ + fn decode(decoder: D) -> Result { + let t = T::decode(decoder)?; + Ok(RwLock::new(t)) + } +} + +impl Encode for SystemTime { + fn encode(&self, encoder: E) -> Result<(), EncodeError> { + let duration = self.duration_since(SystemTime::UNIX_EPOCH).map_err(|e| { + EncodeError::InvalidSystemTime { + inner: e, + time: *self, + } + })?; + duration.encode(encoder) + } +} + +impl Decode for SystemTime { + fn decode(decoder: D) -> Result { + let duration = Duration::decode(decoder)?; + Ok(SystemTime::UNIX_EPOCH + duration) + } +} + +impl Encode for &'_ Path { + fn encode(&self, encoder: E) -> Result<(), EncodeError> { + match self.to_str() { + Some(str) => str.encode(encoder), + None => Err(EncodeError::InvalidPathCharacters), + } + } +} + +impl<'de> BorrowDecode<'de> for &'de Path { + fn borrow_decode>(decoder: D) -> Result { + let str = <&'de str>::borrow_decode(decoder)?; + Ok(Path::new(str)) + } +} + +impl Encode for PathBuf { + fn encode(&self, encoder: E) -> Result<(), EncodeError> { + self.as_path().encode(encoder) + } +} + +impl Decode for PathBuf { + fn decode(decoder: D) -> Result { + let string = std::string::String::decode(decoder)?; + Ok(string.into()) + } +} + +impl Encode for IpAddr { + fn encode(&self, mut encoder: E) -> Result<(), EncodeError> { + match self { + IpAddr::V4(v4) => { + 0u32.encode(&mut encoder)?; + v4.encode(encoder) + } + IpAddr::V6(v6) => { + 1u32.encode(&mut encoder)?; + v6.encode(encoder) + } + } + } +} + +impl Decode for IpAddr { + fn decode(mut decoder: D) -> Result { + match 
u32::decode(&mut decoder)? { + 0 => Ok(IpAddr::V4(Ipv4Addr::decode(decoder)?)), + 1 => Ok(IpAddr::V6(Ipv6Addr::decode(decoder)?)), + found => Err(DecodeError::UnexpectedVariant { + min: 0, + max: 1, + found, + type_name: core::any::type_name::(), + }), + } + } +} + +impl Encode for Ipv4Addr { + fn encode(&self, encoder: E) -> Result<(), EncodeError> { + self.octets().encode(encoder) + } +} + +impl Decode for Ipv4Addr { + fn decode(decoder: D) -> Result { + Ok(Self::from(<[u8; 4]>::decode(decoder)?)) + } +} + +impl Encode for Ipv6Addr { + fn encode(&self, encoder: E) -> Result<(), EncodeError> { + self.octets().encode(encoder) + } +} + +impl Decode for Ipv6Addr { + fn decode(decoder: D) -> Result { + Ok(Self::from(<[u8; 16]>::decode(decoder)?)) + } +} + +impl Encode for SocketAddr { + fn encode(&self, mut encoder: E) -> Result<(), EncodeError> { + match self { + SocketAddr::V4(v4) => { + 0u32.encode(&mut encoder)?; + v4.encode(encoder) + } + SocketAddr::V6(v6) => { + 1u32.encode(&mut encoder)?; + v6.encode(encoder) + } + } + } +} + +impl Decode for SocketAddr { + fn decode(mut decoder: D) -> Result { + match u32::decode(&mut decoder)? 
{ + 0 => Ok(SocketAddr::V4(SocketAddrV4::decode(decoder)?)), + 1 => Ok(SocketAddr::V6(SocketAddrV6::decode(decoder)?)), + found => Err(DecodeError::UnexpectedVariant { + min: 0, + max: 1, + found, + type_name: core::any::type_name::(), + }), + } + } +} + +impl Encode for SocketAddrV4 { + fn encode(&self, mut encoder: E) -> Result<(), EncodeError> { + self.ip().encode(&mut encoder)?; + self.port().encode(encoder) + } +} + +impl Decode for SocketAddrV4 { + fn decode(mut decoder: D) -> Result { + let ip = Ipv4Addr::decode(&mut decoder)?; + let port = u16::decode(decoder)?; + Ok(Self::new(ip, port)) + } +} + +impl Encode for SocketAddrV6 { + fn encode(&self, mut encoder: E) -> Result<(), EncodeError> { + self.ip().encode(&mut encoder)?; + self.port().encode(encoder) + } +} + +impl Decode for SocketAddrV6 { + fn decode(mut decoder: D) -> Result { + let ip = Ipv6Addr::decode(&mut decoder)?; + let port = u16::decode(decoder)?; + Ok(Self::new(ip, port, 0, 0)) + } +} diff --git a/src/features/mod.rs b/src/features/mod.rs new file mode 100644 index 000000000..ab1b3097f --- /dev/null +++ b/src/features/mod.rs @@ -0,0 +1,19 @@ +#[cfg(feature = "atomic")] +mod atomic; +#[cfg(feature = "atomic")] +pub use self::atomic::*; + +#[cfg(feature = "alloc")] +mod impl_alloc; +#[cfg(feature = "alloc")] +pub use self::impl_alloc::*; + +#[cfg(feature = "std")] +mod impl_std; +#[cfg(feature = "std")] +pub use self::impl_std::*; + +#[cfg(feature = "derive")] +mod derive; +#[cfg(feature = "derive")] +pub use self::derive::*; diff --git a/src/internal.rs b/src/internal.rs deleted file mode 100644 index 78f7b2d81..000000000 --- a/src/internal.rs +++ /dev/null @@ -1,123 +0,0 @@ -use std::io::{Read, Write}; -use std::marker::PhantomData; - -use crate::config::{Infinite, InternalOptions, Options, SizeLimit, TrailingBytes}; -use crate::de::read::BincodeRead; -use crate::Result; - -pub(crate) fn serialize_into(writer: W, value: &T, mut options: O) -> Result<()> -where - W: Write, - T: 
serde::Serialize, - O: InternalOptions, -{ - if options.limit().limit().is_some() { - // "compute" the size for the side-effect - // of returning Err if the bound was reached. - serialized_size(value, &mut options)?; - } - - let mut serializer = crate::ser::Serializer::<_, O>::new(writer, options); - serde::Serialize::serialize(value, &mut serializer) -} - -pub(crate) fn serialize(value: &T, mut options: O) -> Result> -where - T: serde::Serialize, - O: InternalOptions, -{ - let mut writer = { - let actual_size = serialized_size(value, &mut options)?; - Vec::with_capacity(actual_size as usize) - }; - - serialize_into(&mut writer, value, options.with_no_limit())?; - Ok(writer) -} - -pub(crate) fn serialized_size(value: &T, options: O) -> Result -where - T: serde::Serialize, -{ - let mut size_counter = crate::ser::SizeChecker { options, total: 0 }; - - let result = value.serialize(&mut size_counter); - result.map(|_| size_counter.total) -} - -pub(crate) fn deserialize_from(reader: R, options: O) -> Result -where - R: Read, - T: serde::de::DeserializeOwned, - O: InternalOptions, -{ - deserialize_from_seed(PhantomData, reader, options) -} - -pub(crate) fn deserialize_from_seed<'a, R, T, O>(seed: T, reader: R, options: O) -> Result -where - R: Read, - T: serde::de::DeserializeSeed<'a>, - O: InternalOptions, -{ - let reader = crate::de::read::IoReader::new(reader); - deserialize_from_custom_seed(seed, reader, options) -} - -pub(crate) fn deserialize_from_custom<'a, R, T, O>(reader: R, options: O) -> Result -where - R: BincodeRead<'a>, - T: serde::de::DeserializeOwned, - O: InternalOptions, -{ - deserialize_from_custom_seed(PhantomData, reader, options) -} - -pub(crate) fn deserialize_from_custom_seed<'a, R, T, O>( - seed: T, - reader: R, - options: O, -) -> Result -where - R: BincodeRead<'a>, - T: serde::de::DeserializeSeed<'a>, - O: InternalOptions, -{ - let mut deserializer = crate::de::Deserializer::<_, O>::with_bincode_read(reader, options); - seed.deserialize(&mut 
deserializer) -} - -pub(crate) fn deserialize_in_place<'a, R, T, O>(reader: R, options: O, place: &mut T) -> Result<()> -where - R: BincodeRead<'a>, - T: serde::de::Deserialize<'a>, - O: InternalOptions, -{ - let mut deserializer = crate::de::Deserializer::<_, _>::with_bincode_read(reader, options); - serde::Deserialize::deserialize_in_place(&mut deserializer, place) -} - -pub(crate) fn deserialize<'a, T, O>(bytes: &'a [u8], options: O) -> Result -where - T: serde::de::Deserialize<'a>, - O: InternalOptions, -{ - deserialize_seed(PhantomData, bytes, options) -} - -pub(crate) fn deserialize_seed<'a, T, O>(seed: T, bytes: &'a [u8], options: O) -> Result -where - T: serde::de::DeserializeSeed<'a>, - O: InternalOptions, -{ - let options = crate::config::WithOtherLimit::new(options, Infinite); - - let reader = crate::de::read::SliceReader::new(bytes); - let mut deserializer = crate::de::Deserializer::with_bincode_read(reader, options); - let val = seed.deserialize(&mut deserializer)?; - - match O::Trailing::check_end(&deserializer.reader) { - Ok(_) => Ok(val), - Err(err) => Err(err), - } -} diff --git a/src/lib.rs b/src/lib.rs index d767a4d9f..5403aec85 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,131 +1,145 @@ -#![deny(missing_docs)] +#![no_std] +#![warn(missing_docs, unused_lifetimes)] +#![cfg_attr(docsrs, feature(doc_cfg))] //! Bincode is a crate for encoding and decoding using a tiny binary //! serialization strategy. Using it, you can easily go from having //! an object in memory, quickly serialize it to bytes, and then //! deserialize it back just as fast! //! -//! ### Using Basic Functions +//! # Serde //! -//! ```edition2018 -//! fn main() { -//! // The object that we will serialize. -//! let target: Option = Some("hello world".to_string()); +//! Starting from bincode 2, serde is now an optional dependency. If you want to use serde, please enable the `serde` feature. See [Features](#features) for more information. //! -//! 
let encoded: Vec = bincode::serialize(&target).unwrap(); -//! let decoded: Option = bincode::deserialize(&encoded[..]).unwrap(); -//! assert_eq!(target, decoded); -//! } -//! ``` +//! # Features +//! +//! |Name |Default?|Supported types for Encode/Decode|Enabled methods |Other| +//! |------|--------|-----------------------------------------|-----------------------------------------------------------------|-----| +//! |std | Yes ||`decode_from_reader` and `encode_into_writer`| +//! |alloc | Yes |All common containers in alloc, like `Vec`, `String`, `Box`|`encode_to_vec`| +//! |atomic| Yes |All `Atomic*` integer types, e.g. `AtomicUsize`, and `AtomicBool`|| +//! |derive| Yes |||Enables the `Encode` and `Decode` derive macro| +//! |serde | No |TODO|TODO|TODO| +//! +//! # Example +//! +//! ```rust +//! use bincode::config::Configuration; +//! +//! let mut slice = [0u8; 100]; //! -//! ### 128bit numbers +//! // You can encode any type that implements `enc::Encode`. +//! // You can automatically implement this trait on custom types with the `derive` feature. +//! let input = ( +//! 0u8, +//! 10u32, +//! 10000i128, +//! 'a', +//! [0u8, 1u8, 2u8, 3u8] +//! ); //! -//! Support for `i128` and `u128` is automatically enabled on Rust toolchains -//! greater than or equal to `1.26.0` and disabled for targets which do not support it +//! let length = bincode::encode_into_slice( +//! input, +//! &mut slice, +//! Configuration::standard() +//! ).unwrap(); +//! +//! let slice = &slice[..length]; +//! println!("Bytes written: {:?}", slice); +//! +//! // Decoding works the same as encoding. +//! // The trait used is `de::Decode`, and can also be automatically implemented with the `derive` feature. +//! let decoded: (u8, u32, i128, char, [u8; 4]) = bincode::decode_from_slice(slice, Configuration::standard()).unwrap(); +//! +//! assert_eq!(decoded, input); +//! 
``` -#![doc(html_root_url = "https://docs.rs/bincode/2.0.0-dev")] +#![doc(html_root_url = "https://docs.rs/bincode/2.0.0-alpha.0")] #![crate_name = "bincode"] #![crate_type = "rlib"] #![crate_type = "dylib"] -extern crate byteorder; -#[macro_use] -extern crate serde; +#[cfg(feature = "alloc")] +extern crate alloc; +#[cfg(any(feature = "std", test))] +extern crate std; + +mod features; +pub(crate) mod utils; +pub(crate) mod varint; + +use de::read::Reader; +use enc::write::Writer; +pub use features::*; pub mod config; -/// Deserialize bincode data to a Rust data structure. pub mod de; +pub mod enc; +pub mod error; -mod error; -mod internal; -mod ser; - -pub use crate::config::{DefaultOptions, Options}; -pub use crate::de::read::BincodeRead; -pub use crate::de::Deserializer; -pub use crate::error::{Error, ErrorKind, Result}; -pub use crate::ser::Serializer; +use config::Config; -/// Get a default configuration object. +/// Encode the given value into the given slice. Returns the amount of bytes that have been written. /// -/// ### Default Configuration: +/// See the [config] module for more information on configurations. /// -/// | Byte limit | Endianness | Int Encoding | Trailing Behavior | -/// |------------|------------|--------------|-------------------| -/// | Unlimited | Little | Varint | Reject | -#[inline(always)] -pub fn options() -> DefaultOptions { - DefaultOptions::new() +/// [config]: config/index.html +pub fn encode_into_slice( + val: E, + dst: &mut [u8], + config: C, +) -> Result { + let writer = enc::write::SliceWriter::new(dst); + let mut encoder = enc::EncoderImpl::<_, C>::new(writer, config); + val.encode(&mut encoder)?; + Ok(encoder.into_writer().bytes_written()) } -/// Serializes an object directly into a `Writer` using the default configuration. +/// Encode the given value into a custom [Writer]. 
/// -/// If the serialization would take more bytes than allowed by the size limit, an error -/// is returned and *no bytes* will be written into the `Writer`. -pub fn serialize_into(writer: W, value: &T) -> Result<()> -where - W: std::io::Write, - T: serde::Serialize, -{ - DefaultOptions::new().serialize_into(writer, value) -} - -/// Serializes a serializable object into a `Vec` of bytes using the default configuration. -pub fn serialize(value: &T) -> Result> -where - T: serde::Serialize, -{ - DefaultOptions::new().serialize(value) -} - -/// Deserializes an object directly from a `Read`er using the default configuration. +/// See the [config] module for more information on configurations. /// -/// If this returns an `Error`, `reader` may be in an invalid state. -pub fn deserialize_from(reader: R) -> Result -where - R: std::io::Read, - T: serde::de::DeserializeOwned, -{ - DefaultOptions::new().deserialize_from(reader) +/// [config]: config/index.html +pub fn encode_into_writer( + val: E, + writer: W, + config: C, +) -> Result<(), error::EncodeError> { + let mut encoder = enc::EncoderImpl::<_, C>::new(writer, config); + val.encode(&mut encoder)?; + Ok(()) } -/// Deserializes an object from a custom `BincodeRead`er using the default configuration. -/// It is highly recommended to use `deserialize_from` unless you need to implement -/// `BincodeRead` for performance reasons. +/// Attempt to decode a given type `D` from the given slice. /// -/// If this returns an `Error`, `reader` may be in an invalid state. -pub fn deserialize_from_custom<'a, R, T>(reader: R) -> Result -where - R: de::read::BincodeRead<'a>, - T: serde::de::DeserializeOwned, -{ - DefaultOptions::new().deserialize_from_custom(reader) -} - -/// Only use this if you know what you're doing. +/// See the [config] module for more information on configurations. /// -/// This is part of the public API. 
-#[doc(hidden)] -pub fn deserialize_in_place<'a, R, T>(reader: R, place: &mut T) -> Result<()> -where - T: serde::de::Deserialize<'a>, - R: BincodeRead<'a>, -{ - DefaultOptions::new().deserialize_in_place(reader, place) +/// [config]: config/index.html +pub fn decode_from_slice<'a, D: de::BorrowDecode<'a>, C: Config>( + src: &'a [u8], + _config: C, +) -> Result { + let reader = de::read::SliceReader::new(src); + let mut decoder = de::DecoderImpl::<_, C>::new(reader, _config); + D::borrow_decode(&mut decoder) } -/// Deserializes a slice of bytes into an instance of `T` using the default configuration. -pub fn deserialize<'a, T>(bytes: &'a [u8]) -> Result -where - T: serde::de::Deserialize<'a>, -{ - DefaultOptions::new().deserialize(bytes) +/// Attempt to decode a given type `D` from the given [Reader]. +/// +/// See the [config] module for more information on configurations. +/// +/// [config]: config/index.html +pub fn decode_from_reader( + reader: R, + _config: C, +) -> Result { + let mut decoder = de::DecoderImpl::<_, C>::new(reader, _config); + D::decode(&mut decoder) } -/// Returns the size that an object would be if serialized using Bincode with the default configuration. -pub fn serialized_size(value: &T) -> Result -where - T: serde::Serialize, -{ - DefaultOptions::new().serialized_size(value) +// TODO: Currently our doctests fail when trying to include the specs because the specs depend on `derive` and `alloc`. 
+// But we want to have the specs in the docs always +#[cfg(all(feature = "alloc", feature = "derive"))] +pub mod spec { + #![doc = include_str!("../docs/spec.md")] } diff --git a/src/ser/mod.rs b/src/ser/mod.rs deleted file mode 100644 index 911a5e3a1..000000000 --- a/src/ser/mod.rs +++ /dev/null @@ -1,770 +0,0 @@ -use std::io::Write; -use std::u32; - -use byteorder::WriteBytesExt; - -use super::config::{IntEncoding, SizeLimit}; -use super::{Error, ErrorKind, Result}; -use crate::config::{BincodeByteOrder, Options}; -use std::mem::size_of; - -/// An Serializer that encodes values directly into a Writer. -/// -/// The specified byte-order will impact the endianness that is -/// used during the encoding. -/// -/// This struct should not be used often. -/// For most cases, prefer the `encode_into` function. -pub struct Serializer { - writer: W, - _options: O, -} - -macro_rules! impl_serialize_literal { - ($ser_method:ident($ty:ty) = $write:ident()) => { - pub(crate) fn $ser_method(&mut self, v: $ty) -> Result<()> { - self.writer - .$write::<::Endian>(v) - .map_err(Into::into) - } - }; -} - -impl Serializer { - /// Creates a new Serializer with the given `Write`r. - pub fn new(w: W, options: O) -> Serializer { - Serializer { - writer: w, - _options: options, - } - } - - pub(crate) fn serialize_byte(&mut self, v: u8) -> Result<()> { - self.writer.write_u8(v).map_err(Into::into) - } - - impl_serialize_literal! {serialize_literal_u16(u16) = write_u16()} - impl_serialize_literal! {serialize_literal_u32(u32) = write_u32()} - impl_serialize_literal! {serialize_literal_u64(u64) = write_u64()} - - serde_if_integer128! { - impl_serialize_literal!{serialize_literal_u128(u128) = write_u128()} - } -} - -macro_rules! 
impl_serialize_int { - ($ser_method:ident($ty:ty) = $ser_int:ident()) => { - fn $ser_method(self, v: $ty) -> Result<()> { - O::IntEncoding::$ser_int(self, v) - } - }; -} - -impl<'a, W: Write, O: Options> serde::Serializer for &'a mut Serializer { - type Ok = (); - type Error = Error; - type SerializeSeq = Compound<'a, W, O>; - type SerializeTuple = Compound<'a, W, O>; - type SerializeTupleStruct = Compound<'a, W, O>; - type SerializeTupleVariant = Compound<'a, W, O>; - type SerializeMap = Compound<'a, W, O>; - type SerializeStruct = Compound<'a, W, O>; - type SerializeStructVariant = Compound<'a, W, O>; - - fn serialize_unit(self) -> Result<()> { - Ok(()) - } - - fn serialize_unit_struct(self, _: &'static str) -> Result<()> { - Ok(()) - } - - fn serialize_bool(self, v: bool) -> Result<()> { - self.serialize_byte(v as u8) - } - - fn serialize_u8(self, v: u8) -> Result<()> { - self.serialize_byte(v) - } - - impl_serialize_int! {serialize_u16(u16) = serialize_u16()} - impl_serialize_int! {serialize_u32(u32) = serialize_u32()} - impl_serialize_int! {serialize_u64(u64) = serialize_u64()} - - fn serialize_i8(self, v: i8) -> Result<()> { - self.serialize_byte(v as u8) - } - - impl_serialize_int! {serialize_i16(i16) = serialize_i16()} - impl_serialize_int! {serialize_i32(i32) = serialize_i32()} - impl_serialize_int! {serialize_i64(i64) = serialize_i64()} - - serde_if_integer128! 
{ - impl_serialize_int!{serialize_u128(u128) = serialize_u128()} - impl_serialize_int!{serialize_i128(i128) = serialize_i128()} - } - - fn serialize_f32(self, v: f32) -> Result<()> { - self.writer - .write_f32::<::Endian>(v) - .map_err(Into::into) - } - - fn serialize_f64(self, v: f64) -> Result<()> { - self.writer - .write_f64::<::Endian>(v) - .map_err(Into::into) - } - - fn serialize_str(self, v: &str) -> Result<()> { - O::IntEncoding::serialize_len(self, v.len())?; - self.writer.write_all(v.as_bytes()).map_err(Into::into) - } - - fn serialize_char(self, c: char) -> Result<()> { - self.writer - .write_all(encode_utf8(c).as_slice()) - .map_err(Into::into) - } - - fn serialize_bytes(self, v: &[u8]) -> Result<()> { - O::IntEncoding::serialize_len(self, v.len())?; - self.writer.write_all(v).map_err(Into::into) - } - - fn serialize_none(self) -> Result<()> { - self.writer.write_u8(0).map_err(Into::into) - } - - fn serialize_some(self, v: &T) -> Result<()> - where - T: serde::Serialize, - { - self.writer.write_u8(1)?; - v.serialize(self) - } - - fn serialize_seq(self, len: Option) -> Result { - let len = len.ok_or(ErrorKind::SequenceMustHaveLength)?; - O::IntEncoding::serialize_len(self, len)?; - Ok(Compound { ser: self }) - } - - fn serialize_tuple(self, _len: usize) -> Result { - Ok(Compound { ser: self }) - } - - fn serialize_tuple_struct( - self, - _name: &'static str, - _len: usize, - ) -> Result { - Ok(Compound { ser: self }) - } - - fn serialize_tuple_variant( - self, - _name: &'static str, - variant_index: u32, - _variant: &'static str, - _len: usize, - ) -> Result { - O::IntEncoding::serialize_u32(self, variant_index)?; - Ok(Compound { ser: self }) - } - - fn serialize_map(self, len: Option) -> Result { - let len = len.ok_or(ErrorKind::SequenceMustHaveLength)?; - O::IntEncoding::serialize_len(self, len)?; - Ok(Compound { ser: self }) - } - - fn serialize_struct(self, _name: &'static str, _len: usize) -> Result { - Ok(Compound { ser: self }) - } - - fn 
serialize_struct_variant( - self, - _name: &'static str, - variant_index: u32, - _variant: &'static str, - _len: usize, - ) -> Result { - O::IntEncoding::serialize_u32(self, variant_index)?; - Ok(Compound { ser: self }) - } - - fn serialize_newtype_struct(self, _name: &'static str, value: &T) -> Result<()> - where - T: serde::ser::Serialize, - { - value.serialize(self) - } - - fn serialize_newtype_variant( - self, - _name: &'static str, - variant_index: u32, - _variant: &'static str, - value: &T, - ) -> Result<()> - where - T: serde::ser::Serialize, - { - O::IntEncoding::serialize_u32(self, variant_index)?; - value.serialize(self) - } - - fn serialize_unit_variant( - self, - _name: &'static str, - variant_index: u32, - _variant: &'static str, - ) -> Result<()> { - O::IntEncoding::serialize_u32(self, variant_index) - } - - fn is_human_readable(&self) -> bool { - false - } -} - -pub(crate) struct SizeChecker { - pub options: O, - pub total: u64, -} - -impl SizeChecker { - fn add_raw(&mut self, size: u64) -> Result<()> { - self.options.limit().add(size)?; - self.total += size; - - Ok(()) - } - - fn add_discriminant(&mut self, idx: u32) -> Result<()> { - let bytes = O::IntEncoding::u32_size(idx); - self.add_raw(bytes) - } - - fn add_len(&mut self, len: usize) -> Result<()> { - let bytes = O::IntEncoding::len_size(len); - self.add_raw(bytes) - } -} - -macro_rules! 
impl_size_int { - ($ser_method:ident($ty:ty) = $size_method:ident()) => { - fn $ser_method(self, v: $ty) -> Result<()> { - self.add_raw(O::IntEncoding::$size_method(v)) - } - }; -} - -impl<'a, O: Options> serde::Serializer for &'a mut SizeChecker { - type Ok = (); - type Error = Error; - type SerializeSeq = SizeCompound<'a, O>; - type SerializeTuple = SizeCompound<'a, O>; - type SerializeTupleStruct = SizeCompound<'a, O>; - type SerializeTupleVariant = SizeCompound<'a, O>; - type SerializeMap = SizeCompound<'a, O>; - type SerializeStruct = SizeCompound<'a, O>; - type SerializeStructVariant = SizeCompound<'a, O>; - - fn serialize_unit(self) -> Result<()> { - Ok(()) - } - - fn serialize_unit_struct(self, _: &'static str) -> Result<()> { - Ok(()) - } - - fn serialize_bool(self, _: bool) -> Result<()> { - self.add_raw(1) - } - - fn serialize_u8(self, _: u8) -> Result<()> { - self.add_raw(1) - } - fn serialize_i8(self, _: i8) -> Result<()> { - self.add_raw(1) - } - - impl_size_int! {serialize_u16(u16) = u16_size()} - impl_size_int! {serialize_u32(u32) = u32_size()} - impl_size_int! {serialize_u64(u64) = u64_size()} - impl_size_int! {serialize_i16(i16) = i16_size()} - impl_size_int! {serialize_i32(i32) = i32_size()} - impl_size_int! {serialize_i64(i64) = i64_size()} - - serde_if_integer128! 
{ - impl_size_int!{serialize_u128(u128) = u128_size()} - impl_size_int!{serialize_i128(i128) = i128_size()} - } - - fn serialize_f32(self, _: f32) -> Result<()> { - self.add_raw(size_of::() as u64) - } - - fn serialize_f64(self, _: f64) -> Result<()> { - self.add_raw(size_of::() as u64) - } - - fn serialize_str(self, v: &str) -> Result<()> { - self.add_len(v.len())?; - self.add_raw(v.len() as u64) - } - - fn serialize_char(self, c: char) -> Result<()> { - self.add_raw(encode_utf8(c).as_slice().len() as u64) - } - - fn serialize_bytes(self, v: &[u8]) -> Result<()> { - self.add_len(v.len())?; - self.add_raw(v.len() as u64) - } - - fn serialize_none(self) -> Result<()> { - self.add_raw(1) - } - - fn serialize_some(self, v: &T) -> Result<()> - where - T: serde::Serialize, - { - self.add_raw(1)?; - v.serialize(self) - } - - fn serialize_seq(self, len: Option) -> Result { - let len = len.ok_or(ErrorKind::SequenceMustHaveLength)?; - - self.add_len(len)?; - Ok(SizeCompound { ser: self }) - } - - fn serialize_tuple(self, _len: usize) -> Result { - Ok(SizeCompound { ser: self }) - } - - fn serialize_tuple_struct( - self, - _name: &'static str, - _len: usize, - ) -> Result { - Ok(SizeCompound { ser: self }) - } - - fn serialize_tuple_variant( - self, - _name: &'static str, - variant_index: u32, - _variant: &'static str, - _len: usize, - ) -> Result { - self.add_raw(O::IntEncoding::u32_size(variant_index))?; - Ok(SizeCompound { ser: self }) - } - - fn serialize_map(self, len: Option) -> Result { - let len = len.ok_or(ErrorKind::SequenceMustHaveLength)?; - - self.add_len(len)?; - Ok(SizeCompound { ser: self }) - } - - fn serialize_struct(self, _name: &'static str, _len: usize) -> Result { - Ok(SizeCompound { ser: self }) - } - - fn serialize_struct_variant( - self, - _name: &'static str, - variant_index: u32, - _variant: &'static str, - _len: usize, - ) -> Result { - self.add_discriminant(variant_index)?; - Ok(SizeCompound { ser: self }) - } - - fn serialize_newtype_struct( - 
self, - _name: &'static str, - v: &V, - ) -> Result<()> { - v.serialize(self) - } - - fn serialize_unit_variant( - self, - _name: &'static str, - variant_index: u32, - _variant: &'static str, - ) -> Result<()> { - self.add_discriminant(variant_index) - } - - fn serialize_newtype_variant( - self, - _name: &'static str, - variant_index: u32, - _variant: &'static str, - value: &V, - ) -> Result<()> { - self.add_discriminant(variant_index)?; - value.serialize(self) - } - - fn is_human_readable(&self) -> bool { - false - } -} - -pub struct Compound<'a, W: 'a, O: Options + 'a> { - ser: &'a mut Serializer, -} - -impl<'a, W, O> serde::ser::SerializeSeq for Compound<'a, W, O> -where - W: Write, - O: Options, -{ - type Ok = (); - type Error = Error; - - #[inline] - fn serialize_element(&mut self, value: &T) -> Result<()> - where - T: serde::ser::Serialize, - { - value.serialize(&mut *self.ser) - } - - #[inline] - fn end(self) -> Result<()> { - Ok(()) - } -} - -impl<'a, W, O> serde::ser::SerializeTuple for Compound<'a, W, O> -where - W: Write, - O: Options, -{ - type Ok = (); - type Error = Error; - - #[inline] - fn serialize_element(&mut self, value: &T) -> Result<()> - where - T: serde::ser::Serialize, - { - value.serialize(&mut *self.ser) - } - - #[inline] - fn end(self) -> Result<()> { - Ok(()) - } -} - -impl<'a, W, O> serde::ser::SerializeTupleStruct for Compound<'a, W, O> -where - W: Write, - O: Options, -{ - type Ok = (); - type Error = Error; - - #[inline] - fn serialize_field(&mut self, value: &T) -> Result<()> - where - T: serde::ser::Serialize, - { - value.serialize(&mut *self.ser) - } - - #[inline] - fn end(self) -> Result<()> { - Ok(()) - } -} - -impl<'a, W, O> serde::ser::SerializeTupleVariant for Compound<'a, W, O> -where - W: Write, - O: Options, -{ - type Ok = (); - type Error = Error; - - #[inline] - fn serialize_field(&mut self, value: &T) -> Result<()> - where - T: serde::ser::Serialize, - { - value.serialize(&mut *self.ser) - } - - #[inline] - fn 
end(self) -> Result<()> { - Ok(()) - } -} - -impl<'a, W, O> serde::ser::SerializeMap for Compound<'a, W, O> -where - W: Write, - O: Options, -{ - type Ok = (); - type Error = Error; - - #[inline] - fn serialize_key(&mut self, value: &K) -> Result<()> - where - K: serde::ser::Serialize, - { - value.serialize(&mut *self.ser) - } - - #[inline] - fn serialize_value(&mut self, value: &V) -> Result<()> - where - V: serde::ser::Serialize, - { - value.serialize(&mut *self.ser) - } - - #[inline] - fn end(self) -> Result<()> { - Ok(()) - } -} - -impl<'a, W, O> serde::ser::SerializeStruct for Compound<'a, W, O> -where - W: Write, - O: Options, -{ - type Ok = (); - type Error = Error; - - #[inline] - fn serialize_field(&mut self, _key: &'static str, value: &T) -> Result<()> - where - T: serde::ser::Serialize, - { - value.serialize(&mut *self.ser) - } - - #[inline] - fn end(self) -> Result<()> { - Ok(()) - } -} - -impl<'a, W, O> serde::ser::SerializeStructVariant for Compound<'a, W, O> -where - W: Write, - O: Options, -{ - type Ok = (); - type Error = Error; - - #[inline] - fn serialize_field(&mut self, _key: &'static str, value: &T) -> Result<()> - where - T: serde::ser::Serialize, - { - value.serialize(&mut *self.ser) - } - - #[inline] - fn end(self) -> Result<()> { - Ok(()) - } -} - -pub(crate) struct SizeCompound<'a, S: Options + 'a> { - ser: &'a mut SizeChecker, -} - -impl<'a, O: Options> serde::ser::SerializeSeq for SizeCompound<'a, O> { - type Ok = (); - type Error = Error; - - #[inline] - fn serialize_element(&mut self, value: &T) -> Result<()> - where - T: serde::ser::Serialize, - { - value.serialize(&mut *self.ser) - } - - #[inline] - fn end(self) -> Result<()> { - Ok(()) - } -} - -impl<'a, O: Options> serde::ser::SerializeTuple for SizeCompound<'a, O> { - type Ok = (); - type Error = Error; - - #[inline] - fn serialize_element(&mut self, value: &T) -> Result<()> - where - T: serde::ser::Serialize, - { - value.serialize(&mut *self.ser) - } - - #[inline] - fn end(self) 
-> Result<()> { - Ok(()) - } -} - -impl<'a, O: Options> serde::ser::SerializeTupleStruct for SizeCompound<'a, O> { - type Ok = (); - type Error = Error; - - #[inline] - fn serialize_field(&mut self, value: &T) -> Result<()> - where - T: serde::ser::Serialize, - { - value.serialize(&mut *self.ser) - } - - #[inline] - fn end(self) -> Result<()> { - Ok(()) - } -} - -impl<'a, O: Options> serde::ser::SerializeTupleVariant for SizeCompound<'a, O> { - type Ok = (); - type Error = Error; - - #[inline] - fn serialize_field(&mut self, value: &T) -> Result<()> - where - T: serde::ser::Serialize, - { - value.serialize(&mut *self.ser) - } - - #[inline] - fn end(self) -> Result<()> { - Ok(()) - } -} - -impl<'a, O: Options + 'a> serde::ser::SerializeMap for SizeCompound<'a, O> { - type Ok = (); - type Error = Error; - - #[inline] - fn serialize_key(&mut self, value: &K) -> Result<()> - where - K: serde::ser::Serialize, - { - value.serialize(&mut *self.ser) - } - - #[inline] - fn serialize_value(&mut self, value: &V) -> Result<()> - where - V: serde::ser::Serialize, - { - value.serialize(&mut *self.ser) - } - - #[inline] - fn end(self) -> Result<()> { - Ok(()) - } -} - -impl<'a, O: Options> serde::ser::SerializeStruct for SizeCompound<'a, O> { - type Ok = (); - type Error = Error; - - #[inline] - fn serialize_field(&mut self, _key: &'static str, value: &T) -> Result<()> - where - T: serde::ser::Serialize, - { - value.serialize(&mut *self.ser) - } - - #[inline] - fn end(self) -> Result<()> { - Ok(()) - } -} - -impl<'a, O: Options> serde::ser::SerializeStructVariant for SizeCompound<'a, O> { - type Ok = (); - type Error = Error; - - #[inline] - fn serialize_field(&mut self, _key: &'static str, value: &T) -> Result<()> - where - T: serde::ser::Serialize, - { - value.serialize(&mut *self.ser) - } - - #[inline] - fn end(self) -> Result<()> { - Ok(()) - } -} -const TAG_CONT: u8 = 0b1000_0000; -const TAG_TWO_B: u8 = 0b1100_0000; -const TAG_THREE_B: u8 = 0b1110_0000; -const TAG_FOUR_B: u8 
= 0b1111_0000; -const MAX_ONE_B: u32 = 0x80; -const MAX_TWO_B: u32 = 0x800; -const MAX_THREE_B: u32 = 0x10000; - -fn encode_utf8(c: char) -> EncodeUtf8 { - let code = c as u32; - let mut buf = [0; 4]; - let pos = if code < MAX_ONE_B { - buf[3] = code as u8; - 3 - } else if code < MAX_TWO_B { - buf[2] = (code >> 6 & 0x1F) as u8 | TAG_TWO_B; - buf[3] = (code & 0x3F) as u8 | TAG_CONT; - 2 - } else if code < MAX_THREE_B { - buf[1] = (code >> 12 & 0x0F) as u8 | TAG_THREE_B; - buf[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT; - buf[3] = (code & 0x3F) as u8 | TAG_CONT; - 1 - } else { - buf[0] = (code >> 18 & 0x07) as u8 | TAG_FOUR_B; - buf[1] = (code >> 12 & 0x3F) as u8 | TAG_CONT; - buf[2] = (code >> 6 & 0x3F) as u8 | TAG_CONT; - buf[3] = (code & 0x3F) as u8 | TAG_CONT; - 0 - }; - EncodeUtf8 { buf, pos } -} - -struct EncodeUtf8 { - buf: [u8; 4], - pos: usize, -} - -impl EncodeUtf8 { - fn as_slice(&self) -> &[u8] { - &self.buf[self.pos..] - } -} diff --git a/src/utils.rs b/src/utils.rs new file mode 100644 index 000000000..14332562e --- /dev/null +++ b/src/utils.rs @@ -0,0 +1,3 @@ +pub trait Sealed {} + +impl<'a, T> Sealed for &'a mut T where T: Sealed {} diff --git a/src/varint/decode_signed.rs b/src/varint/decode_signed.rs new file mode 100644 index 000000000..d670493b2 --- /dev/null +++ b/src/varint/decode_signed.rs @@ -0,0 +1,86 @@ +use crate::{ + config::Endian, + de::read::Reader, + error::{DecodeError, IntegerType}, +}; + +pub fn varint_decode_i16(read: &mut R, endian: Endian) -> Result { + let n = super::varint_decode_u16(read, endian) + .map_err(DecodeError::change_integer_type_to_signed)?; + Ok(if n % 2 == 0 { + // positive number + (n / 2) as _ + } else { + // negative number + // !m * 2 + 1 = n + // !m * 2 = n - 1 + // !m = (n - 1) / 2 + // m = !((n - 1) / 2) + // since we have n is odd, we have floor(n / 2) = floor((n - 1) / 2) + !(n / 2) as _ + }) +} + +pub fn varint_decode_i32(read: &mut R, endian: Endian) -> Result { + let n = super::varint_decode_u32(read, 
endian) + .map_err(DecodeError::change_integer_type_to_signed)?; + Ok(if n % 2 == 0 { + // positive number + (n / 2) as _ + } else { + // negative number + // !m * 2 + 1 = n + // !m * 2 = n - 1 + // !m = (n - 1) / 2 + // m = !((n - 1) / 2) + // since we have n is odd, we have floor(n / 2) = floor((n - 1) / 2) + !(n / 2) as _ + }) +} + +pub fn varint_decode_i64(read: &mut R, endian: Endian) -> Result { + let n = super::varint_decode_u64(read, endian) + .map_err(DecodeError::change_integer_type_to_signed)?; + Ok(if n % 2 == 0 { + // positive number + (n / 2) as _ + } else { + // negative number + // !m * 2 + 1 = n + // !m * 2 = n - 1 + // !m = (n - 1) / 2 + // m = !((n - 1) / 2) + // since we have n is odd, we have floor(n / 2) = floor((n - 1) / 2) + !(n / 2) as _ + }) +} + +pub fn varint_decode_i128(read: &mut R, endian: Endian) -> Result { + let n = super::varint_decode_u128(read, endian) + .map_err(DecodeError::change_integer_type_to_signed)?; + Ok(if n % 2 == 0 { + // positive number + (n / 2) as _ + } else { + // negative number + // !m * 2 + 1 = n + // !m * 2 = n - 1 + // !m = (n - 1) / 2 + // m = !((n - 1) / 2) + // since we have n is odd, we have floor(n / 2) = floor((n - 1) / 2) + !(n / 2) as _ + }) +} + +pub fn varint_decode_isize(read: &mut R, endian: Endian) -> Result { + match varint_decode_i64(read, endian) { + Ok(val) => Ok(val as isize), + Err(DecodeError::InvalidIntegerType { found, .. 
}) => { + Err(DecodeError::InvalidIntegerType { + expected: IntegerType::Isize, + found: found.into_signed(), + }) + } + Err(e) => Err(e), + } +} diff --git a/src/varint/decode_unsigned.rs b/src/varint/decode_unsigned.rs new file mode 100644 index 000000000..d5574e079 --- /dev/null +++ b/src/varint/decode_unsigned.rs @@ -0,0 +1,628 @@ +use core::{convert::TryInto, u32}; + +use super::{SINGLE_BYTE_MAX, U128_BYTE, U16_BYTE, U32_BYTE, U64_BYTE}; +use crate::{ + config::Endian, + de::read::Reader, + error::{DecodeError, IntegerType}, +}; + +#[inline(never)] +#[cold] +fn deserialize_varint_cold_u16(read: &mut R, endian: Endian) -> Result +where + R: Reader, +{ + let mut bytes = [0u8; 1]; + read.read(&mut bytes)?; + match bytes[0] { + byte @ 0..=SINGLE_BYTE_MAX => Ok(byte as u16), + U16_BYTE => { + let mut bytes = [0u8; 2]; + read.read(&mut bytes)?; + Ok(match endian { + Endian::Big => u16::from_be_bytes(bytes), + Endian::Little => u16::from_le_bytes(bytes), + }) + } + U32_BYTE => invalid_varint_discriminant(IntegerType::U16, IntegerType::U32), + U64_BYTE => invalid_varint_discriminant(IntegerType::U16, IntegerType::U64), + U128_BYTE => invalid_varint_discriminant(IntegerType::U16, IntegerType::U128), + _ => invalid_varint_discriminant(IntegerType::U16, IntegerType::Reserved), + } +} + +#[inline(never)] +#[cold] +fn deserialize_varint_cold_u32(read: &mut R, endian: Endian) -> Result +where + R: Reader, +{ + let mut bytes = [0u8; 1]; + read.read(&mut bytes)?; + match bytes[0] { + byte @ 0..=SINGLE_BYTE_MAX => Ok(byte as u32), + U16_BYTE => { + let mut bytes = [0u8; 2]; + read.read(&mut bytes)?; + Ok(match endian { + Endian::Big => u16::from_be_bytes(bytes) as u32, + Endian::Little => u16::from_le_bytes(bytes) as u32, + }) + } + U32_BYTE => { + let mut bytes = [0u8; 4]; + read.read(&mut bytes)?; + Ok(match endian { + Endian::Big => u32::from_be_bytes(bytes) as u32, + Endian::Little => u32::from_le_bytes(bytes) as u32, + }) + } + U64_BYTE => 
invalid_varint_discriminant(IntegerType::U32, IntegerType::U64), + U128_BYTE => invalid_varint_discriminant(IntegerType::U32, IntegerType::U128), + _ => invalid_varint_discriminant(IntegerType::U32, IntegerType::Reserved), + } +} + +#[inline(never)] +#[cold] +fn deserialize_varint_cold_u64(read: &mut R, endian: Endian) -> Result +where + R: Reader, +{ + let mut bytes = [0u8; 1]; + read.read(&mut bytes)?; + match bytes[0] { + byte @ 0..=SINGLE_BYTE_MAX => Ok(byte as u64), + U16_BYTE => { + let mut bytes = [0u8; 2]; + read.read(&mut bytes)?; + Ok(match endian { + Endian::Big => u16::from_be_bytes(bytes) as u64, + Endian::Little => u16::from_le_bytes(bytes) as u64, + }) + } + U32_BYTE => { + let mut bytes = [0u8; 4]; + read.read(&mut bytes)?; + Ok(match endian { + Endian::Big => u32::from_be_bytes(bytes) as u64, + Endian::Little => u32::from_le_bytes(bytes) as u64, + }) + } + U64_BYTE => { + let mut bytes = [0u8; 8]; + read.read(&mut bytes)?; + Ok(match endian { + Endian::Big => u64::from_be_bytes(bytes) as u64, + Endian::Little => u64::from_le_bytes(bytes) as u64, + }) + } + U128_BYTE => invalid_varint_discriminant(IntegerType::U64, IntegerType::U128), + _ => invalid_varint_discriminant(IntegerType::U64, IntegerType::Reserved), + } +} + +#[inline(never)] +#[cold] +fn deserialize_varint_cold_usize(read: &mut R, endian: Endian) -> Result +where + R: Reader, +{ + let mut bytes = [0u8; 1]; + read.read(&mut bytes)?; + match bytes[0] { + byte @ 0..=SINGLE_BYTE_MAX => Ok(byte as usize), + U16_BYTE => { + let mut bytes = [0u8; 2]; + read.read(&mut bytes)?; + Ok(match endian { + Endian::Big => u16::from_be_bytes(bytes) as usize, + Endian::Little => u16::from_le_bytes(bytes) as usize, + }) + } + U32_BYTE => { + let mut bytes = [0u8; 4]; + read.read(&mut bytes)?; + Ok(match endian { + Endian::Big => u32::from_be_bytes(bytes) as usize, + Endian::Little => u32::from_le_bytes(bytes) as usize, + }) + } + U64_BYTE => { + let mut bytes = [0u8; 8]; + read.read(&mut bytes)?; + Ok(match 
endian { + Endian::Big => u64::from_be_bytes(bytes) as usize, + Endian::Little => u64::from_le_bytes(bytes) as usize, + }) + } + U128_BYTE => invalid_varint_discriminant(IntegerType::Usize, IntegerType::U128), + _ => invalid_varint_discriminant(IntegerType::Usize, IntegerType::Reserved), + } +} + +#[inline(never)] +#[cold] +fn deserialize_varint_cold_u128(read: &mut R, endian: Endian) -> Result +where + R: Reader, +{ + let mut bytes = [0u8; 1]; + read.read(&mut bytes)?; + match bytes[0] { + byte @ 0..=SINGLE_BYTE_MAX => Ok(byte as u128), + U16_BYTE => { + let mut bytes = [0u8; 2]; + read.read(&mut bytes)?; + Ok(match endian { + Endian::Big => u16::from_be_bytes(bytes) as u128, + Endian::Little => u16::from_le_bytes(bytes) as u128, + }) + } + U32_BYTE => { + let mut bytes = [0u8; 4]; + read.read(&mut bytes)?; + Ok(match endian { + Endian::Big => u32::from_be_bytes(bytes) as u128, + Endian::Little => u32::from_le_bytes(bytes) as u128, + }) + } + U64_BYTE => { + let mut bytes = [0u8; 8]; + read.read(&mut bytes)?; + Ok(match endian { + Endian::Big => u64::from_be_bytes(bytes) as u128, + Endian::Little => u64::from_le_bytes(bytes) as u128, + }) + } + U128_BYTE => { + let mut bytes = [0u8; 16]; + read.read(&mut bytes)?; + Ok(match endian { + Endian::Big => u128::from_be_bytes(bytes), + Endian::Little => u128::from_le_bytes(bytes), + }) + } + _ => invalid_varint_discriminant(IntegerType::U128, IntegerType::Reserved), + } +} + +#[inline(never)] +#[cold] +fn invalid_varint_discriminant( + expected: IntegerType, + found: IntegerType, +) -> Result { + Err(DecodeError::InvalidIntegerType { expected, found }) +} + +pub fn varint_decode_u16(read: &mut R, endian: Endian) -> Result { + if let Some(bytes) = read.peek_read(3) { + let (discriminant, bytes) = bytes.split_at(1); + let (out, used) = match discriminant[0] { + byte @ 0..=SINGLE_BYTE_MAX => (byte as u16, 1), + U16_BYTE => { + let val = match endian { + Endian::Big => u16::from_be_bytes(bytes[..2].try_into().unwrap()), + 
Endian::Little => u16::from_le_bytes(bytes[..2].try_into().unwrap()), + }; + + (val, 3) + } + U32_BYTE => return invalid_varint_discriminant(IntegerType::U16, IntegerType::U32), + U64_BYTE => return invalid_varint_discriminant(IntegerType::U16, IntegerType::U64), + U128_BYTE => return invalid_varint_discriminant(IntegerType::U16, IntegerType::U128), + _ => return invalid_varint_discriminant(IntegerType::U16, IntegerType::Reserved), + }; + + read.consume(used); + Ok(out) + } else { + deserialize_varint_cold_u16(read, endian) + } +} + +pub fn varint_decode_u32(read: &mut R, endian: Endian) -> Result { + if let Some(bytes) = read.peek_read(5) { + let (discriminant, bytes) = bytes.split_at(1); + let (out, used) = match discriminant[0] { + byte @ 0..=SINGLE_BYTE_MAX => (byte as u32, 1), + U16_BYTE => { + let val = match endian { + Endian::Big => u16::from_be_bytes(bytes[..2].try_into().unwrap()), + Endian::Little => u16::from_le_bytes(bytes[..2].try_into().unwrap()), + }; + + (val as u32, 3) + } + U32_BYTE => { + let val = match endian { + Endian::Big => u32::from_be_bytes(bytes[..4].try_into().unwrap()), + Endian::Little => u32::from_le_bytes(bytes[..4].try_into().unwrap()), + }; + + (val as u32, 5) + } + U64_BYTE => return invalid_varint_discriminant(IntegerType::U32, IntegerType::U64), + U128_BYTE => return invalid_varint_discriminant(IntegerType::U32, IntegerType::U128), + _ => return invalid_varint_discriminant(IntegerType::U32, IntegerType::Reserved), + }; + + read.consume(used); + Ok(out) + } else { + deserialize_varint_cold_u32(read, endian) + } +} + +pub fn varint_decode_u64(read: &mut R, endian: Endian) -> Result { + if let Some(bytes) = read.peek_read(9) { + let (discriminant, bytes) = bytes.split_at(1); + let (out, used) = match discriminant[0] { + byte @ 0..=SINGLE_BYTE_MAX => (byte as u64, 1), + U16_BYTE => { + let val = match endian { + Endian::Big => u16::from_be_bytes(bytes[..2].try_into().unwrap()), + Endian::Little => 
u16::from_le_bytes(bytes[..2].try_into().unwrap()), + }; + + (val as u64, 3) + } + U32_BYTE => { + let val = match endian { + Endian::Big => u32::from_be_bytes(bytes[..4].try_into().unwrap()), + Endian::Little => u32::from_le_bytes(bytes[..4].try_into().unwrap()), + }; + + (val as u64, 5) + } + U64_BYTE => { + let val = match endian { + Endian::Big => u64::from_be_bytes(bytes[..8].try_into().unwrap()), + Endian::Little => u64::from_le_bytes(bytes[..8].try_into().unwrap()), + }; + + (val as u64, 9) + } + U128_BYTE => return invalid_varint_discriminant(IntegerType::U32, IntegerType::U128), + _ => return invalid_varint_discriminant(IntegerType::U32, IntegerType::Reserved), + }; + + read.consume(used); + Ok(out) + } else { + deserialize_varint_cold_u64(read, endian) + } +} + +pub fn varint_decode_usize(read: &mut R, endian: Endian) -> Result { + if let Some(bytes) = read.peek_read(9) { + let (discriminant, bytes) = bytes.split_at(1); + let (out, used) = match discriminant[0] { + byte @ 0..=SINGLE_BYTE_MAX => (byte as usize, 1), + U16_BYTE => { + let val = match endian { + Endian::Big => u16::from_be_bytes(bytes[..2].try_into().unwrap()), + Endian::Little => u16::from_le_bytes(bytes[..2].try_into().unwrap()), + }; + + (val as usize, 3) + } + U32_BYTE => { + let val = match endian { + Endian::Big => u32::from_be_bytes(bytes[..4].try_into().unwrap()), + Endian::Little => u32::from_le_bytes(bytes[..4].try_into().unwrap()), + }; + + (val as usize, 5) + } + U64_BYTE => { + let val = match endian { + Endian::Big => u64::from_be_bytes(bytes[..8].try_into().unwrap()), + Endian::Little => u64::from_le_bytes(bytes[..8].try_into().unwrap()), + }; + + (val as usize, 9) + } + U128_BYTE => return invalid_varint_discriminant(IntegerType::Usize, IntegerType::U128), + _ => return invalid_varint_discriminant(IntegerType::Usize, IntegerType::Reserved), + }; + + read.consume(used); + Ok(out) + } else { + deserialize_varint_cold_usize(read, endian) + } +} + +pub fn varint_decode_u128(read: 
&mut R, endian: Endian) -> Result { + if let Some(bytes) = read.peek_read(17) { + let (discriminant, bytes) = bytes.split_at(1); + let (out, used) = match discriminant[0] { + byte @ 0..=SINGLE_BYTE_MAX => (byte as u128, 1), + U16_BYTE => { + let val = match endian { + Endian::Big => u16::from_be_bytes(bytes[..2].try_into().unwrap()), + Endian::Little => u16::from_le_bytes(bytes[..2].try_into().unwrap()), + }; + + (val as u128, 3) + } + U32_BYTE => { + let val = match endian { + Endian::Big => u32::from_be_bytes(bytes[..4].try_into().unwrap()), + Endian::Little => u32::from_le_bytes(bytes[..4].try_into().unwrap()), + }; + + (val as u128, 5) + } + U64_BYTE => { + let val = match endian { + Endian::Big => u64::from_be_bytes(bytes[..8].try_into().unwrap()), + Endian::Little => u64::from_le_bytes(bytes[..8].try_into().unwrap()), + }; + + (val as u128, 9) + } + U128_BYTE => { + let val = match endian { + Endian::Big => u128::from_be_bytes(bytes[..16].try_into().unwrap()), + Endian::Little => u128::from_le_bytes(bytes[..16].try_into().unwrap()), + }; + + (val as u128, 17) + } + _ => return invalid_varint_discriminant(IntegerType::Usize, IntegerType::Reserved), + }; + + read.consume(used); + Ok(out) + } else { + deserialize_varint_cold_u128(read, endian) + } +} + +#[test] +fn test_decode_u16() { + let cases: &[(&[u8], u16, u16)] = &[ + (&[0], 0, 0), + (&[10], 10, 10), + (&[U16_BYTE, 0, 10], 2560, 10), + ]; + for &(slice, expected_le, expected_be) in cases { + let mut reader = crate::de::read::SliceReader::new(slice); + let found = varint_decode_u16(&mut reader, Endian::Little).unwrap(); + assert_eq!(expected_le, found); + + let mut reader = crate::de::read::SliceReader::new(slice); + let found = varint_decode_u16(&mut reader, Endian::Big).unwrap(); + assert_eq!(expected_be, found); + } + + let errors: &[(&[u8], DecodeError)] = &[ + ( + &[U32_BYTE], + DecodeError::InvalidIntegerType { + expected: IntegerType::U16, + found: IntegerType::U32, + }, + ), + ( + &[U64_BYTE], + 
DecodeError::InvalidIntegerType { + expected: IntegerType::U16, + found: IntegerType::U64, + }, + ), + ( + &[U128_BYTE], + DecodeError::InvalidIntegerType { + expected: IntegerType::U16, + found: IntegerType::U128, + }, + ), + (&[U16_BYTE], DecodeError::UnexpectedEnd), + (&[U16_BYTE, 0], DecodeError::UnexpectedEnd), + ]; + + for (slice, expected) in errors { + let mut reader = crate::de::read::SliceReader::new(slice); + let found = varint_decode_u16(&mut reader, Endian::Little).unwrap_err(); + assert_eq!(std::format!("{:?}", expected), std::format!("{:?}", found)); + } +} + +#[test] +fn test_decode_u32() { + let cases: &[(&[u8], u32, u32)] = &[ + (&[0], 0, 0), + (&[10], 10, 10), + (&[U16_BYTE, 0, 10], 2560, 10), + (&[U32_BYTE, 0, 0, 0, 10], 167_772_160, 10), + ]; + for &(slice, expected_le, expected_be) in cases { + let mut reader = crate::de::read::SliceReader::new(slice); + let found = varint_decode_u32(&mut reader, Endian::Little).unwrap(); + assert_eq!(expected_le, found); + + let mut reader = crate::de::read::SliceReader::new(slice); + let found = varint_decode_u32(&mut reader, Endian::Big).unwrap(); + assert_eq!(expected_be, found); + } + + let errors: &[(&[u8], DecodeError)] = &[ + ( + &[U64_BYTE], + DecodeError::InvalidIntegerType { + expected: IntegerType::U32, + found: IntegerType::U64, + }, + ), + ( + &[U128_BYTE], + DecodeError::InvalidIntegerType { + expected: IntegerType::U32, + found: IntegerType::U128, + }, + ), + (&[U16_BYTE], DecodeError::UnexpectedEnd), + (&[U16_BYTE, 0], DecodeError::UnexpectedEnd), + (&[U32_BYTE], DecodeError::UnexpectedEnd), + (&[U32_BYTE, 0], DecodeError::UnexpectedEnd), + (&[U32_BYTE, 0, 0], DecodeError::UnexpectedEnd), + (&[U32_BYTE, 0, 0, 0], DecodeError::UnexpectedEnd), + ]; + + for (slice, expected) in errors { + let mut reader = crate::de::read::SliceReader::new(slice); + let found = varint_decode_u32(&mut reader, Endian::Little).unwrap_err(); + assert_eq!(std::format!("{:?}", expected), std::format!("{:?}", found)); + 
} +} + +#[test] +fn test_decode_u64() { + let cases: &[(&[u8], u64, u64)] = &[ + (&[0], 0, 0), + (&[10], 10, 10), + (&[U16_BYTE, 0, 10], 2560, 10), + (&[U32_BYTE, 0, 0, 0, 10], 167_772_160, 10), + ( + &[U64_BYTE, 0, 0, 0, 0, 0, 0, 0, 10], + 72_057_594_037_9279_360, + 10, + ), + ]; + for &(slice, expected_le, expected_be) in cases { + let mut reader = crate::de::read::SliceReader::new(slice); + let found = varint_decode_u64(&mut reader, Endian::Little).unwrap(); + assert_eq!(expected_le, found); + + let mut reader = crate::de::read::SliceReader::new(slice); + let found = varint_decode_u64(&mut reader, Endian::Big).unwrap(); + assert_eq!(expected_be, found); + } + + let errors: &[(&[u8], DecodeError)] = &[ + ( + &[U128_BYTE], + DecodeError::InvalidIntegerType { + expected: IntegerType::U64, + found: IntegerType::U128, + }, + ), + (&[U16_BYTE], DecodeError::UnexpectedEnd), + (&[U16_BYTE, 0], DecodeError::UnexpectedEnd), + (&[U32_BYTE], DecodeError::UnexpectedEnd), + (&[U32_BYTE, 0], DecodeError::UnexpectedEnd), + (&[U32_BYTE, 0, 0], DecodeError::UnexpectedEnd), + (&[U32_BYTE, 0, 0, 0], DecodeError::UnexpectedEnd), + (&[U64_BYTE], DecodeError::UnexpectedEnd), + (&[U64_BYTE, 0], DecodeError::UnexpectedEnd), + (&[U64_BYTE, 0, 0], DecodeError::UnexpectedEnd), + (&[U64_BYTE, 0, 0, 0], DecodeError::UnexpectedEnd), + (&[U64_BYTE, 0, 0, 0, 0], DecodeError::UnexpectedEnd), + (&[U64_BYTE, 0, 0, 0, 0, 0], DecodeError::UnexpectedEnd), + (&[U64_BYTE, 0, 0, 0, 0, 0, 0], DecodeError::UnexpectedEnd), + (&[U64_BYTE, 0, 0, 0, 0, 0, 0, 0], DecodeError::UnexpectedEnd), + ]; + + for (slice, expected) in errors { + let mut reader = crate::de::read::SliceReader::new(slice); + let found = varint_decode_u64(&mut reader, Endian::Little).unwrap_err(); + assert_eq!(std::format!("{:?}", expected), std::format!("{:?}", found)); + } +} + +#[test] +fn test_decode_u128() { + let cases: &[(&[u8], u128, u128)] = &[ + (&[0], 0, 0), + (&[10], 10, 10), + (&[U16_BYTE, 0, 10], 2560, 10), + (&[U32_BYTE, 0, 
0, 0, 10], 167_772_160, 10), + ( + &[U64_BYTE, 0, 0, 0, 0, 0, 0, 0, 10], + 72_057_594_037_9279_360, + 10, + ), + ( + &[U128_BYTE, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10], + 13_292_279_957_849_158_729_038_070_602_803_445_760, + 10, + ), + ]; + for &(slice, expected_le, expected_be) in cases { + let mut reader = crate::de::read::SliceReader::new(slice); + let found = varint_decode_u128(&mut reader, Endian::Little).unwrap(); + assert_eq!(expected_le, found); + + let mut reader = crate::de::read::SliceReader::new(slice); + let found = varint_decode_u128(&mut reader, Endian::Big).unwrap(); + assert_eq!(expected_be, found); + } + + let errors: &[(&[u8], DecodeError)] = &[ + (&[U16_BYTE], DecodeError::UnexpectedEnd), + (&[U16_BYTE, 0], DecodeError::UnexpectedEnd), + (&[U32_BYTE], DecodeError::UnexpectedEnd), + (&[U32_BYTE, 0], DecodeError::UnexpectedEnd), + (&[U32_BYTE, 0, 0], DecodeError::UnexpectedEnd), + (&[U32_BYTE, 0, 0, 0], DecodeError::UnexpectedEnd), + (&[U64_BYTE], DecodeError::UnexpectedEnd), + (&[U64_BYTE, 0], DecodeError::UnexpectedEnd), + (&[U64_BYTE, 0, 0], DecodeError::UnexpectedEnd), + (&[U64_BYTE, 0, 0, 0], DecodeError::UnexpectedEnd), + (&[U64_BYTE, 0, 0, 0, 0], DecodeError::UnexpectedEnd), + (&[U64_BYTE, 0, 0, 0, 0, 0], DecodeError::UnexpectedEnd), + (&[U64_BYTE, 0, 0, 0, 0, 0, 0], DecodeError::UnexpectedEnd), + (&[U64_BYTE, 0, 0, 0, 0, 0, 0, 0], DecodeError::UnexpectedEnd), + (&[U128_BYTE], DecodeError::UnexpectedEnd), + (&[U128_BYTE, 0], DecodeError::UnexpectedEnd), + (&[U128_BYTE, 0, 0], DecodeError::UnexpectedEnd), + (&[U128_BYTE, 0, 0, 0], DecodeError::UnexpectedEnd), + (&[U128_BYTE, 0, 0, 0, 0], DecodeError::UnexpectedEnd), + (&[U128_BYTE, 0, 0, 0, 0, 0], DecodeError::UnexpectedEnd), + (&[U128_BYTE, 0, 0, 0, 0, 0, 0], DecodeError::UnexpectedEnd), + ( + &[U128_BYTE, 0, 0, 0, 0, 0, 0, 0], + DecodeError::UnexpectedEnd, + ), + ( + &[U128_BYTE, 0, 0, 0, 0, 0, 0, 0, 0], + DecodeError::UnexpectedEnd, + ), + ( + &[U128_BYTE, 0, 0, 0, 0, 0, 0, 0, 
0, 0], + DecodeError::UnexpectedEnd, + ), + ( + &[U128_BYTE, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + DecodeError::UnexpectedEnd, + ), + ( + &[U128_BYTE, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + DecodeError::UnexpectedEnd, + ), + ( + &[U128_BYTE, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + DecodeError::UnexpectedEnd, + ), + ( + &[U128_BYTE, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + DecodeError::UnexpectedEnd, + ), + ( + &[U128_BYTE, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + DecodeError::UnexpectedEnd, + ), + ]; + + for (slice, expected) in errors { + let mut reader = crate::de::read::SliceReader::new(slice); + let found = varint_decode_u128(&mut reader, Endian::Little).unwrap_err(); + std::dbg!(slice); + assert_eq!(std::format!("{:?}", expected), std::format!("{:?}", found)); + } +} diff --git a/src/varint/encode_signed.rs b/src/varint/encode_signed.rs new file mode 100644 index 000000000..0afa11c04 --- /dev/null +++ b/src/varint/encode_signed.rs @@ -0,0 +1,318 @@ +use super::{varint_encode_u128, varint_encode_u16, varint_encode_u32, varint_encode_u64}; +use crate::{config::Endian, enc::write::Writer, error::EncodeError}; + +pub fn varint_encode_i16( + writer: &mut W, + endian: Endian, + val: i16, +) -> Result<(), EncodeError> { + varint_encode_u16( + writer, + endian, + if val < 0 { + // let's avoid the edge case of i16::min_value() + // !n is equal to `-n - 1`, so this is: + // !n * 2 + 1 = 2(-n - 1) + 1 = -2n - 2 + 1 = -2n - 1 + !(val as u16) * 2 + 1 + } else { + (val as u16) * 2 + }, + ) +} + +pub fn varint_encode_i32( + writer: &mut W, + endian: Endian, + val: i32, +) -> Result<(), EncodeError> { + varint_encode_u32( + writer, + endian, + if val < 0 { + // let's avoid the edge case of i32::min_value() + // !n is equal to `-n - 1`, so this is: + // !n * 2 + 1 = 2(-n - 1) + 1 = -2n - 2 + 1 = -2n - 1 + !(val as u32) * 2 + 1 + } else { + (val as u32) * 2 + }, + ) +} + +pub fn varint_encode_i64( + writer: &mut W, + endian: Endian, + val: i64, +) -> Result<(), EncodeError> { + 
varint_encode_u64( + writer, + endian, + if val < 0 { + // let's avoid the edge case of i64::min_value() + // !n is equal to `-n - 1`, so this is: + // !n * 2 + 1 = 2(-n - 1) + 1 = -2n - 2 + 1 = -2n - 1 + !(val as u64) * 2 + 1 + } else { + (val as u64) * 2 + }, + ) +} + +pub fn varint_encode_i128( + writer: &mut W, + endian: Endian, + val: i128, +) -> Result<(), EncodeError> { + varint_encode_u128( + writer, + endian, + if val < 0 { + // let's avoid the edge case of i128::min_value() + // !n is equal to `-n - 1`, so this is: + // !n * 2 + 1 = 2(-n - 1) + 1 = -2n - 2 + 1 = -2n - 1 + !(val as u128) * 2 + 1 + } else { + (val as u128) * 2 + }, + ) +} + +pub fn varint_encode_isize( + writer: &mut W, + endian: Endian, + val: isize, +) -> Result<(), EncodeError> { + // isize is being encoded as a i64 + varint_encode_i64(writer, endian, val as i64) +} + +#[test] +fn test_encode_i16() { + let cases: &[(i16, &[u8], &[u8])] = &[ + (0, &[0], &[0]), + (2, &[4], &[4]), + (256, &[super::U16_BYTE, 0, 2], &[super::U16_BYTE, 2, 0]), + ( + 16_000, + &[super::U16_BYTE, 0, 125], + &[super::U16_BYTE, 125, 0], + ), + ( + i16::MAX - 1, + &[super::U16_BYTE, 252, 255], + &[super::U16_BYTE, 255, 252], + ), + ( + i16::MAX, + &[super::U16_BYTE, 254, 255], + &[super::U16_BYTE, 255, 254], + ), + ]; + + use crate::enc::write::SliceWriter; + let mut buffer = [0u8; 20]; + for &(value, expected_le, expected_be) in cases { + std::dbg!(value); + + // Little endian + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_i16(&mut writer, Endian::Little, value).unwrap(); + + assert_eq!(writer.bytes_written(), expected_le.len()); + assert_eq!(&buffer[..expected_le.len()], expected_le); + + // Big endian + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_i16(&mut writer, Endian::Big, value).unwrap(); + + assert_eq!(writer.bytes_written(), expected_be.len()); + assert_eq!(&buffer[..expected_be.len()], expected_be); + } +} + +#[test] +fn test_encode_i32() { + let cases: &[(i32, 
&[u8], &[u8])] = &[ + (0, &[0], &[0]), + (2, &[4], &[4]), + (256, &[super::U16_BYTE, 0, 2], &[super::U16_BYTE, 2, 0]), + ( + 16_000, + &[super::U16_BYTE, 0, 125], + &[super::U16_BYTE, 125, 0], + ), + ( + 40_000, + &[super::U32_BYTE, 128, 56, 1, 0], + &[super::U32_BYTE, 0, 1, 56, 128], + ), + ( + i32::MAX - 1, + &[super::U32_BYTE, 252, 255, 255, 255], + &[super::U32_BYTE, 255, 255, 255, 252], + ), + ( + i32::MAX, + &[super::U32_BYTE, 254, 255, 255, 255], + &[super::U32_BYTE, 255, 255, 255, 254], + ), + ]; + + use crate::enc::write::SliceWriter; + let mut buffer = [0u8; 20]; + for &(value, expected_le, expected_be) in cases { + std::dbg!(value); + + // Little endian + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_i32(&mut writer, Endian::Little, value).unwrap(); + + assert_eq!(writer.bytes_written(), expected_le.len()); + assert_eq!(&buffer[..expected_le.len()], expected_le); + + // Big endian + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_i32(&mut writer, Endian::Big, value).unwrap(); + + assert_eq!(writer.bytes_written(), expected_be.len()); + assert_eq!(&buffer[..expected_be.len()], expected_be); + } +} + +#[test] +fn test_encode_i64() { + let cases: &[(i64, &[u8], &[u8])] = &[ + (0, &[0], &[0]), + (2, &[4], &[4]), + (256, &[super::U16_BYTE, 0, 2], &[super::U16_BYTE, 2, 0]), + ( + 16_000, + &[super::U16_BYTE, 0, 125], + &[super::U16_BYTE, 125, 0], + ), + ( + 40_000, + &[super::U32_BYTE, 128, 56, 1, 0], + &[super::U32_BYTE, 0, 1, 56, 128], + ), + ( + 3_000_000_000, + &[super::U64_BYTE, 0, 188, 160, 101, 1, 0, 0, 0], + &[super::U64_BYTE, 0, 0, 0, 1, 101, 160, 188, 0], + ), + ( + i64::MAX - 1, + &[super::U64_BYTE, 252, 255, 255, 255, 255, 255, 255, 255], + &[super::U64_BYTE, 255, 255, 255, 255, 255, 255, 255, 252], + ), + ( + i64::MAX, + &[super::U64_BYTE, 254, 255, 255, 255, 255, 255, 255, 255], + &[super::U64_BYTE, 255, 255, 255, 255, 255, 255, 255, 254], + ), + ]; + + use crate::enc::write::SliceWriter; + let mut buffer = [0u8; 
20]; + for &(value, expected_le, expected_be) in cases { + std::dbg!(value); + + // Little endian + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_i64(&mut writer, Endian::Little, value).unwrap(); + + assert_eq!(writer.bytes_written(), expected_le.len()); + assert_eq!(&buffer[..expected_le.len()], expected_le); + + // Big endian + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_i64(&mut writer, Endian::Big, value).unwrap(); + + assert_eq!(writer.bytes_written(), expected_be.len()); + assert_eq!(&buffer[..expected_be.len()], expected_be); + } +} + +#[test] +fn test_encode_i128() { + #[rustfmt::skip] + let cases: &[(i128, &[u8], &[u8])] = &[ + (0, &[0], &[0]), + (2, &[4], &[4]), + (256, &[super::U16_BYTE, 0, 2], &[super::U16_BYTE, 2, 0]), + ( + 16_000, + &[super::U16_BYTE, 0, 125], + &[super::U16_BYTE, 125, 0], + ), + ( + 40_000, + &[super::U32_BYTE, 128, 56, 1, 0], + &[super::U32_BYTE, 0, 1, 56, 128], + ), + ( + 3_000_000_000, + &[super::U64_BYTE, 0, 188, 160, 101, 1, 0, 0, 0], + &[super::U64_BYTE, 0, 0, 0, 1, 101, 160, 188, 0], + ), + ( + 11_000_000_000_000_000_000, + &[ + super::U128_BYTE, + 0, 0, 152, 98, 112, 179, 79, 49, + 1, 0, 0, 0, 0, 0, 0, 0, + ], + &[ + super::U128_BYTE, + 0, 0, 0, 0, 0, 0, 0, 1, + 49, 79, 179, 112, 98, 152, 0, 0, + ], + ), + ( + i128::MAX - 1, + &[ + super::U128_BYTE, + 252, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + ], + &[ + super::U128_BYTE, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 252, + ], + ), + ( + i128::MAX, + &[ + super::U128_BYTE, + 254, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + ], + &[ + super::U128_BYTE, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 254, + ], + ), + ]; + + use crate::enc::write::SliceWriter; + let mut buffer = [0u8; 20]; + for &(value, expected_le, expected_be) in cases { + std::dbg!(value); + + // Little endian + let mut writer = 
SliceWriter::new(&mut buffer); + varint_encode_i128(&mut writer, Endian::Little, value).unwrap(); + + assert_eq!(writer.bytes_written(), expected_le.len()); + assert_eq!(&buffer[..expected_le.len()], expected_le); + + // Big endian + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_i128(&mut writer, Endian::Big, value).unwrap(); + + assert_eq!(writer.bytes_written(), expected_be.len()); + assert_eq!(&buffer[..expected_be.len()], expected_be); + } +} diff --git a/src/varint/encode_unsigned.rs b/src/varint/encode_unsigned.rs new file mode 100644 index 000000000..6c1138d22 --- /dev/null +++ b/src/varint/encode_unsigned.rs @@ -0,0 +1,383 @@ +use super::{SINGLE_BYTE_MAX, U128_BYTE, U16_BYTE, U32_BYTE, U64_BYTE}; +use crate::{config::Endian, enc::write::Writer, error::EncodeError}; + +pub fn varint_encode_u16( + writer: &mut W, + endian: Endian, + val: u16, +) -> Result<(), EncodeError> { + if val <= SINGLE_BYTE_MAX as _ { + writer.write(&[val as u8]) + } else { + writer.write(&[U16_BYTE])?; + match endian { + Endian::Big => writer.write(&val.to_be_bytes()), + Endian::Little => writer.write(&val.to_le_bytes()), + } + } +} + +pub fn varint_encode_u32( + writer: &mut W, + endian: Endian, + val: u32, +) -> Result<(), EncodeError> { + if val <= SINGLE_BYTE_MAX as _ { + writer.write(&[val as u8]) + } else if val <= u16::MAX as _ { + writer.write(&[U16_BYTE])?; + match endian { + Endian::Big => writer.write(&(val as u16).to_be_bytes()), + Endian::Little => writer.write(&(val as u16).to_le_bytes()), + } + } else { + writer.write(&[U32_BYTE])?; + match endian { + Endian::Big => writer.write(&val.to_be_bytes()), + Endian::Little => writer.write(&val.to_le_bytes()), + } + } +} + +pub fn varint_encode_u64( + writer: &mut W, + endian: Endian, + val: u64, +) -> Result<(), EncodeError> { + if val <= SINGLE_BYTE_MAX as _ { + writer.write(&[val as u8]) + } else if val <= u16::MAX as _ { + writer.write(&[U16_BYTE])?; + match endian { + Endian::Big => writer.write(&(val as 
u16).to_be_bytes()), + Endian::Little => writer.write(&(val as u16).to_le_bytes()), + } + } else if val <= u32::MAX as _ { + writer.write(&[U32_BYTE])?; + match endian { + Endian::Big => writer.write(&(val as u32).to_be_bytes()), + Endian::Little => writer.write(&(val as u32).to_le_bytes()), + } + } else { + writer.write(&[U64_BYTE])?; + match endian { + Endian::Big => writer.write(&val.to_be_bytes()), + Endian::Little => writer.write(&val.to_le_bytes()), + } + } +} + +pub fn varint_encode_u128( + writer: &mut W, + endian: Endian, + val: u128, +) -> Result<(), EncodeError> { + if val <= SINGLE_BYTE_MAX as _ { + writer.write(&[val as u8]) + } else if val <= u16::MAX as _ { + writer.write(&[U16_BYTE])?; + match endian { + Endian::Big => writer.write(&(val as u16).to_be_bytes()), + Endian::Little => writer.write(&(val as u16).to_le_bytes()), + } + } else if val <= u32::MAX as _ { + writer.write(&[U32_BYTE])?; + match endian { + Endian::Big => writer.write(&(val as u32).to_be_bytes()), + Endian::Little => writer.write(&(val as u32).to_le_bytes()), + } + } else if val <= u64::MAX as _ { + writer.write(&[U64_BYTE])?; + match endian { + Endian::Big => writer.write(&(val as u64).to_be_bytes()), + Endian::Little => writer.write(&(val as u64).to_le_bytes()), + } + } else { + writer.write(&[U128_BYTE])?; + match endian { + Endian::Big => writer.write(&val.to_be_bytes()), + Endian::Little => writer.write(&val.to_le_bytes()), + } + } +} + +pub fn varint_encode_usize( + writer: &mut W, + endian: Endian, + val: usize, +) -> Result<(), EncodeError> { + // usize is being encoded as a u64 + varint_encode_u64(writer, endian, val as u64) +} + +#[test] +fn test_encode_u16() { + use crate::enc::write::SliceWriter; + let mut buffer = [0u8; 20]; + + // these should all encode to a single byte + for i in 0u16..=SINGLE_BYTE_MAX as u16 { + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_u16(&mut writer, Endian::Big, i).unwrap(); + assert_eq!(writer.bytes_written(), 1); + 
assert_eq!(buffer[0] as u16, i); + + // Assert endianness doesn't matter + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_u16(&mut writer, Endian::Little, i).unwrap(); + assert_eq!(writer.bytes_written(), 1); + assert_eq!(buffer[0] as u16, i); + } + + // these values should encode in 3 bytes (leading byte + 2 bytes) + // Values chosen at random, add new cases as needed + for i in [ + SINGLE_BYTE_MAX as u16 + 1, + 300, + 500, + 700, + 888, + 1234, + u16::MAX, + ] { + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_u16(&mut writer, Endian::Big, i).unwrap(); + assert_eq!(writer.bytes_written(), 3); + assert_eq!(buffer[0], U16_BYTE); + assert_eq!(&buffer[1..3], &i.to_be_bytes()); + + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_u16(&mut writer, Endian::Little, i).unwrap(); + assert_eq!(writer.bytes_written(), 3); + assert_eq!(buffer[0], U16_BYTE); + assert_eq!(&buffer[1..3], &i.to_le_bytes()); + } +} + +#[test] +fn test_encode_u32() { + use crate::enc::write::SliceWriter; + let mut buffer = [0u8; 20]; + + // these should all encode to a single byte + for i in 0u32..=SINGLE_BYTE_MAX as u32 { + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_u32(&mut writer, Endian::Big, i).unwrap(); + assert_eq!(writer.bytes_written(), 1); + assert_eq!(buffer[0] as u32, i); + + // Assert endianness doesn't matter + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_u32(&mut writer, Endian::Little, i).unwrap(); + assert_eq!(writer.bytes_written(), 1); + assert_eq!(buffer[0] as u32, i); + } + + // these values should encode in 3 bytes (leading byte + 2 bytes) + // Values chosen at random, add new cases as needed + for i in [ + SINGLE_BYTE_MAX as u32 + 1, + 300, + 500, + 700, + 888, + 1234, + u16::MAX as u32, + ] { + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_u32(&mut writer, Endian::Big, i).unwrap(); + assert_eq!(writer.bytes_written(), 3); + assert_eq!(buffer[0], U16_BYTE); + 
assert_eq!(&buffer[1..3], &(i as u16).to_be_bytes()); + + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_u32(&mut writer, Endian::Little, i).unwrap(); + assert_eq!(writer.bytes_written(), 3); + assert_eq!(buffer[0], U16_BYTE); + assert_eq!(&buffer[1..3], &(i as u16).to_le_bytes()); + } + + // these values should encode in 5 bytes (leading byte + 4 bytes) + // Values chosen at random, add new cases as needed + for i in [u16::MAX as u32 + 1, 100_000, 1_000_000, u32::MAX] { + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_u32(&mut writer, Endian::Big, i).unwrap(); + assert_eq!(writer.bytes_written(), 5); + assert_eq!(buffer[0], U32_BYTE); + assert_eq!(&buffer[1..5], &i.to_be_bytes()); + + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_u32(&mut writer, Endian::Little, i).unwrap(); + assert_eq!(writer.bytes_written(), 5); + assert_eq!(buffer[0], U32_BYTE); + assert_eq!(&buffer[1..5], &i.to_le_bytes()); + } +} + +#[test] +fn test_encode_u64() { + use crate::enc::write::SliceWriter; + let mut buffer = [0u8; 20]; + + // these should all encode to a single byte + for i in 0u64..=SINGLE_BYTE_MAX as u64 { + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_u64(&mut writer, Endian::Big, i).unwrap(); + assert_eq!(writer.bytes_written(), 1); + assert_eq!(buffer[0] as u64, i); + + // Assert endianness doesn't matter + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_u64(&mut writer, Endian::Little, i).unwrap(); + assert_eq!(writer.bytes_written(), 1); + assert_eq!(buffer[0] as u64, i); + } + + // these values should encode in 3 bytes (leading byte + 2 bytes) + // Values chosen at random, add new cases as needed + for i in [ + SINGLE_BYTE_MAX as u64 + 1, + 300, + 500, + 700, + 888, + 1234, + u16::MAX as u64, + ] { + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_u64(&mut writer, Endian::Big, i).unwrap(); + assert_eq!(writer.bytes_written(), 3); + assert_eq!(buffer[0], U16_BYTE); + 
assert_eq!(&buffer[1..3], &(i as u16).to_be_bytes()); + + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_u64(&mut writer, Endian::Little, i).unwrap(); + assert_eq!(writer.bytes_written(), 3); + assert_eq!(buffer[0], U16_BYTE); + assert_eq!(&buffer[1..3], &(i as u16).to_le_bytes()); + } + + // these values should encode in 5 bytes (leading byte + 4 bytes) + // Values chosen at random, add new cases as needed + for i in [u16::MAX as u64 + 1, 100_000, 1_000_000, u32::MAX as u64] { + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_u64(&mut writer, Endian::Big, i).unwrap(); + assert_eq!(writer.bytes_written(), 5); + assert_eq!(buffer[0], U32_BYTE); + assert_eq!(&buffer[1..5], &(i as u32).to_be_bytes()); + + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_u64(&mut writer, Endian::Little, i).unwrap(); + assert_eq!(writer.bytes_written(), 5); + assert_eq!(buffer[0], U32_BYTE); + assert_eq!(&buffer[1..5], &(i as u32).to_le_bytes()); + } + + // these values should encode in 9 bytes (leading byte + 8 bytes) + // Values chosen at random, add new cases as needed + for i in [u32::MAX as u64 + 1, 500_0000_000, u64::MAX] { + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_u64(&mut writer, Endian::Big, i).unwrap(); + assert_eq!(writer.bytes_written(), 9); + assert_eq!(buffer[0], U64_BYTE); + assert_eq!(&buffer[1..9], &i.to_be_bytes()); + + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_u64(&mut writer, Endian::Little, i).unwrap(); + assert_eq!(writer.bytes_written(), 9); + assert_eq!(buffer[0], U64_BYTE); + assert_eq!(&buffer[1..9], &i.to_le_bytes()); + } +} + +#[test] +fn test_encode_u128() { + use crate::enc::write::SliceWriter; + let mut buffer = [0u8; 20]; + + // these should all encode to a single byte + for i in 0u128..=SINGLE_BYTE_MAX as u128 { + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_u128(&mut writer, Endian::Big, i).unwrap(); + assert_eq!(writer.bytes_written(), 1); + 
assert_eq!(buffer[0] as u128, i); + + // Assert endianness doesn't matter + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_u128(&mut writer, Endian::Little, i).unwrap(); + assert_eq!(writer.bytes_written(), 1); + assert_eq!(buffer[0] as u128, i); + } + + // these values should encode in 3 bytes (leading byte + 2 bytes) + // Values chosen at random, add new cases as needed + for i in [ + SINGLE_BYTE_MAX as u128 + 1, + 300, + 500, + 700, + 888, + 1234, + u16::MAX as u128, + ] { + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_u128(&mut writer, Endian::Big, i).unwrap(); + assert_eq!(writer.bytes_written(), 3); + assert_eq!(buffer[0], U16_BYTE); + assert_eq!(&buffer[1..3], &(i as u16).to_be_bytes()); + + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_u128(&mut writer, Endian::Little, i).unwrap(); + assert_eq!(writer.bytes_written(), 3); + assert_eq!(buffer[0], U16_BYTE); + assert_eq!(&buffer[1..3], &(i as u16).to_le_bytes()); + } + + // these values should encode in 5 bytes (leading byte + 4 bytes) + // Values chosen at random, add new cases as needed + for i in [u16::MAX as u128 + 1, 100_000, 1_000_000, u32::MAX as u128] { + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_u128(&mut writer, Endian::Big, i).unwrap(); + assert_eq!(writer.bytes_written(), 5); + assert_eq!(buffer[0], U32_BYTE); + assert_eq!(&buffer[1..5], &(i as u32).to_be_bytes()); + + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_u128(&mut writer, Endian::Little, i).unwrap(); + assert_eq!(writer.bytes_written(), 5); + assert_eq!(buffer[0], U32_BYTE); + assert_eq!(&buffer[1..5], &(i as u32).to_le_bytes()); + } + + // these values should encode in 9 bytes (leading byte + 8 bytes) + // Values chosen at random, add new cases as needed + for i in [u32::MAX as u128 + 1, 500_0000_000, u64::MAX as u128] { + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_u128(&mut writer, Endian::Big, i).unwrap(); + 
assert_eq!(writer.bytes_written(), 9); + assert_eq!(buffer[0], U64_BYTE); + assert_eq!(&buffer[1..9], &(i as u64).to_be_bytes()); + + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_u128(&mut writer, Endian::Little, i).unwrap(); + assert_eq!(writer.bytes_written(), 9); + assert_eq!(buffer[0], U64_BYTE); + assert_eq!(&buffer[1..9], &(i as u64).to_le_bytes()); + } + + // these values should encode in 17 bytes (leading byte + 16 bytes) + // Values chosen at random, add new cases as needed + for i in [u64::MAX as u128 + 1, u128::MAX] { + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_u128(&mut writer, Endian::Big, i).unwrap(); + assert_eq!(writer.bytes_written(), 17); + assert_eq!(buffer[0], U128_BYTE); + assert_eq!(&buffer[1..17], &i.to_be_bytes()); + + let mut writer = SliceWriter::new(&mut buffer); + varint_encode_u128(&mut writer, Endian::Little, i).unwrap(); + assert_eq!(writer.bytes_written(), 17); + assert_eq!(buffer[0], U128_BYTE); + assert_eq!(&buffer[1..17], &i.to_le_bytes()); + } +} diff --git a/src/varint/mod.rs b/src/varint/mod.rs new file mode 100644 index 000000000..afadfb506 --- /dev/null +++ b/src/varint/mod.rs @@ -0,0 +1,29 @@ +mod decode_signed; +mod decode_unsigned; +mod encode_signed; +mod encode_unsigned; + +pub use self::{ + decode_signed::{ + varint_decode_i128, varint_decode_i16, varint_decode_i32, varint_decode_i64, + varint_decode_isize, + }, + decode_unsigned::{ + varint_decode_u128, varint_decode_u16, varint_decode_u32, varint_decode_u64, + varint_decode_usize, + }, + encode_signed::{ + varint_encode_i128, varint_encode_i16, varint_encode_i32, varint_encode_i64, + varint_encode_isize, + }, + encode_unsigned::{ + varint_encode_u128, varint_encode_u16, varint_encode_u32, varint_encode_u64, + varint_encode_usize, + }, +}; + +pub(self) const SINGLE_BYTE_MAX: u8 = 250; +pub(self) const U16_BYTE: u8 = 251; +pub(self) const U32_BYTE: u8 = 252; +pub(self) const U64_BYTE: u8 = 253; +pub(self) const U128_BYTE: u8 = 
254; diff --git a/tests/alloc.rs b/tests/alloc.rs new file mode 100644 index 000000000..baad469cd --- /dev/null +++ b/tests/alloc.rs @@ -0,0 +1,91 @@ +#![cfg(feature = "alloc")] + +extern crate alloc; + +mod utils; + +use alloc::borrow::Cow; +use alloc::collections::*; +use alloc::rc::Rc; +#[cfg(feature = "atomic")] +use alloc::sync::Arc; +use bincode::config::Configuration; +use utils::{the_same, the_same_with_comparer}; + +struct Foo { + pub a: u32, + pub b: u32, +} + +impl bincode::enc::Encode for Foo { + fn encode( + &self, + mut encoder: E, + ) -> Result<(), bincode::error::EncodeError> { + self.a.encode(&mut encoder)?; + self.b.encode(&mut encoder)?; + Ok(()) + } +} + +impl bincode::de::Decode for Foo { + fn decode( + mut decoder: D, + ) -> Result { + Ok(Self { + a: bincode::de::Decode::decode(&mut decoder)?, + b: bincode::de::Decode::decode(&mut decoder)?, + }) + } +} + +#[test] +fn test_vec() { + let vec = bincode::encode_to_vec(Foo { a: 5, b: 10 }, Configuration::standard()).unwrap(); + assert_eq!(vec, &[5, 10]); + + let foo: Foo = bincode::decode_from_slice(&vec, Configuration::standard()).unwrap(); + assert_eq!(foo.a, 5); + assert_eq!(foo.b, 10); +} + +#[test] +fn test_alloc_commons() { + the_same::>(vec![1, 2, 3, 4, 5]); + the_same(String::from("Hello world")); + the_same(Box::::new(5)); + the_same(Box::<[u32]>::from(vec![1, 2, 3, 4, 5])); + the_same(Cow::::Owned(5)); + the_same(Cow::::Borrowed(&5)); + the_same(Rc::::new(5)); + #[cfg(feature = "atomic")] + the_same(Arc::::new(5)); + the_same_with_comparer( + { + let mut map = BinaryHeap::::new(); + map.push(1); + map.push(2); + map.push(3); + map.push(4); + map.push(5); + map + }, + |a, b| a.into_iter().collect::>() == b.into_iter().collect::>(), + ); + the_same({ + let mut map = BTreeMap::::new(); + map.insert(5, -5); + map + }); + the_same({ + let mut set = BTreeSet::::new(); + set.insert(5); + set + }); + the_same({ + let mut set = VecDeque::::new(); + set.push_back(15); + set.push_front(5); + set + 
}); +} diff --git a/tests/atomic.rs b/tests/atomic.rs new file mode 100644 index 000000000..41e53e877 --- /dev/null +++ b/tests/atomic.rs @@ -0,0 +1,49 @@ +#![cfg(feature = "atomic")] + +mod utils; + +use core::sync::atomic::{ + AtomicBool, AtomicI16, AtomicI32, AtomicI64, AtomicI8, AtomicIsize, AtomicU16, AtomicU32, + AtomicU64, AtomicU8, AtomicUsize, Ordering, +}; +use utils::the_same_with_comparer; + +#[test] +fn test_atomic_commons() { + the_same_with_comparer(AtomicBool::new(true), |a, b| { + a.load(Ordering::SeqCst) == b.load(Ordering::SeqCst) + }); + the_same_with_comparer(AtomicBool::new(false), |a, b| { + a.load(Ordering::SeqCst) == b.load(Ordering::SeqCst) + }); + the_same_with_comparer(AtomicU8::new(0), |a, b| { + a.load(Ordering::SeqCst) == b.load(Ordering::SeqCst) + }); + the_same_with_comparer(AtomicU16::new(0), |a, b| { + a.load(Ordering::SeqCst) == b.load(Ordering::SeqCst) + }); + the_same_with_comparer(AtomicU32::new(0), |a, b| { + a.load(Ordering::SeqCst) == b.load(Ordering::SeqCst) + }); + the_same_with_comparer(AtomicU64::new(0), |a, b| { + a.load(Ordering::SeqCst) == b.load(Ordering::SeqCst) + }); + the_same_with_comparer(AtomicUsize::new(0), |a, b| { + a.load(Ordering::SeqCst) == b.load(Ordering::SeqCst) + }); + the_same_with_comparer(AtomicI8::new(0), |a, b| { + a.load(Ordering::SeqCst) == b.load(Ordering::SeqCst) + }); + the_same_with_comparer(AtomicI16::new(0), |a, b| { + a.load(Ordering::SeqCst) == b.load(Ordering::SeqCst) + }); + the_same_with_comparer(AtomicI32::new(0), |a, b| { + a.load(Ordering::SeqCst) == b.load(Ordering::SeqCst) + }); + the_same_with_comparer(AtomicI64::new(0), |a, b| { + a.load(Ordering::SeqCst) == b.load(Ordering::SeqCst) + }); + the_same_with_comparer(AtomicIsize::new(0), |a, b| { + a.load(Ordering::SeqCst) == b.load(Ordering::SeqCst) + }); +} diff --git a/tests/basic_types.rs b/tests/basic_types.rs new file mode 100644 index 000000000..8b8d4e8fa --- /dev/null +++ b/tests/basic_types.rs @@ -0,0 +1,169 @@ +mod 
utils; + +use bincode::config::Configuration; +use core::cell::{Cell, RefCell}; +use core::ops::Bound; +use core::time::Duration; +use std::num::*; +use utils::the_same; + +#[test] +fn test_numbers() { + // integer types + the_same(5u8); + the_same(5u16); + the_same(5u32); + the_same(5u64); + the_same(5u128); + the_same(5usize); + + the_same(5i8); + the_same(5i16); + the_same(5i32); + the_same(5i64); + the_same(5i128); + the_same(5isize); + + the_same(5.0f32); + the_same(5.0f64); + + // bool + the_same(true); + the_same(false); + + // utf8 characters + for char in "aÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö文".chars() + { + the_same(char); + } + + // tuples, up to 8 + the_same((1u8,)); + the_same((1u8, 2u8)); + the_same((1u8, 2u8, 3u8)); + the_same((1u8, 2u8, 3u8, 4u8)); + the_same((1u8, 2u8, 3u8, 4u8, 5u8)); + the_same((1u8, 2u8, 3u8, 4u8, 5u8, 6u8)); + the_same((1u8, 2u8, 3u8, 4u8, 5u8, 6u8, 7u8)); + the_same((1u8, 2u8, 3u8, 4u8, 5u8, 6u8, 7u8, 8u8)); + + // arrays + #[rustfmt::skip] + the_same([ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, + 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, + 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, + 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, + 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, + 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, + 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, + 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, + 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, + 209, 210, 
211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, + 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, + 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255 + ]); + + // Common types + the_same(Option::::None); + the_same(Option::::Some(1234)); + + the_same(Result::::Ok(1555)); + the_same(Result::::Err(15)); + + the_same(Cell::::new(15)); + the_same(RefCell::::new(15)); + + the_same(Duration::new(5, 730023852)); + the_same(5u8..10u8); + the_same(5u8..=10u8); + the_same(Bound::::Unbounded); + the_same(Bound::::Included(105)); + the_same(Bound::::Excluded(5)); + + // NonZero* types + the_same(NonZeroU8::new(0)); + the_same(NonZeroU8::new(123)); + the_same(NonZeroU16::new(0)); + the_same(NonZeroU16::new(12345)); + the_same(NonZeroU32::new(0)); + the_same(NonZeroU32::new(12345)); + the_same(NonZeroU64::new(0)); + the_same(NonZeroU64::new(12345)); + the_same(NonZeroU128::new(0)); + the_same(NonZeroU128::new(12345)); + the_same(NonZeroUsize::new(0)); + the_same(NonZeroUsize::new(12345)); + + the_same(NonZeroI8::new(0)); + the_same(NonZeroI8::new(123)); + the_same(NonZeroI16::new(0)); + the_same(NonZeroI16::new(12345)); + the_same(NonZeroI32::new(0)); + the_same(NonZeroI32::new(12345)); + the_same(NonZeroI64::new(0)); + the_same(NonZeroI64::new(12345)); + the_same(NonZeroI128::new(0)); + the_same(NonZeroI128::new(12345)); + the_same(NonZeroIsize::new(0)); + the_same(NonZeroIsize::new(12345)); +} + +#[test] +fn test_refcell_already_borrowed() { + let cell = RefCell::new(5u32); + // first get a mutable reference to the cell + let _mutable_guard = cell.borrow_mut(); + // now try to encode it + let mut slice = [0u8; 10]; + let result = bincode::encode_into_slice(&cell, &mut slice, Configuration::standard()) + .expect_err("Encoding a borrowed refcell should fail"); + + match result { + bincode::error::EncodeError::RefCellAlreadyBorrowed { .. 
} => {} // ok + x => panic!("Expected a RefCellAlreadyBorrowed error, found {:?}", x), + } +} + +#[test] +fn test_slice() { + let mut buffer = [0u8; 32]; + let input: &[u8] = &[1, 2, 3, 4, 5, 6, 7]; + bincode::encode_into_slice(input, &mut buffer, Configuration::standard()).unwrap(); + assert_eq!(&buffer[..8], &[7, 1, 2, 3, 4, 5, 6, 7]); + + let output: &[u8] = + bincode::decode_from_slice(&mut buffer[..8], Configuration::standard()).unwrap(); + assert_eq!(input, output); +} + +#[test] +fn test_str() { + let mut buffer = [0u8; 32]; + let input: &str = "Hello world"; + bincode::encode_into_slice(input, &mut buffer, Configuration::standard()).unwrap(); + assert_eq!( + &buffer[..12], + &[11, 72, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100] + ); + + let output: &str = + bincode::decode_from_slice(&mut buffer[..12], Configuration::standard()).unwrap(); + assert_eq!(input, output); +} + +#[test] +fn test_array() { + let mut buffer = [0u8; 32]; + let input: [u8; 10] = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]; + bincode::encode_into_slice(input, &mut buffer, Configuration::standard()).unwrap(); + assert_eq!(&buffer[..10], &[10, 20, 30, 40, 50, 60, 70, 80, 90, 100]); + + let output: [u8; 10] = + bincode::decode_from_slice(&mut buffer[..10], Configuration::standard()).unwrap(); + assert_eq!(input, output); +} diff --git a/tests/derive.rs b/tests/derive.rs new file mode 100644 index 000000000..0ec0e495f --- /dev/null +++ b/tests/derive.rs @@ -0,0 +1,146 @@ +#![cfg(feature = "derive")] + +use bincode::config::Configuration; +use bincode::{de::Decode, enc::Encode}; + +#[derive(bincode::Encode, PartialEq, Debug)] +pub(crate) struct Test { + a: T, + b: u32, + c: u8, +} + +#[derive(bincode::Decode, PartialEq, Debug, Eq)] +pub struct Test2 { + a: T, + b: u32, + c: u32, +} + +#[derive(bincode::Decode, PartialEq, Debug, Eq)] +pub struct Test3<'a> { + a: &'a str, + b: u32, + c: u32, +} + +#[derive(bincode::Encode, bincode::Decode, PartialEq, Debug, Eq)] +pub struct 
TestTupleStruct(u32, u32, u32); + +#[derive(bincode::Encode, bincode::Decode, PartialEq, Debug, Eq)] +pub enum TestEnum { + Foo, + Bar { name: u32 }, + Baz(u32, u32, u32), +} + +#[derive(bincode::Encode, bincode::Decode, PartialEq, Debug, Eq)] +pub enum TestEnum2<'a> { + Foo, + Bar { name: &'a str }, + Baz(u32, u32, u32), +} + +#[test] +fn test_encode() { + let start = Test { + a: 5i32, + b: 10u32, + c: 20u8, + }; + let mut slice = [0u8; 1024]; + let bytes_written = + bincode::encode_into_slice(start, &mut slice, Configuration::standard()).unwrap(); + assert_eq!(bytes_written, 3); + assert_eq!(&slice[..bytes_written], &[10, 10, 20]); +} + +#[cfg(feature = "std")] +#[test] +fn test_decode() { + let start = Test2 { + a: 5u32, + b: 10u32, + c: 1024u32, + }; + let slice = [5, 10, 251, 0, 4]; + let result: Test2 = + bincode::decode_from_std_read(&mut slice.as_ref(), Configuration::standard()).unwrap(); + assert_eq!(result, start); +} + +#[test] +fn test_encode_tuple() { + let start = TestTupleStruct(5, 10, 1024); + let mut slice = [0u8; 1024]; + let bytes_written = + bincode::encode_into_slice(start, &mut slice, Configuration::standard()).unwrap(); + assert_eq!(bytes_written, 5); + assert_eq!(&slice[..bytes_written], &[5, 10, 251, 0, 4]); +} + +#[test] +fn test_decode_tuple() { + let start = TestTupleStruct(5, 10, 1024); + let mut slice = [5, 10, 251, 0, 4]; + let result: TestTupleStruct = + bincode::decode_from_slice(&mut slice, Configuration::standard()).unwrap(); + assert_eq!(result, start); +} + +#[test] +fn test_encode_enum_struct_variant() { + let start = TestEnum::Bar { name: 5u32 }; + let mut slice = [0u8; 1024]; + let bytes_written = + bincode::encode_into_slice(start, &mut slice, Configuration::standard()).unwrap(); + assert_eq!(bytes_written, 2); + assert_eq!(&slice[..bytes_written], &[1, 5]); +} + +#[test] +fn test_decode_enum_struct_variant() { + let start = TestEnum::Bar { name: 5u32 }; + let mut slice = [1, 5]; + let result: TestEnum = + 
bincode::decode_from_slice(&mut slice, Configuration::standard()).unwrap(); + assert_eq!(result, start); +} + +#[test] +fn test_encode_enum_tuple_variant() { + let start = TestEnum::Baz(5, 10, 1024); + let mut slice = [0u8; 1024]; + let bytes_written = + bincode::encode_into_slice(start, &mut slice, Configuration::standard()).unwrap(); + assert_eq!(bytes_written, 6); + assert_eq!(&slice[..bytes_written], &[2, 5, 10, 251, 0, 4]); +} + +#[test] +fn test_decode_enum_unit_variant() { + let start = TestEnum::Foo; + let mut slice = [0]; + let result: TestEnum = + bincode::decode_from_slice(&mut slice, Configuration::standard()).unwrap(); + assert_eq!(result, start); +} + +#[test] +fn test_encode_enum_unit_variant() { + let start = TestEnum::Foo; + let mut slice = [0u8; 1024]; + let bytes_written = + bincode::encode_into_slice(start, &mut slice, Configuration::standard()).unwrap(); + assert_eq!(bytes_written, 1); + assert_eq!(&slice[..bytes_written], &[0]); +} + +#[test] +fn test_decode_enum_tuple_variant() { + let start = TestEnum::Baz(5, 10, 1024); + let mut slice = [2, 5, 10, 251, 0, 4]; + let result: TestEnum = + bincode::decode_from_slice(&mut slice, Configuration::standard()).unwrap(); + assert_eq!(result, start); +} diff --git a/tests/serde.rs b/tests/serde.rs new file mode 100644 index 000000000..17705b189 --- /dev/null +++ b/tests/serde.rs @@ -0,0 +1,31 @@ +#![cfg(all(feature = "serde", feature = "alloc", feature = "derive"))] + +use bincode::config::Configuration; +use serde_derive::{Deserialize, Serialize}; + +#[derive(Serialize, Deserialize, bincode::Encode, bincode::Decode)] +pub struct SerdeRoundtrip { + pub a: u32, + #[serde(skip)] + pub b: u32, +} + +#[test] +fn test_serde_round_trip() { + // validate serde attribute working + let json = serde_json::to_string(&SerdeRoundtrip { a: 5, b: 5 }).unwrap(); + assert_eq!("{\"a\":5}", json); + + let result: SerdeRoundtrip = serde_json::from_str(&json).unwrap(); + assert_eq!(result.a, 5); + assert_eq!(result.b, 0); 
+ + // validate bincode working + let bytes = + bincode::encode_to_vec(SerdeRoundtrip { a: 15, b: 15 }, Configuration::standard()).unwrap(); + assert_eq!(bytes, &[15, 15]); + let result: SerdeRoundtrip = + bincode::decode_from_slice(&bytes, Configuration::standard()).unwrap(); + assert_eq!(result.a, 15); + assert_eq!(result.b, 15); +} diff --git a/tests/std.rs b/tests/std.rs new file mode 100644 index 000000000..66b4e7b20 --- /dev/null +++ b/tests/std.rs @@ -0,0 +1,111 @@ +#![cfg(feature = "std")] + +mod utils; + +use bincode::config::Configuration; +use std::{ + ffi::{CStr, CString}, + io::{Cursor, Seek, SeekFrom}, + net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV4, SocketAddrV6}, + path::{Path, PathBuf}, + sync::{Mutex, RwLock}, +}; +use utils::the_same; + +use crate::utils::the_same_with_comparer; + +struct Foo { + pub a: u32, + pub b: u32, +} + +impl bincode::enc::Encode for Foo { + fn encode( + &self, + mut encoder: E, + ) -> Result<(), bincode::error::EncodeError> { + self.a.encode(&mut encoder)?; + self.b.encode(&mut encoder)?; + Ok(()) + } +} + +impl bincode::de::Decode for Foo { + fn decode( + mut decoder: D, + ) -> Result { + Ok(Self { + a: bincode::de::Decode::decode(&mut decoder)?, + b: bincode::de::Decode::decode(&mut decoder)?, + }) + } +} + +#[test] +fn test_std_cursor() { + let mut cursor = Cursor::<&[u8]>::new(&[5, 10]); + let foo: Foo = bincode::decode_from_std_read(&mut cursor, Configuration::standard()).unwrap(); + + assert_eq!(foo.a, 5); + assert_eq!(foo.b, 10); +} + +#[test] +fn test_std_file() { + let mut file = tempfile::tempfile().expect("Could not create temp file"); + + let bytes_written = + bincode::encode_into_std_write(Foo { a: 30, b: 50 }, &mut file, Configuration::standard()) + .unwrap(); + assert_eq!(bytes_written, 2); + file.seek(SeekFrom::Start(0)).unwrap(); + + let foo: Foo = bincode::decode_from_std_read(&mut file, Configuration::standard()).unwrap(); + + assert_eq!(foo.a, 30); + assert_eq!(foo.b, 50); +} + +#[test] 
+fn test_std_commons() { + the_same(CString::new("Hello world").unwrap()); + the_same(PathBuf::from("C:/Program Files/Foo")); + the_same(Ipv4Addr::LOCALHOST); + the_same(Ipv6Addr::LOCALHOST); + the_same(IpAddr::V4(Ipv4Addr::LOCALHOST)); + the_same(IpAddr::V6(Ipv6Addr::LOCALHOST)); + the_same(SocketAddrV4::new(Ipv4Addr::LOCALHOST, 12345)); + the_same(SocketAddrV6::new(Ipv6Addr::LOCALHOST, 12345, 0, 0)); + the_same(SocketAddr::V4(SocketAddrV4::new( + Ipv4Addr::LOCALHOST, + 12345, + ))); + the_same(SocketAddr::V6(SocketAddrV6::new( + Ipv6Addr::LOCALHOST, + 12345, + 0, + 0, + ))); + the_same_with_comparer(Mutex::new("Hello world".to_string()), |a, b| { + &*a.lock().unwrap() == &*b.lock().unwrap() + }); + the_same_with_comparer(RwLock::new("Hello world".to_string()), |a, b| { + &*a.read().unwrap() == &*b.read().unwrap() + }); + + // Borrowed values + let config = bincode::config::Configuration::standard(); + let mut buffer = [0u8; 1024]; + + // &CStr + let cstr = CStr::from_bytes_with_nul(b"Hello world\0").unwrap(); + let len = bincode::encode_into_slice(cstr, &mut buffer, config).unwrap(); + let decoded: &CStr = bincode::decode_from_slice(&mut buffer[..len], config).unwrap(); + assert_eq!(cstr, decoded); + + // Path + let path = Path::new("C:/Program Files/Foo"); + let len = bincode::encode_into_slice(path, &mut buffer, config).unwrap(); + let decoded: &Path = bincode::decode_from_slice(&mut buffer[..len], config).unwrap(); + assert_eq!(path, decoded); +} diff --git a/tests/test.rs b/tests/test.rs deleted file mode 100644 index 0d6467677..000000000 --- a/tests/test.rs +++ /dev/null @@ -1,905 +0,0 @@ -#[macro_use] -extern crate serde_derive; - -extern crate bincode; -extern crate byteorder; -#[macro_use] -extern crate serde; -extern crate serde_bytes; - -use std::borrow::Cow; -use std::collections::HashMap; -use std::fmt::{self, Debug}; -use std::result::Result as StdResult; - -use bincode::{ - deserialize, deserialize_from, deserialize_in_place, serialize, 
serialized_size, - DefaultOptions, ErrorKind, Options, Result, -}; -use serde::de::{Deserialize, DeserializeSeed, Deserializer, SeqAccess, Visitor}; - -const LEN_SIZE: u64 = 8; - -fn the_same_impl(element: V, options: &mut O) -where - V: serde::Serialize + serde::de::DeserializeOwned + PartialEq + Debug + 'static, - O: Options, -{ - let size = options.serialized_size(&element).unwrap(); - - { - let encoded = options.serialize(&element).unwrap(); - let decoded: V = options.deserialize(&encoded[..]).unwrap(); - let decoded_reader = options.deserialize_from(&mut &encoded[..]).unwrap(); - - assert_eq!(element, decoded); - assert_eq!(element, decoded_reader); - assert_eq!(size, encoded.len() as u64); - } -} - -fn the_same(element: V) -where - V: serde::Serialize + serde::de::DeserializeOwned + PartialEq + Debug + Clone + 'static, -{ - // add a new macro which calls the previous when you add a new option set - macro_rules! all_endians { - ($element:expr, $options:expr) => { - the_same_impl($element.clone(), &mut $options.with_native_endian()); - the_same_impl($element.clone(), &mut $options.with_big_endian()); - the_same_impl($element.clone(), &mut $options.with_little_endian()); - }; - } - - macro_rules! all_integer_encodings { - ($element:expr, $options:expr) => { - all_endians!($element, $options.with_fixint_encoding()); - all_endians!($element, $options.with_varint_encoding()); - }; - } - - all_integer_encodings!(element, DefaultOptions::new()); -} - -#[test] -fn test_numbers() { - // unsigned positive - the_same(5u8); - the_same(5u16); - the_same(5u32); - the_same(5u64); - the_same(5usize); - // signed positive - the_same(5i8); - the_same(5i16); - the_same(5i32); - the_same(5i64); - the_same(5isize); - // signed negative - the_same(-5i8); - the_same(-5i16); - the_same(-5i32); - the_same(-5i64); - the_same(-5isize); - // floating - the_same(-100f32); - the_same(0f32); - the_same(5f32); - the_same(-100f64); - the_same(5f64); -} - -serde_if_integer128! 
{ - #[test] - fn test_numbers_128bit() { - // unsigned positive - the_same(5u128); - the_same(u128::max_value()); - // signed positive - the_same(5i128); - the_same(i128::max_value()); - // signed negative - the_same(-5i128); - the_same(i128::min_value()); - } -} - -#[test] -fn test_string() { - the_same("".to_string()); - the_same("a".to_string()); -} - -#[test] -fn test_tuple() { - the_same((1isize,)); - the_same((1isize, 2isize, 3isize)); - the_same((1isize, "foo".to_string(), ())); -} - -#[test] -fn test_basic_struct() { - #[derive(Serialize, Deserialize, PartialEq, Debug, Clone)] - struct Easy { - x: isize, - s: String, - y: usize, - } - the_same(Easy { - x: -4, - s: "foo".to_string(), - y: 10, - }); -} - -#[test] -fn test_nested_struct() { - #[derive(Serialize, Deserialize, PartialEq, Debug, Clone)] - struct Easy { - x: isize, - s: String, - y: usize, - } - #[derive(Serialize, Deserialize, PartialEq, Debug, Clone)] - struct Nest { - f: Easy, - b: usize, - s: Easy, - } - - the_same(Nest { - f: Easy { - x: -1, - s: "foo".to_string(), - y: 20, - }, - b: 100, - s: Easy { - x: -100, - s: "bar".to_string(), - y: 20, - }, - }); -} - -#[test] -fn test_struct_newtype() { - #[derive(Serialize, Deserialize, PartialEq, Debug, Clone)] - struct NewtypeStr(usize); - - the_same(NewtypeStr(5)); -} - -#[test] -fn test_struct_tuple() { - #[derive(Serialize, Deserialize, PartialEq, Debug, Clone)] - struct TubStr(usize, String, f32); - - the_same(TubStr(5, "hello".to_string(), 3.2)); -} - -#[test] -fn test_option() { - the_same(Some(5usize)); - the_same(Some("foo bar".to_string())); - the_same(None::); -} - -#[test] -fn test_enum() { - #[derive(Serialize, Deserialize, PartialEq, Debug, Clone)] - enum TestEnum { - NoArg, - OneArg(usize), - Args(usize, usize), - AnotherNoArg, - StructLike { x: usize, y: f32 }, - } - the_same(TestEnum::NoArg); - the_same(TestEnum::OneArg(4)); - //the_same(TestEnum::Args(4, 5)); - the_same(TestEnum::AnotherNoArg); - the_same(TestEnum::StructLike { x: 
4, y: 3.14159 }); - the_same(vec![ - TestEnum::NoArg, - TestEnum::OneArg(5), - TestEnum::AnotherNoArg, - TestEnum::StructLike { x: 4, y: 1.4 }, - ]); -} - -#[test] -fn test_vec() { - let v: Vec = vec![]; - the_same(v); - the_same(vec![1u64]); - the_same(vec![1u64, 2, 3, 4, 5, 6]); -} - -#[test] -fn test_map() { - let mut m = HashMap::new(); - m.insert(4u64, "foo".to_string()); - m.insert(0u64, "bar".to_string()); - the_same(m); -} - -#[test] -fn test_bool() { - the_same(true); - the_same(false); -} - -#[test] -fn test_unicode() { - the_same("å".to_string()); - the_same("aåååååååa".to_string()); -} - -#[test] -fn test_fixed_size_array() { - the_same([24u32; 32]); - the_same([1u64, 2, 3, 4, 5, 6, 7, 8]); - the_same([0u8; 19]); -} - -#[test] -fn deserializing_errors() { - match *deserialize::(&vec![0xA][..]).unwrap_err() { - ErrorKind::InvalidBoolEncoding(0xA) => {} - _ => panic!(), - } - - let invalid_str = vec![1, 0xFF]; - - match *deserialize::(&invalid_str[..]).unwrap_err() { - ErrorKind::InvalidUtf8Encoding(_) => {} - e => panic!("{:?}", e), - } - - // Out-of-bounds variant - #[derive(Serialize, Deserialize, Debug)] - enum Test { - One, - Two, - } - - let invalid_enum = vec![0, 0, 0, 5]; - - match *deserialize::(&invalid_enum[..]).unwrap_err() { - // Error message comes from serde - ErrorKind::Custom(_) => {} - _ => panic!(), - } - match *deserialize::>(&vec![5, 0][..]).unwrap_err() { - ErrorKind::InvalidTagEncoding(_) => {} - _ => panic!(), - } -} - -#[test] -fn trailing_bytes() { - match DefaultOptions::new() - .deserialize::(b"1x") - .map_err(|e| *e) - { - Err(ErrorKind::Custom(_)) => {} - other => panic!("Expecting TrailingBytes, got {:?}", other), - } -} - -#[test] -fn too_big_deserialize() { - let serialized = vec![0, 0, 0, 3]; - let deserialized: Result = DefaultOptions::new() - .with_fixint_encoding() - .with_limit(3) - .deserialize_from(&mut &serialized[..]); - assert!(deserialized.is_err()); - - let serialized = vec![0, 0, 0, 3]; - let deserialized: 
Result = DefaultOptions::new() - .with_fixint_encoding() - .with_limit(4) - .deserialize_from(&mut &serialized[..]); - assert!(deserialized.is_ok()); -} - -#[test] -fn char_serialization() { - let chars = "Aa\0☺♪"; - for c in chars.chars() { - let encoded = DefaultOptions::new() - .with_limit(4) - .serialize(&c) - .expect("serializing char failed"); - let decoded: char = deserialize(&encoded).expect("deserializing failed"); - assert_eq!(decoded, c); - } -} - -#[test] -fn too_big_char_deserialize() { - let serialized = vec![0x41]; - let deserialized: Result = DefaultOptions::new() - .with_limit(1) - .deserialize_from(&mut &serialized[..]); - assert!(deserialized.is_ok()); - assert_eq!(deserialized.unwrap(), 'A'); -} - -#[test] -fn too_big_serialize() { - assert!(DefaultOptions::new() - .with_fixint_encoding() - .with_limit(3) - .serialize(&0u32) - .is_err()); - assert!(DefaultOptions::new() - .with_fixint_encoding() - .with_limit(4) - .serialize(&0u32) - .is_ok()); - - assert!(DefaultOptions::new() - .with_fixint_encoding() - .with_limit(LEN_SIZE + 4) - .serialize(&"abcde") - .is_err()); - assert!(DefaultOptions::new() - .with_fixint_encoding() - .with_limit(LEN_SIZE + 5) - .serialize(&"abcde") - .is_ok()); -} - -#[test] -fn test_serialized_size() { - let opt = DefaultOptions::new().with_fixint_encoding(); - assert!(opt.serialized_size(&0u8).unwrap() == 1); - assert!(opt.serialized_size(&0u16).unwrap() == 2); - assert!(opt.serialized_size(&0u32).unwrap() == 4); - assert!(opt.serialized_size(&0u64).unwrap() == 8); - - // length isize stored as u64 - assert!(opt.serialized_size(&"").unwrap() == LEN_SIZE); - assert!(opt.serialized_size(&"a").unwrap() == LEN_SIZE + 1); - - assert!(opt.serialized_size(&vec![0u32, 1u32, 2u32]).unwrap() == LEN_SIZE + 3 * (4)); -} - -#[test] -fn test_serialized_size_bounded() { - // JUST RIGHT - assert!( - DefaultOptions::new() - .with_fixint_encoding() - .with_limit(1) - .serialized_size(&0u8) - .unwrap() - == 1 - ); - assert!( - 
DefaultOptions::new() - .with_fixint_encoding() - .with_limit(2) - .serialized_size(&0u16) - .unwrap() - == 2 - ); - assert!( - DefaultOptions::new() - .with_fixint_encoding() - .with_limit(4) - .serialized_size(&0u32) - .unwrap() - == 4 - ); - assert!( - DefaultOptions::new() - .with_fixint_encoding() - .with_limit(8) - .serialized_size(&0u64) - .unwrap() - == 8 - ); - assert!( - DefaultOptions::new() - .with_fixint_encoding() - .with_limit(8) - .serialized_size(&"") - .unwrap() - == LEN_SIZE - ); - assert!( - DefaultOptions::new() - .with_fixint_encoding() - .with_limit(8 + 1) - .serialized_size(&"a") - .unwrap() - == LEN_SIZE + 1 - ); - assert!( - DefaultOptions::new() - .with_fixint_encoding() - .with_limit(LEN_SIZE + 3 * 4) - .serialized_size(&vec![0u32, 1u32, 2u32]) - .unwrap() - == LEN_SIZE + 3 * 4 - ); - // Below - assert!(DefaultOptions::new() - .with_fixint_encoding() - .with_limit(0) - .serialized_size(&0u8) - .is_err()); - assert!(DefaultOptions::new() - .with_fixint_encoding() - .with_limit(1) - .serialized_size(&0u16) - .is_err()); - assert!(DefaultOptions::new() - .with_fixint_encoding() - .with_limit(3) - .serialized_size(&0u32) - .is_err()); - assert!(DefaultOptions::new() - .with_fixint_encoding() - .with_limit(7) - .serialized_size(&0u64) - .is_err()); - assert!(DefaultOptions::new() - .with_fixint_encoding() - .with_limit(7) - .serialized_size(&"") - .is_err()); - assert!(DefaultOptions::new() - .with_fixint_encoding() - .with_limit(8 + 0) - .serialized_size(&"a") - .is_err()); - assert!(DefaultOptions::new() - .with_fixint_encoding() - .with_limit(8 + 3 * 4 - 1) - .serialized_size(&vec![0u32, 1u32, 2u32]) - .is_err()); -} - -#[test] -fn encode_box() { - the_same(Box::new(5)); -} - -#[test] -fn test_cow_serialize() { - let large_object = vec![1u32, 2, 3, 4, 5, 6]; - let mut large_map = HashMap::new(); - large_map.insert(1, 2); - - #[derive(Serialize, Deserialize, Debug)] - enum Message<'a> { - M1(Cow<'a, Vec>), - M2(Cow<'a, HashMap>), - } - - // 
Test 1 - { - let serialized = serialize(&Message::M1(Cow::Borrowed(&large_object))).unwrap(); - let deserialized: Message<'static> = deserialize_from(&mut &serialized[..]).unwrap(); - - match deserialized { - Message::M1(b) => assert!(&b.into_owned() == &large_object), - _ => assert!(false), - } - } - - // Test 2 - { - let serialized = serialize(&Message::M2(Cow::Borrowed(&large_map))).unwrap(); - let deserialized: Message<'static> = deserialize_from(&mut &serialized[..]).unwrap(); - - match deserialized { - Message::M2(b) => assert!(&b.into_owned() == &large_map), - _ => assert!(false), - } - } -} - -#[test] -fn test_strbox_serialize() { - let strx: &'static str = "hello world"; - let serialized = serialize(&Cow::Borrowed(strx)).unwrap(); - let deserialized: Cow<'static, String> = deserialize_from(&mut &serialized[..]).unwrap(); - let stringx: String = deserialized.into_owned(); - assert!(strx == &stringx[..]); -} - -#[test] -fn test_slicebox_serialize() { - let slice = [1u32, 2, 3, 4, 5]; - let serialized = serialize(&Cow::Borrowed(&slice[..])).unwrap(); - println!("{:?}", serialized); - let deserialized: Cow<'static, Vec> = deserialize_from(&mut &serialized[..]).unwrap(); - { - let sb: &[u32] = &deserialized; - assert!(slice == sb); - } - let vecx: Vec = deserialized.into_owned(); - assert!(slice == &vecx[..]); -} - -#[test] -fn test_multi_strings_serialize() { - assert!(serialize(&("foo", "bar", "baz")).is_ok()); -} - -#[test] -fn test_oom_protection() { - use std::io::Cursor; - #[derive(Serialize, Deserialize, PartialEq, Debug)] - struct FakeVec { - len: u64, - byte: u8, - } - let x = DefaultOptions::new() - .with_limit(10) - .serialize(&FakeVec { - len: 0xffffffffffffffffu64, - byte: 1, - }) - .unwrap(); - let y: Result> = DefaultOptions::new() - .with_limit(10) - .deserialize_from(&mut Cursor::new(&x[..])); - assert!(y.is_err()); -} - -#[test] -fn path_buf() { - use std::path::{Path, PathBuf}; - let path = Path::new("foo").to_path_buf(); - let serde_encoded 
= serialize(&path).unwrap(); - let decoded: PathBuf = deserialize(&serde_encoded).unwrap(); - assert!(path.to_str() == decoded.to_str()); -} - -#[test] -fn bytes() { - use serde_bytes::Bytes; - - let data = b"abc\0123"; - let s = serialize(&data[..]).unwrap(); - let s2 = serialize(&Bytes::new(data)).unwrap(); - assert_eq!(s[..], s2[..]); -} - -#[test] -fn serde_bytes() { - use serde_bytes::ByteBuf; - the_same(ByteBuf::from(vec![1, 2, 3, 4, 5])); -} - -#[test] -fn endian_difference() { - let x = 10u64; - let little = DefaultOptions::new() - .with_fixint_encoding() - .serialize(&x) - .unwrap(); - let big = DefaultOptions::new() - .with_big_endian() - .with_fixint_encoding() - .serialize(&x) - .unwrap(); - assert_ne!(little, big); -} - -#[test] -fn test_zero_copy_parse() { - #[derive(Serialize, Deserialize, Eq, PartialEq, Debug)] - struct Foo<'a> { - borrowed_str: &'a str, - borrowed_bytes: &'a [u8], - } - - let f = Foo { - borrowed_str: "hi", - borrowed_bytes: &[0, 1, 2, 3], - }; - { - let encoded = serialize(&f).unwrap(); - let out: Foo = deserialize(&encoded[..]).unwrap(); - assert_eq!(out, f); - } -} - -#[test] -fn test_zero_copy_parse_deserialize_into() { - use bincode::BincodeRead; - use std::io; - - /// A BincodeRead implementation for byte slices - pub struct SliceReader<'storage> { - slice: &'storage [u8], - } - - impl<'storage> SliceReader<'storage> { - #[inline(always)] - fn unexpected_eof() -> Box { - return Box::new(crate::ErrorKind::Io(io::Error::new( - io::ErrorKind::UnexpectedEof, - "", - ))); - } - } - - impl<'storage> io::Read for SliceReader<'storage> { - #[inline(always)] - fn read(&mut self, out: &mut [u8]) -> io::Result { - (&mut self.slice).read(out) - } - #[inline(always)] - fn read_exact(&mut self, out: &mut [u8]) -> io::Result<()> { - (&mut self.slice).read_exact(out) - } - } - - impl<'storage> BincodeRead<'storage> for SliceReader<'storage> { - #[inline(always)] - fn forward_read_str(&mut self, length: usize, visitor: V) -> Result - where - 
V: serde::de::Visitor<'storage>, - { - use crate::ErrorKind; - if length > self.slice.len() { - return Err(SliceReader::unexpected_eof()); - } - - let string = match ::std::str::from_utf8(&self.slice[..length]) { - Ok(s) => s, - Err(e) => return Err(ErrorKind::InvalidUtf8Encoding(e).into()), - }; - let r = visitor.visit_borrowed_str(string); - self.slice = &self.slice[length..]; - r - } - - #[inline(always)] - fn get_byte_buffer(&mut self, length: usize) -> Result> { - if length > self.slice.len() { - return Err(SliceReader::unexpected_eof()); - } - - let r = &self.slice[..length]; - self.slice = &self.slice[length..]; - Ok(r.to_vec()) - } - - #[inline(always)] - fn forward_read_bytes(&mut self, length: usize, visitor: V) -> Result - where - V: serde::de::Visitor<'storage>, - { - if length > self.slice.len() { - return Err(SliceReader::unexpected_eof()); - } - - let r = visitor.visit_borrowed_bytes(&self.slice[..length]); - self.slice = &self.slice[length..]; - r - } - } - - #[derive(Serialize, Deserialize, Eq, PartialEq, Debug)] - struct Foo<'a> { - borrowed_str: &'a str, - borrowed_bytes: &'a [u8], - } - - let f = Foo { - borrowed_str: "hi", - borrowed_bytes: &[0, 1, 2, 3], - }; - - { - let encoded = serialize(&f).unwrap(); - let mut target = Foo { - borrowed_str: "hello", - borrowed_bytes: &[10, 11, 12, 13], - }; - deserialize_in_place( - SliceReader { - slice: &encoded[..], - }, - &mut target, - ) - .unwrap(); - assert_eq!(target, f); - } -} - -#[test] -fn not_human_readable() { - use std::net::Ipv4Addr; - let ip = Ipv4Addr::new(1, 2, 3, 4); - the_same(ip); - assert_eq!(&ip.octets()[..], &serialize(&ip).unwrap()[..]); - assert_eq!( - ::std::mem::size_of::() as u64, - serialized_size(&ip).unwrap() - ); -} - -// The example is taken from serde::de::DeserializeSeed. -struct ExtendVec<'a, T: 'a>(&'a mut Vec); - -impl<'de, 'a, T> DeserializeSeed<'de> for ExtendVec<'a, T> -where - T: Deserialize<'de>, -{ - // The return type of the `deserialize` method. 
This implementation - // appends onto an existing vector but does not create any new data - // structure, so the return type is (). - type Value = (); - - fn deserialize(self, deserializer: D) -> StdResult - where - D: Deserializer<'de>, - { - // Visitor implementation that will walk an inner array of the JSON - // input. - struct ExtendVecVisitor<'a, T: 'a>(&'a mut Vec); - - impl<'de, 'a, T> Visitor<'de> for ExtendVecVisitor<'a, T> - where - T: Deserialize<'de>, - { - type Value = (); - - fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - write!(formatter, "an array of integers") - } - - fn visit_seq(self, mut seq: A) -> StdResult<(), A::Error> - where - A: SeqAccess<'de>, - { - // Visit each element in the inner array and push it onto - // the existing vector. - while let Some(elem) = seq.next_element()? { - self.0.push(elem); - } - Ok(()) - } - } - - deserializer.deserialize_seq(ExtendVecVisitor(self.0)) - } -} - -#[test] -fn test_default_deserialize_seed() { - let config = DefaultOptions::new(); - - let data: Vec<_> = (10..100).collect(); - let bytes = config.serialize(&data).expect("Config::serialize failed"); - - let mut seed_data: Vec<_> = (0..10).collect(); - { - let seed = ExtendVec(&mut seed_data); - config - .deserialize_seed(seed, &bytes) - .expect("Config::deserialize_seed failed"); - } - - assert_eq!(seed_data, (0..100).collect::>()); -} - -#[test] -fn test_big_endian_deserialize_seed() { - let config = DefaultOptions::new().with_big_endian(); - - let data: Vec<_> = (10..100).collect(); - let bytes = config.serialize(&data).expect("Config::serialize failed"); - - let mut seed_data: Vec<_> = (0..10).collect(); - { - let seed = ExtendVec(&mut seed_data); - config - .deserialize_seed(seed, &bytes) - .expect("Config::deserialize_seed failed"); - } - - assert_eq!(seed_data, (0..100).collect::>()); -} - -#[test] -fn test_default_deserialize_from_seed() { - let config = DefaultOptions::new(); - - let data: Vec<_> = (10..100).collect(); - 
let bytes = config.serialize(&data).expect("Config::serialize failed"); - - let mut seed_data: Vec<_> = (0..10).collect(); - { - let seed = ExtendVec(&mut seed_data); - config - .deserialize_from_seed(seed, &mut &*bytes) - .expect("Config::deserialize_from_seed failed"); - } - - assert_eq!(seed_data, (0..100).collect::>()); -} - -#[test] -fn test_big_endian_deserialize_from_seed() { - let config = DefaultOptions::new().with_big_endian(); - - let data: Vec<_> = (10..100).collect(); - let bytes = config.serialize(&data).expect("Config::serialize failed"); - - let mut seed_data: Vec<_> = (0..10).collect(); - { - let seed = ExtendVec(&mut seed_data); - config - .deserialize_from_seed(seed, &mut &*bytes) - .expect("Config::deserialize_from_seed failed"); - } - - assert_eq!(seed_data, (0..100).collect::>()); -} - -#[test] -fn test_varint_length_prefixes() { - let a = vec![(); 127]; // should be a single byte - let b = vec![(); 250]; // also should be a single byte - let c = vec![(); 251]; - let d = vec![(); u16::max_value() as usize + 1]; - - assert_eq!( - DefaultOptions::new() - .with_varint_encoding() - .serialized_size(&a[..]) - .unwrap(), - 1 - ); // 2 ** 7 - 1 - assert_eq!( - DefaultOptions::new() - .with_varint_encoding() - .serialized_size(&b[..]) - .unwrap(), - 1 - ); // 250 - assert_eq!( - DefaultOptions::new() - .with_varint_encoding() - .serialized_size(&c[..]) - .unwrap(), - (1 + std::mem::size_of::()) as u64 - ); // 251 - assert_eq!( - DefaultOptions::new() - .with_varint_encoding() - .serialized_size(&d[..]) - .unwrap(), - (1 + std::mem::size_of::()) as u64 - ); // 2 ** 16 + 1 -} - -#[test] -fn test_byte_vec_struct() { - #[derive(PartialEq, Eq, Clone, Serialize, Deserialize, Debug)] - struct ByteVecs { - a: Vec, - b: Vec, - c: Vec, - } - - let byte_struct = ByteVecs { - a: vec![2; 20], - b: vec![3; 30], - c: vec![1; 10], - }; - - the_same(byte_struct); -} diff --git a/tests/utils.rs b/tests/utils.rs new file mode 100644 index 000000000..015ab1bd3 --- 
/dev/null +++ b/tests/utils.rs @@ -0,0 +1,107 @@ +use bincode::config::{self, Config}; +use core::fmt::Debug; + +fn the_same_with_config(element: &V, config: C, cmp: CMP) +where + V: bincode::enc::Encode + bincode::de::Decode + Debug + 'static, + C: Config, + CMP: Fn(&V, &V) -> bool, +{ + let mut buffer = [0u8; 2048]; + let len = bincode::encode_into_slice(&element, &mut buffer, config).unwrap(); + println!( + "{:?}: {:?} ({:?})", + element, + &buffer[..len], + core::any::type_name::() + ); + let decoded: V = bincode::decode_from_slice(&mut buffer, config).unwrap(); + + assert!( + cmp(&element, &decoded), + "Comparison failed\nDecoded: {:?}\nExpected: {:?}\nBytes: {:?}", + decoded, + element, + &buffer[..len], + ); +} + +pub fn the_same_with_comparer(element: V, cmp: CMP) +where + V: bincode::enc::Encode + bincode::de::Decode + Debug + 'static, + CMP: Fn(&V, &V) -> bool, +{ + // A matrix of each different config option possible + the_same_with_config( + &element, + config::Configuration::standard() + .with_little_endian() + .with_fixed_int_encoding() + .skip_fixed_array_length(), + &cmp, + ); + the_same_with_config( + &element, + config::Configuration::standard() + .with_big_endian() + .with_fixed_int_encoding() + .skip_fixed_array_length(), + &cmp, + ); + the_same_with_config( + &element, + config::Configuration::standard() + .with_little_endian() + .with_variable_int_encoding() + .skip_fixed_array_length(), + &cmp, + ); + the_same_with_config( + &element, + config::Configuration::standard() + .with_big_endian() + .with_variable_int_encoding() + .skip_fixed_array_length(), + &cmp, + ); + the_same_with_config( + &element, + config::Configuration::standard() + .with_little_endian() + .with_fixed_int_encoding() + .write_fixed_array_length(), + &cmp, + ); + the_same_with_config( + &element, + config::Configuration::standard() + .with_big_endian() + .with_fixed_int_encoding() + .write_fixed_array_length(), + &cmp, + ); + the_same_with_config( + &element, + 
config::Configuration::standard() + .with_little_endian() + .with_variable_int_encoding() + .write_fixed_array_length(), + &cmp, + ); + the_same_with_config( + &element, + config::Configuration::standard() + .with_big_endian() + .with_variable_int_encoding() + .write_fixed_array_length(), + &cmp, + ); +} + +#[allow(dead_code)] // This is not used in every test +pub fn the_same(element: V) +where + V: bincode::enc::Encode + bincode::de::Decode + PartialEq + Debug + 'static, +{ + the_same_with_comparer(element, |a, b| a == b); +}