From 7cc583fdf168b6ed9df795af0642981d811c617d Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 7 Feb 2024 13:21:10 -0600 Subject: [PATCH] test(bench): Switch to Divan --- Cargo.lock | 289 ++++------------------- crates/typos-cli/Cargo.toml | 8 +- crates/typos-cli/benches/check_file.rs | 93 ++++++++ crates/typos-cli/benches/checks.rs | 69 ------ crates/typos-cli/benches/correct_word.rs | 155 ++++++++++++ crates/typos-cli/benches/corrections.rs | 116 --------- crates/typos-cli/benches/data.rs | 33 ++- crates/typos-cli/benches/tokenize.rs | 163 ++++++------- 8 files changed, 411 insertions(+), 515 deletions(-) create mode 100644 crates/typos-cli/benches/check_file.rs delete mode 100644 crates/typos-cli/benches/checks.rs create mode 100644 crates/typos-cli/benches/correct_word.rs delete mode 100644 crates/typos-cli/benches/corrections.rs diff --git a/Cargo.lock b/Cargo.lock index e6429c7ec..749af3108 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -38,12 +38,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "anes" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" - [[package]] name = "anstream" version = "0.6.11" @@ -157,18 +151,6 @@ dependencies = [ "serde", ] -[[package]] -name = "bumpalo" -version = "3.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" - -[[package]] -name = "cast" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" - [[package]] name = "cc" version = "1.0.83" @@ -184,33 +166,6 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" -[[package]] -name = "ciborium" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "effd91f6c78e5a4ace8a5d3c0b6bfaec9e2baaef55f3efc00e45fb2e477ee926" -dependencies = [ - "ciborium-io", - "ciborium-ll", - "serde", -] - -[[package]] -name = "ciborium-io" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdf919175532b369853f5d5e20b26b43112613fd6fe7aee757e35f7a44642656" - -[[package]] -name = "ciborium-ll" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "defaa24ecc093c77630e6c15e17c51f5e187bf35ee514f4e2d67baaa96dae22b" -dependencies = [ - "ciborium-io", - "half", -] - [[package]] name = "clap" version = "4.4.18" @@ -241,6 +196,7 @@ dependencies = [ "anstyle", "clap_lex", "strsim", + "terminal_size", ] [[package]] @@ -301,6 +257,12 @@ dependencies = [ "colorchoice", ] +[[package]] +name = "condtype" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf0a07a401f374238ab8e2f11a104d2851bf9ce711ec69804834de8af45c7af" + [[package]] name = "content_inspector" version = "0.2.4" @@ -316,42 +278,6 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" -[[package]] -name = "criterion" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" -dependencies = [ - "anes", - "cast", - "ciborium", - "clap", - "criterion-plot", - "is-terminal", - "itertools 0.10.5", - "num-traits", - "once_cell", - "oorandom", - "plotters", - "rayon", - "regex", - "serde", - "serde_derive", - "serde_json", - "tinytemplate", - "walkdir", -] - -[[package]] -name = "criterion-plot" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" -dependencies = [ - "cast", - "itertools 0.10.5", -] - [[package]] name = "crossbeam-deque" version = "0.8.3" @@ -488,6 +414,31 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" +[[package]] +name = "divan" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5398159ee27f2b123d89b856bad61725442f37df5fb98c30cd570c318d594aee" +dependencies = [ + "cfg-if", + "clap", + "condtype", + "divan-macros", + "libc", + "regex-lite", +] + +[[package]] +name = "divan-macros" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5092f66eb3563a01e85552731ae82c04c934ff4efd7ad1a0deae7b948f4b3ec4" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.38", +] + [[package]] name = "doc-comment" version = "0.3.3" @@ -649,12 +600,6 @@ dependencies = [ "walkdir", ] -[[package]] -name = "half" -version = "1.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" - [[package]] name = "hashbrown" version = "0.14.1" @@ -667,12 +612,6 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" -[[package]] -name = "hermit-abi" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" - [[package]] name = "human-panic" version = "1.2.3" @@ -738,26 +677,6 @@ dependencies = [ "hashbrown", ] -[[package]] -name = "is-terminal" -version = "0.4.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" -dependencies = [ - "hermit-abi", - "rustix", - "windows-sys 0.48.0", -] - -[[package]] -name = "itertools" -version = "0.10.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" -dependencies = [ - "either", -] - [[package]] name = "itertools" version = "0.11.0" @@ -782,15 +701,6 @@ version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" -[[package]] -name = "js-sys" -version = "0.3.64" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" -dependencies = [ - "wasm-bindgen", -] - [[package]] name = "kstring" version = "2.0.0" @@ -873,15 +783,6 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" -[[package]] -name = "num-traits" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" -dependencies = [ - "autocfg", -] - [[package]] name = "object" version = "0.32.1" @@ -897,12 +798,6 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" -[[package]] -name = "oorandom" -version = "11.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" - [[package]] name = "os_info" version = "3.7.0" @@ -978,34 +873,6 @@ dependencies = [ "unicase", ] -[[package]] -name = "plotters" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2c224ba00d7cadd4d5c660deaf2098e5e80e07846537c51f9cfa4be50c1fd45" -dependencies = [ - "num-traits", - "plotters-backend", - "plotters-svg", - "wasm-bindgen", - "web-sys", -] - -[[package]] -name = "plotters-backend" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e76628b4d3a7581389a35d5b6e2139607ad7c75b17aed325f210aa91f4a9609" - -[[package]] -name = "plotters-svg" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38f6d39893cca0701371e3c27294f09797214b86f1fb951b89ade8ec04e2abab" -dependencies = [ - "plotters-backend", -] - [[package]] name = "predicates" version = "3.0.4" @@ -1125,6 +992,12 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "regex-lite" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30b661b2f27137bdbc16f00eda72866a92bb28af1753ffbd56744fb6e2e9cd8e" + [[package]] name = "regex-syntax" version = "0.8.2" @@ -1338,6 +1211,16 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "terminal_size" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21bebf2b7c9e0a515f6e0f8c51dc0f8e4696391e6f1ff30379559f8365fb0df7" +dependencies = [ + "rustix", + "windows-sys 0.48.0", +] + [[package]] name = "termtree" version = "0.4.1" @@ -1374,16 +1257,6 @@ dependencies = [ "once_cell", ] -[[package]] -name = "tinytemplate" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" -dependencies = [ - "serde", - "serde_json", -] - [[package]] name = "toml" version = "0.8.9" @@ -1470,10 +1343,10 @@ dependencies = [ "clap-verbosity-flag", "colorchoice-clap", "content_inspector", - "criterion", "derive_more", "derive_setters", "difflib", + "divan", "encoding_rs", "env_logger", "globset", @@ -1669,70 +1542,6 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" -[[package]] -name = "wasm-bindgen" -version = "0.2.87" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" -dependencies = [ - "cfg-if", - "wasm-bindgen-macro", -] - -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.87" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" -dependencies = [ - "bumpalo", - "log", - "once_cell", - "proc-macro2", - "quote", - "syn 2.0.38", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-macro" -version = "0.2.87" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" -dependencies = [ - "quote", - "wasm-bindgen-macro-support", -] - -[[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.87" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.38", - "wasm-bindgen-backend", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-shared" -version = "0.2.87" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" - -[[package]] -name = "web-sys" -version = "0.3.64" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - [[package]] name = "wikipedia-dict" version = "0.4.0" diff --git a/crates/typos-cli/Cargo.toml b/crates/typos-cli/Cargo.toml index d771750ec..85c744e80 100644 --- a/crates/typos-cli/Cargo.toml +++ b/crates/typos-cli/Cargo.toml @@ -79,16 +79,16 @@ encoding_rs = "0.8.33" [dev-dependencies] assert_fs = "1.1" -trycmd = "0.14.20" -criterion = "0.5" +divan = "0.1.11" snapbox = "0.4.16" +trycmd = "0.14.20" [[bench]] -name = "checks" +name = "check_file" harness = false [[bench]] -name = "corrections" +name = "correct_word" harness = false [[bench]] diff --git a/crates/typos-cli/benches/check_file.rs b/crates/typos-cli/benches/check_file.rs new file mode 100644 index 000000000..6b624a08d --- /dev/null +++ b/crates/typos-cli/benches/check_file.rs @@ -0,0 +1,93 @@ +mod data; + +use assert_fs::prelude::*; +use typos_cli::file::FileChecker; + +#[divan::bench(args = data::DATA)] +fn found_files(bencher: divan::Bencher, sample: &data::Data) { + let dict = typos_cli::dict::BuiltIn::new(Default::default()); + let tokenizer = typos::tokens::Tokenizer::new(); + let policy = typos_cli::policy::Policy::new() + .dict(&dict) + .tokenizer(&tokenizer); + + let temp = assert_fs::TempDir::new().unwrap(); + let sample_path = temp.child(sample.name()); + sample_path.write_str(sample.content()).unwrap(); + + bencher + .counter(divan::counter::BytesCount::of_str(sample.content())) + .bench_local(|| { + typos_cli::file::FoundFiles.check_file(sample_path.path(), true, &policy, &PrintSilent) + }) +} + +#[divan::bench(args = data::DATA)] +fn identifiers(bencher: divan::Bencher, sample: &data::Data) { + let dict = typos_cli::dict::BuiltIn::new(Default::default()); + let tokenizer = typos::tokens::Tokenizer::new(); + let policy = typos_cli::policy::Policy::new() + .dict(&dict) + .tokenizer(&tokenizer); + + let temp = assert_fs::TempDir::new().unwrap(); + let sample_path = temp.child(sample.name()); + sample_path.write_str(sample.content()).unwrap(); + + bencher + .counter(divan::counter::BytesCount::of_str(sample.content())) + .bench_local(|| { + typos_cli::file::Identifiers.check_file(sample_path.path(), true, &policy, &PrintSilent) + }) +} + +#[divan::bench(args = data::DATA)] +fn words(bencher: divan::Bencher, sample: &data::Data) { + let dict = typos_cli::dict::BuiltIn::new(Default::default()); + let tokenizer = typos::tokens::Tokenizer::new(); + let policy = typos_cli::policy::Policy::new() + .dict(&dict) + .tokenizer(&tokenizer); + + let temp = assert_fs::TempDir::new().unwrap(); + let sample_path = temp.child(sample.name()); + sample_path.write_str(sample.content()).unwrap(); + + bencher + .counter(divan::counter::BytesCount::of_str(sample.content())) + .bench_local(|| { + typos_cli::file::Words.check_file(sample_path.path(), true, &policy, &PrintSilent) + }) +} + +#[divan::bench(args = data::DATA)] +fn typos(bencher: divan::Bencher, sample: &data::Data) { + let dict = typos_cli::dict::BuiltIn::new(Default::default()); + let tokenizer = typos::tokens::Tokenizer::new(); + let policy = typos_cli::policy::Policy::new() + .dict(&dict) + .tokenizer(&tokenizer); + + let temp = assert_fs::TempDir::new().unwrap(); + let sample_path = temp.child(sample.name()); + sample_path.write_str(sample.content()).unwrap(); + + bencher + .counter(divan::counter::BytesCount::of_str(sample.content())) + .bench_local(|| { + typos_cli::file::Typos.check_file(sample_path.path(), true, &policy, &PrintSilent) + }) +} + +#[derive(Debug, Default)] +pub struct PrintSilent; + +impl typos_cli::report::Report for PrintSilent { + fn report(&self, _msg: typos_cli::report::Message) -> Result<(), std::io::Error> { + Ok(()) + } +} + +fn main() { + divan::main(); +} diff --git a/crates/typos-cli/benches/checks.rs b/crates/typos-cli/benches/checks.rs deleted file mode 100644 index c55c93d9a..000000000 --- a/crates/typos-cli/benches/checks.rs +++ /dev/null @@ -1,69 +0,0 @@ -mod data; - -use assert_fs::prelude::*; -use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; -use typos_cli::file::FileChecker; - -fn bench_checks(c: &mut Criterion) { - let dict = typos_cli::dict::BuiltIn::new(Default::default()); - let tokenizer = typos::tokens::Tokenizer::new(); - let policy = typos_cli::policy::Policy::new() - .dict(&dict) - .tokenizer(&tokenizer); - - let temp = assert_fs::TempDir::new().unwrap(); - - let mut group = c.benchmark_group("check_file"); - for (name, sample) in data::DATA { - let sample_path = temp.child(name); - sample_path.write_str(sample).unwrap(); - - let len = sample.len(); - group.throughput(Throughput::Bytes(len as u64)); - group.bench_with_input(BenchmarkId::new("FoundFiles", name), &len, |b, _| { - b.iter(|| { - typos_cli::file::FoundFiles.check_file( - sample_path.path(), - true, - &policy, - &PrintSilent, - ) - }); - }); - group.bench_with_input(BenchmarkId::new("Identifiers", name), &len, |b, _| { - b.iter(|| { - typos_cli::file::Identifiers.check_file( - sample_path.path(), - true, - &policy, - &PrintSilent, - ) - }); - }); - group.bench_with_input(BenchmarkId::new("Words", name), &len, |b, _| { - b.iter(|| { - typos_cli::file::Words.check_file(sample_path.path(), true, &policy, &PrintSilent) - }); - }); - group.bench_with_input(BenchmarkId::new("Typos", name), &len, |b, _| { - b.iter(|| { - typos_cli::file::Typos.check_file(sample_path.path(), true, &policy, &PrintSilent) - }); - }); - } - group.finish(); - - temp.close().unwrap(); -} - -#[derive(Debug, Default)] -pub struct PrintSilent; - -impl typos_cli::report::Report for PrintSilent { - fn report(&self, _msg: typos_cli::report::Message) -> Result<(), std::io::Error> { - Ok(()) - } -} - -criterion_group!(benches, bench_checks,); -criterion_main!(benches); diff --git a/crates/typos-cli/benches/correct_word.rs b/crates/typos-cli/benches/correct_word.rs new file mode 100644 index 000000000..29bf381b5 --- /dev/null +++ b/crates/typos-cli/benches/correct_word.rs @@ -0,0 +1,155 @@ +mod regular { + mod ok { + #[divan::bench] + fn en(bencher: divan::Bencher) { + let input = "finalizes"; + let locale = typos_cli::config::Locale::En; + let corrections = typos_cli::dict::BuiltIn::new(locale); + let input = typos::tokens::Word::new(input, 0).unwrap(); + #[cfg(feature = "vars")] + assert_eq!(corrections.correct_word(input), None); + bencher + .with_inputs(|| input) + .bench_local_values(|input| corrections.correct_word(input)); + } + + #[divan::bench] + #[cfg(feature = "vars")] + fn en_us(bencher: divan::Bencher) { + let input = "finalizes"; + let locale = typos_cli::config::Locale::EnUs; + let corrections = typos_cli::dict::BuiltIn::new(locale); + let input = typos::tokens::Word::new(input, 0).unwrap(); + #[cfg(feature = "vars")] + assert_eq!(corrections.correct_word(input), Some(typos::Status::Valid)); + bencher + .with_inputs(|| input) + .bench_local_values(|input| corrections.correct_word(input)); + } + } + + mod misspell { + #[divan::bench] + fn en(bencher: divan::Bencher) { + let input = "finallizes"; + let output = "finalizes"; + let locale = typos_cli::config::Locale::En; + let corrections = typos_cli::dict::BuiltIn::new(locale); + let input = typos::tokens::Word::new(input, 0).unwrap(); + assert_eq!( + corrections.correct_word(input), + Some(typos::Status::Corrections(vec![ + std::borrow::Cow::Borrowed(output) + ])) + ); + bencher + .with_inputs(|| input) + .bench_local_values(|input| corrections.correct_word(input)); + } + + #[divan::bench] + #[cfg(feature = "vars")] + fn en_us(bencher: divan::Bencher) { + let input = "finallizes"; + let output = "finalizes"; + let locale = typos_cli::config::Locale::EnUs; + let corrections = typos_cli::dict::BuiltIn::new(locale); + let input = typos::tokens::Word::new(input, 0).unwrap(); + assert_eq!( + corrections.correct_word(input), + Some(typos::Status::Corrections(vec![ + std::borrow::Cow::Borrowed(output) + ])) + ); + bencher + .with_inputs(|| input) + .bench_local_values(|input| corrections.correct_word(input)); + } + } + + mod misspell_case { + #[divan::bench] + fn en(bencher: divan::Bencher) { + let input = "FINALLIZES"; + let output = "FINALIZES"; + let locale = typos_cli::config::Locale::En; + let corrections = typos_cli::dict::BuiltIn::new(locale); + let input = typos::tokens::Word::new(input, 0).unwrap(); + assert_eq!( + corrections.correct_word(input), + Some(typos::Status::Corrections(vec![ + std::borrow::Cow::Borrowed(output) + ])) + ); + bencher + .with_inputs(|| input) + .bench_local_values(|input| corrections.correct_word(input)); + } + + #[divan::bench] + #[cfg(feature = "vars")] + fn en_us(bencher: divan::Bencher) { + let input = "FINALLIZES"; + let output = "FINALIZES"; + let locale = typos_cli::config::Locale::EnUs; + let corrections = typos_cli::dict::BuiltIn::new(locale); + let input = typos::tokens::Word::new(input, 0).unwrap(); + assert_eq!( + corrections.correct_word(input), + Some(typos::Status::Corrections(vec![ + std::borrow::Cow::Borrowed(output) + ])) + ); + bencher + .with_inputs(|| input) + .bench_local_values(|input| corrections.correct_word(input)); + } + } +} + +#[cfg(feature = "vars")] +mod varcon { + mod ok { + #[divan::bench] + fn en_gb(bencher: divan::Bencher) { + let input = "finalizes"; + let output = "finalises"; + let locale = typos_cli::config::Locale::EnGb; + let corrections = typos_cli::dict::BuiltIn::new(locale); + let input = typos::tokens::Word::new(input, 0).unwrap(); + assert_eq!( + corrections.correct_word(input), + Some(typos::Status::Corrections(vec![ + std::borrow::Cow::Borrowed(output) + ])) + ); + bencher + .with_inputs(|| input) + .bench_local_values(|input| corrections.correct_word(input)); + } + } + + mod misspell { + #[divan::bench] + fn en_gb(bencher: divan::Bencher) { + let input = "finallizes"; + let output = "finalises"; + let locale = typos_cli::config::Locale::EnGb; + let corrections = typos_cli::dict::BuiltIn::new(locale); + let input = typos::tokens::Word::new(input, 0).unwrap(); + assert_eq!( + corrections.correct_word(input), + Some(typos::Status::Corrections(vec![ + std::borrow::Cow::Borrowed(output) + ])) + ); + bencher + .with_inputs(|| input) + .bench_local_values(|input| corrections.correct_word(input)); + } + } +} + +fn main() { + divan::main(); +} diff --git a/crates/typos-cli/benches/corrections.rs b/crates/typos-cli/benches/corrections.rs deleted file mode 100644 index fd5a6a476..000000000 --- a/crates/typos-cli/benches/corrections.rs +++ /dev/null @@ -1,116 +0,0 @@ -use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; - -fn bench_dict_load(c: &mut Criterion) { - let mut group = c.benchmark_group("load"); - group.bench_function(BenchmarkId::new("load", "builtin"), |b| { - b.iter(|| typos_cli::dict::BuiltIn::new(Default::default())); - }); - group.finish(); -} - -fn bench_dict_correct_word(c: &mut Criterion) { - let mut group = c.benchmark_group("correct_word"); - - { - let case = "ok"; - let input = "finalizes"; - group.bench_function(BenchmarkId::new("en", case), |b| { - let corrections = typos_cli::dict::BuiltIn::new(typos_cli::config::Locale::En); - let input = typos::tokens::Word::new(input, 0).unwrap(); - #[cfg(feature = "vars")] - assert_eq!(corrections.correct_word(input), None); - b.iter(|| corrections.correct_word(input)); - }); - #[cfg(feature = "vars")] - group.bench_function(BenchmarkId::new("en-us", case), |b| { - let corrections = typos_cli::dict::BuiltIn::new(typos_cli::config::Locale::EnUs); - let input = typos::tokens::Word::new(input, 0).unwrap(); - #[cfg(feature = "vars")] - assert_eq!(corrections.correct_word(input), Some(typos::Status::Valid)); - b.iter(|| corrections.correct_word(input)); - }); - } - { - let case = "misspell"; - let input = "finallizes"; - let output = "finalizes"; - group.bench_function(BenchmarkId::new("en", case), |b| { - let corrections = typos_cli::dict::BuiltIn::new(typos_cli::config::Locale::En); - let input = typos::tokens::Word::new(input, 0).unwrap(); - assert_eq!( - corrections.correct_word(input), - Some(typos::Status::Corrections(vec![ - std::borrow::Cow::Borrowed(output) - ])) - ); - b.iter(|| corrections.correct_word(input)); - }); - #[cfg(feature = "vars")] - group.bench_function(BenchmarkId::new("en-us", case), |b| { - let corrections = typos_cli::dict::BuiltIn::new(typos_cli::config::Locale::EnUs); - let input = typos::tokens::Word::new(input, 0).unwrap(); - assert_eq!( - corrections.correct_word(input), - Some(typos::Status::Corrections(vec![ - std::borrow::Cow::Borrowed(output) - ])) - ); - b.iter(|| corrections.correct_word(input)); - }); - } - { - let case = "misspell_case"; - let input = "FINALLIZES"; - let output = "FINALIZES"; - group.bench_function(BenchmarkId::new("en", case), |b| { - let corrections = typos_cli::dict::BuiltIn::new(typos_cli::config::Locale::En); - let input = typos::tokens::Word::new(input, 0).unwrap(); - assert_eq!( - corrections.correct_word(input), - Some(typos::Status::Corrections(vec![ - std::borrow::Cow::Borrowed(output) - ])) - ); - b.iter(|| corrections.correct_word(input)); - }); - } - #[cfg(feature = "vars")] - { - let case = "varcon"; - let input = "finalizes"; - let output = "finalises"; - group.bench_function(BenchmarkId::new("en-gb", case), |b| { - let corrections = typos_cli::dict::BuiltIn::new(typos_cli::config::Locale::EnGb); - let input = typos::tokens::Word::new(input, 0).unwrap(); - assert_eq!( - corrections.correct_word(input), - Some(typos::Status::Corrections(vec![ - std::borrow::Cow::Borrowed(output) - ])) - ); - b.iter(|| corrections.correct_word(input)); - }); - } - #[cfg(feature = "vars")] - { - let case = "misspell_varcon"; - let input = "finallizes"; - let output = "finalises"; - group.bench_function(BenchmarkId::new("en-gb", case), |b| { - let corrections = typos_cli::dict::BuiltIn::new(typos_cli::config::Locale::EnGb); - let input = typos::tokens::Word::new(input, 0).unwrap(); - assert_eq!( - corrections.correct_word(input), - Some(typos::Status::Corrections(vec![ - std::borrow::Cow::Borrowed(output) - ])) - ); - b.iter(|| corrections.correct_word(input)); - }); - } - - group.finish(); -} - -criterion_group!(benches, bench_dict_load, bench_dict_correct_word); -criterion_main!(benches); diff --git a/crates/typos-cli/benches/data.rs b/crates/typos-cli/benches/data.rs index 87dc68e35..9269f568c 100644 --- a/crates/typos-cli/benches/data.rs +++ b/crates/typos-cli/benches/data.rs @@ -30,11 +30,30 @@ fn main() { pub static CORPUS: &str = include_str!("../../typos-dict/assets/words.csv"); -pub static DATA: &[(&str, &str)] = &[ - ("empty", EMPTY), - ("no_tokens", NO_TOKENS), - ("single_token", SINGLE_TOKEN), - ("sherlock", SHERLOCK), - ("code", CODE), - ("corpus", CORPUS), +#[derive(Debug)] +pub struct Data(&'static str, &'static str); + +impl Data { + pub const fn name(&self) -> &'static str { + self.0 + } + + pub const fn content(&self) -> &'static str { + self.1 + } +} + +impl std::fmt::Display for Data { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.name().fmt(f) + } +} + +pub static DATA: &[Data] = &[ + Data("empty", EMPTY), + Data("no_tokens", NO_TOKENS), + Data("single_token", SINGLE_TOKEN), + Data("sherlock", SHERLOCK), + Data("code", CODE), + Data("corpus", CORPUS), ]; diff --git a/crates/typos-cli/benches/tokenize.rs b/crates/typos-cli/benches/tokenize.rs index 9c7399da2..650a96a58 100644 --- a/crates/typos-cli/benches/tokenize.rs +++ b/crates/typos-cli/benches/tokenize.rs @@ -1,93 +1,98 @@ mod data; -use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; +mod parse_str { + use super::*; -fn bench_parse_str(c: &mut Criterion) { - let mut group = c.benchmark_group("parse_str"); - for (name, sample) in data::DATA { - let len = sample.len(); - group.throughput(Throughput::Bytes(len as u64)); - group.bench_with_input(BenchmarkId::new("unicode", name), &len, |b, _| { - let parser = typos::tokens::TokenizerBuilder::new().unicode(true).build(); - b.iter(|| parser.parse_str(sample).last()); - }); - group.bench_with_input(BenchmarkId::new("ascii", name), &len, |b, _| { - let parser = typos::tokens::TokenizerBuilder::new() - .unicode(false) - .build(); - b.iter(|| parser.parse_str(sample).last()); - }); + #[divan::bench(args = data::DATA)] + fn ascii(bencher: divan::Bencher, sample: &data::Data) { + let unicode = false; + let parser = typos::tokens::TokenizerBuilder::new() + .unicode(unicode) + .build(); + bencher + .with_inputs(|| sample.content()) + .input_counter(divan::counter::BytesCount::of_str) + .bench_local_values(|sample| parser.parse_str(sample).last()) } - group.finish(); -} -fn bench_parse_bytes(c: &mut Criterion) { - let mut group = c.benchmark_group("parse_bytes"); - for (name, sample) in data::DATA { - let len = sample.len(); - group.throughput(Throughput::Bytes(len as u64)); - group.bench_with_input(BenchmarkId::new("unicode", name), &len, |b, _| { - let parser = typos::tokens::TokenizerBuilder::new().unicode(true).build(); - b.iter(|| parser.parse_bytes(sample.as_bytes()).last()); - }); - group.bench_with_input(BenchmarkId::new("ascii", name), &len, |b, _| { - let parser = typos::tokens::TokenizerBuilder::new() - .unicode(false) - .build(); - b.iter(|| parser.parse_bytes(sample.as_bytes()).last()); - }); + #[divan::bench(args = data::DATA)] + fn unicode(bencher: divan::Bencher, sample: &data::Data) { + let unicode = true; + let parser = typos::tokens::TokenizerBuilder::new() + .unicode(unicode) + .build(); + bencher + .with_inputs(|| sample.content()) + .input_counter(divan::counter::BytesCount::of_str) + .bench_local_values(|sample| parser.parse_str(sample).last()) } - group.finish(); } -fn bench_split(c: &mut Criterion) { - let mut group = c.benchmark_group("split"); - for (name, sample) in data::DATA { - let len = sample.len(); - group.throughput(Throughput::Bytes(len as u64)); - group.bench_with_input(BenchmarkId::new("words", name), &len, |b, _| { - let symbol = - typos::tokens::Identifier::new_unchecked(sample, typos::tokens::Case::None, 0); - b.iter(|| symbol.split().last()); - }); +mod parse_bytes { + use super::*; + + #[divan::bench(args = data::DATA)] + fn ascii(bencher: divan::Bencher, sample: &data::Data) { + let unicode = false; + let parser = typos::tokens::TokenizerBuilder::new() + .unicode(unicode) + .build(); + bencher + .with_inputs(|| sample.content().as_bytes()) + .input_counter(divan::counter::BytesCount::of_slice) + .bench_local_values(|sample| parser.parse_bytes(sample).last()) } - group.finish(); + + #[divan::bench(args = data::DATA)] + fn unicode(bencher: divan::Bencher, sample: &data::Data) { + let unicode = true; + let parser = typos::tokens::TokenizerBuilder::new() + .unicode(unicode) + .build(); + bencher + .with_inputs(|| sample.content().as_bytes()) + .input_counter(divan::counter::BytesCount::of_slice) + .bench_local_values(|sample| parser.parse_bytes(sample).last()) + } +} + +#[divan::bench(args = data::DATA)] +fn split(bencher: divan::Bencher, sample: &data::Data) { + let symbol = + typos::tokens::Identifier::new_unchecked(sample.content(), typos::tokens::Case::None, 0); + bencher + .counter(divan::counter::BytesCount::of_str(sample.content())) + .bench_local(|| symbol.split().last()) } -fn bench_parse_split(c: &mut Criterion) { - let mut group = c.benchmark_group("parse_bytes+split"); - for (name, sample) in data::DATA { - let len = sample.len(); - group.throughput(Throughput::Bytes(len as u64)); - group.bench_with_input(BenchmarkId::new("unicode", name), &len, |b, _| { - let parser = typos::tokens::TokenizerBuilder::new().unicode(true).build(); - b.iter(|| { - parser - .parse_bytes(sample.as_bytes()) - .flat_map(|i| i.split()) - .last() - }); - }); - group.bench_with_input(BenchmarkId::new("ascii", name), &len, |b, _| { - let parser = typos::tokens::TokenizerBuilder::new() - .unicode(false) - .build(); - b.iter(|| { - parser - .parse_bytes(sample.as_bytes()) - .flat_map(|i| i.split()) - .last() - }); - }); +mod parse_split_bytes { + use super::*; + + #[divan::bench(args = data::DATA)] + fn ascii(bencher: divan::Bencher, sample: &data::Data) { + let unicode = false; + let parser = typos::tokens::TokenizerBuilder::new() + .unicode(unicode) + .build(); + bencher + .with_inputs(|| sample.content().as_bytes()) + .input_counter(divan::counter::BytesCount::of_slice) + .bench_local_values(|sample| parser.parse_bytes(sample).flat_map(|i| i.split()).last()) + } + + #[divan::bench(args = data::DATA)] + fn unicode(bencher: divan::Bencher, sample: &data::Data) { + let unicode = true; + let parser = typos::tokens::TokenizerBuilder::new() + .unicode(unicode) + .build(); + bencher + .with_inputs(|| sample.content().as_bytes()) + .input_counter(divan::counter::BytesCount::of_slice) + .bench_local_values(|sample| parser.parse_bytes(sample).flat_map(|i| i.split()).last()) } - group.finish(); } -criterion_group!( - benches, - bench_parse_str, - bench_parse_bytes, - bench_split, - bench_parse_split -); -criterion_main!(benches); +fn main() { + divan::main(); +}