Skip to content

Commit ce454cf

Browse files
shulaoda
and
overlookmotel
authored Aug 26, 2024
perf: use simdutf8 to validate UTF-8 when reading files (#5196)
closes #5191 --------- Co-authored-by: overlookmotel <j@dummett.org>
1 parent fb847bd commit ce454cf

File tree

6 files changed

+32
-4
lines changed

6 files changed

+32
-4
lines changed
 

‎Cargo.lock

+8-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎Cargo.toml

+1
Original file line number | Diff line number | Diff line change
@@ -170,6 +170,7 @@ seq-macro = "0.3.5"
170170
serde = "1.0.206"
171171
serde_json = "1.0.124"
172172
serde-wasm-bindgen = "0.6.5"
173+
simdutf8 = { version = "0.1.4", features = ["aarch64_neon"] }
173174
similar = "2.6.0"
174175
syn = { version = "2.0.74", default-features = false }
175176
tempfile = "3.12.0"

‎crates/oxc_linter/Cargo.toml

+1
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,7 @@ once_cell = { workspace = true }
5252
memchr = { workspace = true }
5353
json-strip-comments = { workspace = true }
5454
schemars = { workspace = true, features = ["indexmap2"] }
55+
simdutf8 = { workspace = true }
5556

5657
[dev-dependencies]
5758
insta = { workspace = true }

‎crates/oxc_linter/src/config/mod.rs

+4-2
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,9 @@ pub use self::{
1717
settings::{jsdoc::JSDocPluginSettings, OxlintSettings},
1818
};
1919
use crate::{
20-
rules::RuleEnum, utils::is_jest_rule_adapted_to_vitest, AllowWarnDeny, RuleWithSeverity,
20+
rules::RuleEnum,
21+
utils::{is_jest_rule_adapted_to_vitest, read_to_string},
22+
AllowWarnDeny, RuleWithSeverity,
2123
};
2224

2325
/// Oxlint Configuration File
@@ -68,7 +70,7 @@ impl OxlintConfig {
6870
///
6971
/// * Parse Failure
7072
pub fn from_file(path: &Path) -> Result<Self, OxcDiagnostic> {
71-
let mut string = std::fs::read_to_string(path).map_err(|e| {
73+
let mut string = read_to_string(path).map_err(|e| {
7274
OxcDiagnostic::error(format!("Failed to parse config {path:?} with error {e:?}"))
7375
})?;
7476

‎crates/oxc_linter/src/service.rs

+2-1
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@ use rustc_hash::FxHashSet;
1919

2020
use crate::{
2121
partial_loader::{JavaScriptSource, PartialLoader, LINT_PARTIAL_LOADER_EXT},
22+
utils::read_to_string,
2223
Fixer, Linter, Message,
2324
};
2425

@@ -176,7 +177,7 @@ impl Runtime {
176177
return None;
177178
}
178179
let source_type = source_type.unwrap_or_default();
179-
let file_result = fs::read_to_string(path).map_err(|e| {
180+
let file_result = read_to_string(path).map_err(|e| {
180181
Error::new(OxcDiagnostic::error(format!(
181182
"Failed to open file {path:?} with error \"{e}\""
182183
)))

‎crates/oxc_linter/src/utils/mod.rs

+16
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,8 @@ mod tree_shaking;
99
mod unicorn;
1010
mod vitest;
1111

12+
use std::{io, path::Path};
13+
1214
pub use self::{
1315
config::*, jest::*, jsdoc::*, nextjs::*, promise::*, react::*, react_perf::*, tree_shaking::*,
1416
unicorn::*, vitest::*,
@@ -37,3 +39,17 @@ pub fn is_jest_rule_adapted_to_vitest(rule_name: &str) -> bool {
3739

3840
jest_rules.contains(&rule_name)
3941
}
42+
43+
pub fn read_to_string(path: &Path) -> io::Result<String> {
44+
// `simdutf8` is faster than `std::str::from_utf8` which `fs::read_to_string` uses internally
45+
let bytes = std::fs::read(path)?;
46+
if simdutf8::basic::from_utf8(&bytes).is_err() {
47+
// Same error as `fs::read_to_string` produces (`io::Error::INVALID_UTF8`)
48+
return Err(io::Error::new(
49+
io::ErrorKind::InvalidData,
50+
"stream did not contain valid UTF-8",
51+
));
52+
}
53+
// SAFETY: `simdutf8` has ensured it's a valid UTF-8 string
54+
Ok(unsafe { String::from_utf8_unchecked(bytes) })
55+
}

0 commit comments

Comments
 (0)
Please sign in to comment.