From f1a4eb9c28ed50c9f0613f8b6564eafa541b591b Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Thu, 7 Sep 2023 10:19:25 +0200 Subject: [PATCH] Use the unicode-ident crate (#7212) --- Cargo.lock | 29 ++------------------ Cargo.toml | 2 +- crates/ruff_python_parser/Cargo.toml | 3 +- crates/ruff_python_parser/src/lexer.rs | 12 +------- crates/ruff_python_stdlib/Cargo.toml | 2 +- crates/ruff_python_stdlib/src/identifiers.rs | 2 +- crates/ruff_python_trivia/Cargo.toml | 2 +- crates/ruff_python_trivia/src/tokenizer.rs | 2 +- 8 files changed, 10 insertions(+), 44 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d59ed4c834f2f..3c7bb27a00ff4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2432,8 +2432,7 @@ dependencies = [ "static_assertions", "test-case", "tiny-keccak", - "unic-emoji-char", - "unic-ucd-ident", + "unicode-ident", "unicode_names2", ] @@ -2468,7 +2467,7 @@ dependencies = [ name = "ruff_python_stdlib" version = "0.0.0" dependencies = [ - "unic-ucd-ident", + "unicode-ident", ] [[package]] @@ -2482,7 +2481,7 @@ dependencies = [ "ruff_source_file", "ruff_text_size", "smallvec", - "unic-ucd-ident", + "unicode-ident", ] [[package]] @@ -3280,17 +3279,6 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc" -[[package]] -name = "unic-emoji-char" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b07221e68897210270a38bde4babb655869637af0f69407f96053a34f76494d" -dependencies = [ - "unic-char-property", - "unic-char-range", - "unic-ucd-version", -] - [[package]] name = "unic-ucd-category" version = "0.9.0" @@ -3303,17 +3291,6 @@ dependencies = [ "unic-ucd-version", ] -[[package]] -name = "unic-ucd-ident" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e230a37c0381caa9219d67cf063aa3a375ffed5bf541a452db16e744bdab6987" -dependencies = [ - "unic-char-property", - "unic-char-range", - "unic-ucd-version", -] - [[package]] name = "unic-ucd-version" version = "0.9.0" diff --git a/Cargo.toml b/Cargo.toml index ecceb3f27b72a..0b49a28f06a71 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -49,7 +49,7 @@ toml = { version = "0.7.2" } tracing = "0.1.37" tracing-indicatif = "0.3.4" tracing-subscriber = { version = "0.3.17", features = ["env-filter"] } -unic-ucd-ident = "0.9.0" +unicode-ident = "1.0.11" unicode-width = "0.1.10" uuid = { version = "1.4.1", features = ["v4", "fast-rng", "macro-diagnostics", "js"] } wsl = { version = "0.1.0" } diff --git a/crates/ruff_python_parser/Cargo.toml b/crates/ruff_python_parser/Cargo.toml index a4832dc592876..0718e2721626e 100644 --- a/crates/ruff_python_parser/Cargo.toml +++ b/crates/ruff_python_parser/Cargo.toml @@ -23,8 +23,7 @@ itertools = { workspace = true } lalrpop-util = { version = "0.20.0", default-features = false } num-bigint = { workspace = true } num-traits = { workspace = true } -unic-emoji-char = "0.9.0" -unic-ucd-ident = { workspace = true } +unicode-ident = { workspace = true } unicode_names2 = { version = "0.6.0", git = "https://github.com/youknowone/unicode_names2.git", rev = "4ce16aa85cbcdd9cc830410f1a72ef9a235f2fde" } rustc-hash = { workspace = true } static_assertions = "1.1.0" diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs index 52f4bf1829735..167250179dcec 100644 --- a/crates/ruff_python_parser/src/lexer.rs +++ b/crates/ruff_python_parser/src/lexer.rs @@ -36,8 +36,7 @@ use num_bigint::BigInt; use num_traits::{Num, Zero}; use ruff_python_ast::IpyEscapeKind; use ruff_text_size::{TextLen, TextRange, TextSize}; -use unic_emoji_char::is_emoji_presentation; -use unic_ucd_ident::{is_xid_continue, is_xid_start}; +use unicode_ident::{is_xid_continue, is_xid_start}; use crate::lexer::cursor::{Cursor, EOF_CHAR}; use crate::lexer::indentation::{Indentation, Indentations}; @@ -597,15 +596,6 @@ impl<'source> Lexer<'source> { self.state = State::Other; Ok((identifier, self.token_range())) - } else if is_emoji_presentation(c) { - self.state = State::Other; - - Ok(( - Tok::Name { - name: c.to_string(), - }, - self.token_range(), - )) } else { Err(LexicalError { error: LexicalErrorType::UnrecognizedToken { tok: c }, diff --git a/crates/ruff_python_stdlib/Cargo.toml b/crates/ruff_python_stdlib/Cargo.toml index daaa540a293e2..807ccc0f5cda1 100644 --- a/crates/ruff_python_stdlib/Cargo.toml +++ b/crates/ruff_python_stdlib/Cargo.toml @@ -13,4 +13,4 @@ license = { workspace = true } [lib] [dependencies] -unic-ucd-ident = { workspace = true } +unicode-ident = { workspace = true } diff --git a/crates/ruff_python_stdlib/src/identifiers.rs b/crates/ruff_python_stdlib/src/identifiers.rs index a649c19c192c3..ab29b3877cffa 100644 --- a/crates/ruff_python_stdlib/src/identifiers.rs +++ b/crates/ruff_python_stdlib/src/identifiers.rs @@ -1,4 +1,4 @@ -use unic_ucd_ident::{is_xid_continue, is_xid_start}; +use unicode_ident::{is_xid_continue, is_xid_start}; use crate::keyword::is_keyword; diff --git a/crates/ruff_python_trivia/Cargo.toml b/crates/ruff_python_trivia/Cargo.toml index 75273acd1d3bf..8ae3fad931433 100644 --- a/crates/ruff_python_trivia/Cargo.toml +++ b/crates/ruff_python_trivia/Cargo.toml @@ -18,7 +18,7 @@ ruff_source_file = { path = "../ruff_source_file" } memchr = { workspace = true } smallvec = { workspace = true } -unic-ucd-ident = { workspace = true } +unicode-ident = { workspace = true } [dev-dependencies] insta = { workspace = true } diff --git a/crates/ruff_python_trivia/src/tokenizer.rs b/crates/ruff_python_trivia/src/tokenizer.rs index 93aa9fb16a7b6..d7d0645f3fb7b 100644 --- a/crates/ruff_python_trivia/src/tokenizer.rs +++ b/crates/ruff_python_trivia/src/tokenizer.rs @@ -1,5 +1,5 @@ use memchr::{memchr2, memchr3, memrchr3_iter}; -use unic_ucd_ident::{is_xid_continue, is_xid_start}; +use unicode_ident::{is_xid_continue, is_xid_start}; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};