Skip to content

Commit

Permalink
feat(inverted_index.search): add fst applier (#2851)
Browse files Browse the repository at this point in the history
* feat(inverted_index.search): add fst applier

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

* fix: typos

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>

---------

Signed-off-by: Zhenchi <zhongzc_arch@outlook.com>
  • Loading branch information
zhongzc committed Dec 4, 2023
1 parent 806400c commit 58c1373
Show file tree
Hide file tree
Showing 17 changed files with 810 additions and 11 deletions.
3 changes: 3 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ prost = "0.12"
raft-engine = { git = "https://github.com/tikv/raft-engine.git", rev = "22dfb426cd994602b57725ef080287d3e53db479" }
rand = "0.8"
regex = "1.8"
regex-automata = { version = "0.1", features = ["transducer"] }
reqwest = { version = "0.11", default-features = false, features = [
"json",
"rustls-tls-native-roots",
Expand Down
2 changes: 2 additions & 0 deletions src/index/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ fst.workspace = true
futures.workspace = true
greptime-proto.workspace = true
prost.workspace = true
regex-automata.workspace = true
regex.workspace = true
snafu.workspace = true

[dev-dependencies]
Expand Down
4 changes: 4 additions & 0 deletions src/index/src/inverted_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,7 @@

pub mod error;
pub mod format;
pub mod search;

/// A finite state transducer map (`fst::Map`) whose underlying data is an owned `Vec<u8>`.
pub type FstMap = fst::Map<Vec<u8>>;
/// An owned byte buffer (`Vec<u8>`).
pub type Bytes = Vec<u8>;
43 changes: 42 additions & 1 deletion src/index/src/inverted_index/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ use common_error::status_code::StatusCode;
use common_macro::stack_trace_debug;
use snafu::{Location, Snafu};

use crate::inverted_index::search::predicate::Predicate;

#[derive(Snafu)]
#[snafu(visibility(pub))]
#[stack_trace_debug]
Expand Down Expand Up @@ -75,6 +77,38 @@ pub enum Error {
error: prost::DecodeError,
location: Location,
},

#[snafu(display("Failed to parse regex pattern: {pattern}"))]
ParseRegex {
#[snafu(source)]
error: regex::Error,
pattern: String,
location: Location,
},

#[snafu(display("Failed to parse regex DFA"))]
ParseDFA {
#[snafu(source)]
error: regex_automata::Error,
location: Location,
},

#[snafu(display("Unexpected empty predicates to construct fst applier"))]
EmptyPredicates { location: Location },

#[snafu(display("Failed to construct intersection fst applier with InList predicate"))]
IntersectionApplierWithInList { location: Location },

#[snafu(display("Failed to construct keys fst applier without InList predicate"))]
KeysApplierWithoutInList { location: Location },

#[snafu(display(
"Failed to construct keys fst applier with unexpected predicates: {predicates:?}"
))]
KeysApplierUnexpectedPredicates {
location: Location,
predicates: Vec<Predicate>,
},
}

impl ErrorExt for Error {
Expand All @@ -87,7 +121,14 @@ impl ErrorExt for Error {
| UnexpectedOffsetSize { .. }
| UnexpectedBlobSize { .. }
| DecodeProto { .. }
| DecodeFst { .. } => StatusCode::Unexpected,
| DecodeFst { .. }
| KeysApplierUnexpectedPredicates { .. } => StatusCode::Unexpected,

ParseRegex { .. }
| ParseDFA { .. }
| KeysApplierWithoutInList { .. }
| IntersectionApplierWithInList { .. }
| EmptyPredicates { .. } => StatusCode::InvalidArguments,
}
}

Expand Down
4 changes: 1 addition & 3 deletions src/index/src/inverted_index/format/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,10 @@ mod footer;

use async_trait::async_trait;
use common_base::BitVec;
use fst::Map;
use greptime_proto::v1::index::{InvertedIndexMeta, InvertedIndexMetas};

use crate::inverted_index::error::Result;

pub type FstMap = Map<Vec<u8>>;
use crate::inverted_index::FstMap;

/// InvertedIndexReader defines an asynchronous reader of inverted index data
#[async_trait]
Expand Down
16 changes: 16 additions & 0 deletions src/index/src/inverted_index/search.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

pub mod fst_apply;
pub mod predicate;
32 changes: 32 additions & 0 deletions src/index/src/inverted_index/search/fst_apply.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

mod intersection_apply;
mod keys_apply;

pub use intersection_apply::IntersectionFstApplier;
pub use keys_apply::KeysFstApplier;

use crate::inverted_index::FstMap;

/// A trait for objects that can process a finite state transducer (FstMap) and return
/// associated values.
///
/// Implementors are required to be `Send + Sync`.
///
/// NOTE(review): `IntersectionFstApplier` and `KeysFstApplier` are re-exported from this
/// module and presumably implement this trait — confirm in their respective modules.
pub trait FstApplier: Send + Sync {
/// Retrieves values from an FstMap.
///
/// * `fst`: A reference to the FstMap from which the values will be fetched.
///
/// Returns a `Vec<u64>`, with each u64 being a value from the FstMap.
///
/// The signature is infallible (no `Result`). It does not by itself specify the
/// ordering or uniqueness of the returned values; callers should check the
/// concrete implementation before relying on either.
fn apply(&self, fst: &FstMap) -> Vec<u64>;
}

0 comments on commit 58c1373

Please sign in to comment.