Skip to main content

bge_m3_embedding_server/
embedder.rs

1// Copyright (c) 2026 J. Patrick Fulton
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Worker-pool–driven BGE-M3 embedding service.
16//!
17//! Submodules:
18//! - `types`: public DTOs and the internal `EmbedRequest` enum.
19//! - `error`: small `ort::Error` → `anyhow::Error` adapter.
20//! - `model_files`: hf-hub download / cache layout for the ONNX model files.
21//! - `tokenize`: tokenizer load + no-pad tokenization + chunk-array build.
22//! - `session`: ORT execution-provider config and session loading.
23//! - `math`: pure dense/sparse math helpers (testable without ORT).
24//! - `dense`: dense embedding pipeline.
25//! - `sparse`: BGE-M3 SPLADE-style sparse embedding pipeline.
26//! - `dual`: paired dense + sparse embedding pipeline (one forward pass).
27//! - `worker`: blocking worker thread, request dispatch, probe wiring.
28//! - `pool`: `EmbedPool` async wrapper and test helpers.
29
30mod dense;
31mod dual;
32mod error;
33mod math;
34mod model_files;
35mod pool;
36mod session;
37mod sparse;
38mod tokenize;
39mod types;
40mod worker;
41
42pub use pool::EmbedPool;
43pub(crate) use types::OS_HEADROOM_BYTES;
44pub(crate) use worker::WorkerConfig;
45
46// `SparseEmbedding` is referenced by tests via `crate::embedder::SparseEmbedding`,
47// but is not used outside the module in the non-test build. The cfg(test) gate
48// keeps the binary's unused-import lint clean while preserving the call-site path
49// for tests.
50#[cfg(test)]
51pub(crate) use types::SparseEmbedding;