Skip to main content

bge_m3_embedding_server/embedder/
model_files.rs

1// Copyright (c) 2026 J. Patrick Fulton
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! `HuggingFace` Hub download + cache-layout helpers for the BGE-M3 model files.
16
17use std::path::{Path, PathBuf};
18
19use anyhow::Result;
20use tracing::info;
21
22use crate::config::ModelVariant;
23
/// Upstream BGE-M3 repository on the `HuggingFace` Hub (FP32 ONNX export).
const REPO_ID: &str = "BAAI/bge-m3";
/// Pinned HF commit — prevents silent model updates and provides supply-chain
/// integrity for the ONNX weights and tokenizer. Update this hash intentionally
/// after verifying a new revision produces equivalent embeddings.
const REPO_REVISION: &str = "5617a9f61b028005a4858fdac845db406aefb181";

/// Community-converted repository hosting the FP16 and INT8 ONNX variants.
const XENOVA_REPO_ID: &str = "Xenova/bge-m3";
/// Pinned HF commit for the Xenova/bge-m3 FP16 (~1.08 GB) and INT8 (~568 MB) models.
/// Update intentionally after verifying equivalent embedding quality vs FP32.
const XENOVA_REPO_REVISION: &str = "4de13258303883538bd53b696b452bf8099f0858";
34
/// Paths to the ONNX model and tokenizer files resolved from the hf-hub cache.
///
/// Produced by `download_model_files`; both paths point into the hf-hub
/// snapshot cache under the configured cache directory.
pub(super) struct ModelFiles {
    /// Path to the ONNX model file (variant-specific).
    pub onnx_path: PathBuf,
    /// Path to the `tokenizer.json` file.
    pub tokenizer_path: PathBuf,
}
42
/// Reports whether the primary ONNX file for `repo_id` at `revision` already
/// exists in the hf-hub snapshot cache rooted at `cache_dir`, in which case a
/// subsequent `repo.get()` resolves locally without touching the network.
///
/// hf-hub 0.5.x layout when constructed with `ApiBuilder::with_cache_dir(p)`:
/// `{p}/models--{owner}--{name}/snapshots/{revision}/{onnx_filename}`
///
/// Note: this differs from Python `huggingface_hub`, which appends a `hub/`
/// segment when `HF_HOME` is set. The Rust crate treats `with_cache_dir`
/// as `HF_HUB_CACHE` directly — no `hub/` subdirectory is added.
fn is_model_cached(cache_dir: &Path, repo_id: &str, revision: &str, onnx_filename: &str) -> bool {
    // Hub repo ids ("owner/name") are flattened into a single directory name.
    let flattened = repo_id.replace('/', "--");
    let snapshot_dir = cache_dir
        .join(format!("models--{flattened}"))
        .join("snapshots")
        .join(revision);
    snapshot_dir.join(onnx_filename).exists()
}
62
63/// Downloads (or retrieves from the local hf-hub snapshot cache) the ONNX model
64/// and tokenizer files for the given model variant.
65///
66/// `show_progress` enables hf-hub's download progress bar; pass `true` only for
67/// the leader worker (worker 0) so progress is shown exactly once.
68pub(super) fn download_model_files(
69    cache_dir: &Path,
70    show_progress: bool,
71    variant: ModelVariant,
72) -> Result<ModelFiles> {
73    let (repo_id, repo_revision) = match variant {
74        ModelVariant::Fp32 => (REPO_ID, REPO_REVISION),
75        ModelVariant::Fp16 | ModelVariant::Int8 => (XENOVA_REPO_ID, XENOVA_REPO_REVISION),
76    };
77
78    // Check the hf-hub snapshot directory for the primary ONNX file before
79    // touching the network.  This lets us log a clear "from cache" message
80    // rather than silence while hf-hub resolves files.
81    let onnx_filename = match variant {
82        ModelVariant::Fp32 => "onnx/model.onnx",
83        ModelVariant::Fp16 => "onnx/model_fp16.onnx",
84        ModelVariant::Int8 => "onnx/model_int8.onnx",
85    };
86    let cached = is_model_cached(cache_dir, repo_id, repo_revision, onnx_filename);
87    if cached {
88        info!(
89            repo_id,
90            revision = repo_revision,
91            model_variant = %variant,
92            "Model files found in local cache — no download needed"
93        );
94    } else {
95        info!(
96            repo_id,
97            revision = repo_revision,
98            model_variant = %variant,
99            "Model files not in local cache — downloading from HuggingFace Hub"
100        );
101    }
102
103    let api = hf_hub::api::sync::ApiBuilder::new()
104        .with_cache_dir(cache_dir.to_path_buf())
105        .with_progress(show_progress)
106        .build()
107        .map_err(|e| anyhow::anyhow!("Failed to build hf-hub API: {e}"))?;
108
109    let repo = api.repo(hf_hub::Repo::with_revision(
110        repo_id.to_string(),
111        hf_hub::RepoType::Model,
112        repo_revision.to_string(),
113    ));
114
115    let onnx_path = match variant {
116        ModelVariant::Fp32 => {
117            let path = repo
118                .get("onnx/model.onnx")
119                .map_err(|e| anyhow::anyhow!("Failed to get onnx/model.onnx: {e}"))?;
120            repo.get("onnx/model.onnx_data")
121                .map_err(|e| anyhow::anyhow!("Failed to get onnx/model.onnx_data: {e}"))?;
122            repo.get("onnx/Constant_7_attr__value")
123                .map_err(|e| anyhow::anyhow!("Failed to get onnx/Constant_7_attr__value: {e}"))?;
124            path
125        }
126        ModelVariant::Fp16 => repo
127            .get("onnx/model_fp16.onnx")
128            .map_err(|e| anyhow::anyhow!("Failed to get onnx/model_fp16.onnx: {e}"))?,
129        ModelVariant::Int8 => repo
130            .get("onnx/model_int8.onnx")
131            .map_err(|e| anyhow::anyhow!("Failed to get onnx/model_int8.onnx: {e}"))?,
132    };
133
134    let tokenizer_path = repo
135        .get("tokenizer.json")
136        .map_err(|e| anyhow::anyhow!("Failed to get tokenizer.json: {e}"))?;
137
138    Ok(ModelFiles {
139        onnx_path,
140        tokenizer_path,
141    })
142}