Skip to main content

bge_m3_embedding_server/probe/
cache.rs

1// Copyright (c) 2026 J. Patrick Fulton
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Persistent cache of fitted probe coefficients on the EFS volume.
16//!
17//! The cache key is `{server_version, model, max_seq, arch}`. When the
18//! fingerprint matches the current server's configuration, the probe is
19//! skipped and the cached `(a, b)` are used immediately.
20
21use std::path::Path;
22
23use tracing::{info, warn};
24
/// On-disk representation of the probe cache file, serialized as JSON.
///
/// `schema_version`, `server_version`, `model`, `max_seq` and `arch` are the
/// values `try_load_probe_cache` compares against the running server before
/// trusting the cached coefficients `a` and `b`.
#[derive(serde::Serialize, serde::Deserialize)]
struct ProbeCache {
    /// Version of this file layout; the reader and writer in this module both use `1`.
    schema_version: u32,
    /// Crate version (`CARGO_PKG_VERSION`) of the server that wrote the file.
    server_version: String,
    /// Model variant the coefficients were fitted for.
    model: String,
    /// Maximum sequence length the coefficients were fitted for.
    max_seq: usize,
    /// CPU architecture (`std::env::consts::ARCH`) of the process that wrote the file.
    arch: String,
    /// Time of the save in seconds since the Unix epoch; `0` if the system
    /// clock reported a time before the epoch.
    fitted_at_unix: u64,
    /// First fitted probe coefficient; the loader rejects values `<= 0.0`.
    a: f64,
    /// Second fitted probe coefficient; the loader rejects values `<= 0.0`.
    b: f64,
}
36
37/// Attempts to load cached probe coefficients from `{cache_dir}/probe-coefficients.json`.
38///
39/// Returns `Some((a, b))` when a valid, fingerprint-matching cache file exists.
40/// Returns `None` when the file is absent, unreadable, or the fingerprint does
41/// not match the current `(server_version, model_variant, max_seq, arch)`.
42pub(crate) fn try_load_probe_cache(
43    cache_dir: &Path,
44    model_variant: &str,
45    max_seq: usize,
46) -> Option<(f64, f64)> {
47    let path = cache_dir.join("probe-coefficients.json");
48    let raw = std::fs::read_to_string(&path).ok()?;
49    let cache: ProbeCache = serde_json::from_str(&raw).ok()?;
50
51    let current_version = env!("CARGO_PKG_VERSION");
52    let current_arch = std::env::consts::ARCH;
53
54    if cache.schema_version != 1
55        || cache.server_version != current_version
56        || cache.model != model_variant
57        || cache.max_seq != max_seq
58        || cache.arch != current_arch
59    {
60        info!(
61            cached_version = %cache.server_version,
62            current_version,
63            cached_model = %cache.model,
64            model_variant,
65            cached_max_seq = cache.max_seq,
66            max_seq,
67            cached_arch = %cache.arch,
68            current_arch,
69            "Probe cache fingerprint mismatch; will re-probe"
70        );
71        return None;
72    }
73
74    if cache.a <= 0.0 || cache.b <= 0.0 {
75        warn!("Probe cache has non-positive coefficients; ignoring");
76        return None;
77    }
78
79    info!(
80        a = cache.a,
81        b = cache.b,
82        fitted_at_unix = cache.fitted_at_unix,
83        "Probe cache hit — skipping startup probe"
84    );
85    Some((cache.a, cache.b))
86}
87
88/// Saves fitted probe coefficients to `{cache_dir}/probe-coefficients.json`
89/// via an atomic temp-file + rename.
90///
91/// Errors are logged and silently ignored — a cache write failure must never
92/// abort the server.
93pub(crate) fn save_probe_cache(
94    cache_dir: &Path,
95    model_variant: &str,
96    max_seq: usize,
97    a: f64,
98    b: f64,
99) {
100    let fitted_at_unix = std::time::SystemTime::now()
101        .duration_since(std::time::UNIX_EPOCH)
102        .map_or(0, |d| d.as_secs());
103
104    let cache = ProbeCache {
105        schema_version: 1,
106        server_version: env!("CARGO_PKG_VERSION").to_string(),
107        model: model_variant.to_string(),
108        max_seq,
109        arch: std::env::consts::ARCH.to_string(),
110        fitted_at_unix,
111        a,
112        b,
113    };
114
115    let json = match serde_json::to_string_pretty(&cache) {
116        Ok(j) => j,
117        Err(e) => {
118            warn!(error = %e, "Failed to serialize probe cache; skipping write");
119            return;
120        }
121    };
122
123    let final_path = cache_dir.join("probe-coefficients.json");
124    let tmp_path = cache_dir.join("probe-coefficients.json.tmp");
125
126    if let Err(e) = std::fs::write(&tmp_path, &json) {
127        warn!(error = %e, path = %tmp_path.display(), "Failed to write probe cache temp file");
128        return;
129    }
130
131    if let Err(e) = std::fs::rename(&tmp_path, &final_path) {
132        warn!(error = %e, "Failed to atomically rename probe cache file");
133        let _ = std::fs::remove_file(&tmp_path);
134        return;
135    }
136
137    info!(
138        path = %final_path.display(),
139        a,
140        b,
141        "Probe coefficients cached to EFS"
142    );
143}