Skip to main content

bge_m3_embedding_server/probe/
validate.rs

1// Copyright (c) 2026 J. Patrick Fulton
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Tokenizer + ndarray shape validation at the configured `max_seq`.
16
17/// Validates that the configured `max_seq` is reachable by the tokenizer and
18/// ndarray dimension math without performing a full ORT `session.run()`.
19///
20/// Replaces the old `(1, max_seq)` capability check that called `session.run()`
21/// and risked OOM-killing the container on memory-constrained hosts.
22///
23/// This function constructs the `input_ids` and `attention_mask` ndarrays at
24/// `(1, max_seq)` and logs their shapes, confirming that:
25/// - `max_seq` fits within `usize` bounds.
26/// - ndarray can allocate the 2D layout `[1, max_seq]`.
27///
28/// Note: full position-embedding coverage (whether the ONNX model actually
29/// supports the configured `max_seq`) is NOT verified here. Runtime errors
30/// from the first real `/v1/embeddings` request surface that condition with
31/// a clear ORT error, without risking startup OOM.
32pub(super) fn validate_max_seq_shape(max_seq: usize) {
33    let ids: ndarray::Array2<i64> = ndarray::Array2::zeros((1, max_seq));
34    let mask: ndarray::Array2<i64> = ndarray::Array2::ones((1, max_seq));
35    tracing::debug!(
36        max_seq,
37        ids_shape = ?ids.dim(),
38        mask_shape = ?mask.dim(),
39        "Max-seq shape validation passed (no session.run)"
40    );
41}