Skip to main content

bge_m3_embedding_server/
models.rs

1// Copyright (c) 2026 J. Patrick Fulton
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Request and response model types for the embedding API endpoints.
16//!
17//! Dense types are OpenAI-compatible. Sparse and dual types are BGE-M3
18//! specific; they extend the same request shape with additional output fields.
19
20use serde::{Deserialize, Deserializer, Serialize};
21
22// ---------------------------------------------------------------------------
23// TextInput — accepts either a single string or an array of strings
24// ---------------------------------------------------------------------------
25
/// One or more input texts, always held as a list.
///
/// On the wire this accepts either a bare string (`"a single string"`) or an
/// array of strings (`["array", "of", "strings"]`); both forms end up in the
/// wrapped `Vec<String>`.
#[derive(PartialEq, Debug)]
pub struct TextInput(pub Vec<String>);
30
31impl<'de> Deserialize<'de> for TextInput {
32    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
33        #[derive(Deserialize)]
34        #[serde(untagged)]
35        enum StringOrArray {
36            Single(String),
37            Multiple(Vec<String>),
38        }
39
40        match StringOrArray::deserialize(deserializer)? {
41            StringOrArray::Single(s) => Ok(TextInput(vec![s])),
42            StringOrArray::Multiple(v) => Ok(TextInput(v)),
43        }
44    }
45}
46
47// ---------------------------------------------------------------------------
48// Dense embedding types
49// ---------------------------------------------------------------------------
50
51/// Request body for the dense embeddings endpoint.
52#[derive(Debug, Deserialize)]
53pub struct DenseRequest {
54    /// Input texts to generate embeddings for.
55    pub input: TextInput,
56    /// Accepted for `OpenAI` API compatibility; value is ignored — always uses BGE-M3.
57    pub model: Option<String>,
58}
59
60/// Top-level response for the dense embeddings endpoint (OpenAI-compatible).
61#[derive(Debug, Serialize)]
62pub struct DenseResponse {
63    /// Always `"list"`.
64    pub object: &'static str,
65    /// Always `"bge-m3"`.
66    pub model: &'static str,
67    /// Per-document dense embedding entries, one per input text.
68    pub data: Vec<DenseEmbeddingData>,
69    /// Aggregate token usage estimates.
70    pub usage: Usage,
71}
72
73/// Per-document dense embedding entry.
74#[derive(Debug, Serialize)]
75pub struct DenseEmbeddingData {
76    /// Always `"embedding"`.
77    pub object: &'static str,
78    /// Zero-based position of this document in the request's input array.
79    pub index: usize,
80    /// L2-normalized 1024-dimensional dense embedding vector.
81    pub embedding: Vec<f32>,
82}
83
84/// Token usage counters.
85#[derive(Debug, Serialize)]
86pub struct Usage {
87    /// Estimated input token count (approximated as `chars / 4 + 1` per text).
88    pub prompt_tokens: usize,
89    /// Same as `prompt_tokens` — embedding models have no completion tokens.
90    pub total_tokens: usize,
91}
92
93// ---------------------------------------------------------------------------
94// Sparse embedding types
95// ---------------------------------------------------------------------------
96
97/// Request body for the sparse embeddings endpoint.
98#[derive(Debug, Deserialize)]
99pub struct SparseRequest {
100    /// Input texts to generate sparse embeddings for.
101    pub input: TextInput,
102}
103
104/// Top-level response for the sparse embeddings endpoint.
105#[derive(Debug, Serialize)]
106pub struct SparseResponse {
107    /// Per-document sparse embedding entries, one per input text.
108    pub data: Vec<SparseEmbeddingData>,
109}
110
111/// Per-document sparse embedding entry.
112#[derive(Debug, Serialize)]
113pub struct SparseEmbeddingData {
114    /// Zero-based position of this document in the request's input array.
115    pub index: usize,
116    /// Non-zero vocabulary token weights for this document.
117    pub sparse_values: SparseValues,
118}
119
120/// Parallel arrays of token indices and their weights.
121#[derive(Debug, Serialize)]
122pub struct SparseValues {
123    /// Sorted vocabulary token IDs with non-zero ReLU-gated weight.
124    pub indices: Vec<u32>,
125    /// ReLU-gated weights corresponding to each index, in the same order.
126    pub values: Vec<f32>,
127}
128
129// ---------------------------------------------------------------------------
130// Dual embedding types (single forward pass yielding both dense and sparse)
131// ---------------------------------------------------------------------------
132
133/// Request body for the unified dense + sparse embeddings endpoint.
134#[derive(Debug, Deserialize)]
135pub struct DualRequest {
136    /// Input texts to generate dense and sparse embeddings for.
137    pub input: TextInput,
138    /// Accepted for `OpenAI` API compatibility; always uses BGE-M3.
139    pub model: Option<String>,
140}
141
142/// Top-level response for the unified dense + sparse embeddings endpoint.
143#[derive(Debug, Serialize)]
144pub struct DualResponse {
145    /// Always `"list"`.
146    pub object: &'static str,
147    /// Always `"bge-m3"`.
148    pub model: &'static str,
149    /// Per-document paired dense + sparse embedding entries.
150    pub data: Vec<DualEmbeddingData>,
151    /// Aggregate token usage estimates.
152    pub usage: Usage,
153}
154
155/// Per-document paired dense + sparse embedding entry.
156#[derive(Debug, Serialize)]
157pub struct DualEmbeddingData {
158    /// Zero-based position of this document in the request's input array.
159    pub index: usize,
160    /// L2-normalized 1024-dimensional dense embedding vector.
161    pub embedding: Vec<f32>,
162    /// Non-zero vocabulary token weights for this document.
163    pub sparse_values: SparseValues,
164}
165
166// ---------------------------------------------------------------------------
167// Models list types
168// ---------------------------------------------------------------------------
169
170/// Top-level response for GET /v1/models (OpenAI-compatible).
171#[derive(Debug, Serialize)]
172pub struct ModelsResponse {
173    /// Always `"list"`.
174    pub object: &'static str,
175    /// List of available model entries.
176    pub data: Vec<ModelEntry>,
177}
178
179/// A single model entry.
180#[derive(Debug, Serialize)]
181pub struct ModelEntry {
182    /// Model identifier — always `"bge-m3"`.
183    pub id: &'static str,
184    /// Always `"model"`.
185    pub object: &'static str,
186}
187
188#[cfg(test)]
189mod tests;