bge_m3_embedding_server/models.rs
1// Copyright (c) 2026 J. Patrick Fulton
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
//! Request and response model types for the embedding API endpoints.
//!
//! Dense types are OpenAI-compatible. Sparse and dual types are BGE-M3
//! specific: they accept the same `input` field, and their responses carry
//! sparse token weights (the dual response carries both dense and sparse).
19
20use serde::{Deserialize, Deserializer, Serialize};
21
22// ---------------------------------------------------------------------------
23// TextInput — accepts either a single string or an array of strings
24// ---------------------------------------------------------------------------
25
/// Newtype over `Vec<String>` holding the texts of an embedding request.
///
/// Deserializes from either a single JSON string (`"a single string"`),
/// which becomes a one-element vector, or an array of strings
/// (`["array", "of", "strings"]`), which is used as-is.
///
/// `Eq` and `Clone` are derived alongside `PartialEq` because the payload is
/// a plain `Vec<String>`, so equality is total and copies are well-defined.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TextInput(pub Vec<String>);
30
31impl<'de> Deserialize<'de> for TextInput {
32 fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
33 #[derive(Deserialize)]
34 #[serde(untagged)]
35 enum StringOrArray {
36 Single(String),
37 Multiple(Vec<String>),
38 }
39
40 match StringOrArray::deserialize(deserializer)? {
41 StringOrArray::Single(s) => Ok(TextInput(vec![s])),
42 StringOrArray::Multiple(v) => Ok(TextInput(v)),
43 }
44 }
45}
46
47// ---------------------------------------------------------------------------
48// Dense embedding types
49// ---------------------------------------------------------------------------
50
51/// Request body for the dense embeddings endpoint.
52#[derive(Debug, Deserialize)]
53pub struct DenseRequest {
54 /// Input texts to generate embeddings for.
55 pub input: TextInput,
56 /// Accepted for `OpenAI` API compatibility; value is ignored — always uses BGE-M3.
57 pub model: Option<String>,
58}
59
60/// Top-level response for the dense embeddings endpoint (OpenAI-compatible).
61#[derive(Debug, Serialize)]
62pub struct DenseResponse {
63 /// Always `"list"`.
64 pub object: &'static str,
65 /// Always `"bge-m3"`.
66 pub model: &'static str,
67 /// Per-document dense embedding entries, one per input text.
68 pub data: Vec<DenseEmbeddingData>,
69 /// Aggregate token usage estimates.
70 pub usage: Usage,
71}
72
73/// Per-document dense embedding entry.
74#[derive(Debug, Serialize)]
75pub struct DenseEmbeddingData {
76 /// Always `"embedding"`.
77 pub object: &'static str,
78 /// Zero-based position of this document in the request's input array.
79 pub index: usize,
80 /// L2-normalized 1024-dimensional dense embedding vector.
81 pub embedding: Vec<f32>,
82}
83
84/// Token usage counters.
85#[derive(Debug, Serialize)]
86pub struct Usage {
87 /// Estimated input token count (approximated as `chars / 4 + 1` per text).
88 pub prompt_tokens: usize,
89 /// Same as `prompt_tokens` — embedding models have no completion tokens.
90 pub total_tokens: usize,
91}
92
93// ---------------------------------------------------------------------------
94// Sparse embedding types
95// ---------------------------------------------------------------------------
96
97/// Request body for the sparse embeddings endpoint.
98#[derive(Debug, Deserialize)]
99pub struct SparseRequest {
100 /// Input texts to generate sparse embeddings for.
101 pub input: TextInput,
102}
103
104/// Top-level response for the sparse embeddings endpoint.
105#[derive(Debug, Serialize)]
106pub struct SparseResponse {
107 /// Per-document sparse embedding entries, one per input text.
108 pub data: Vec<SparseEmbeddingData>,
109}
110
111/// Per-document sparse embedding entry.
112#[derive(Debug, Serialize)]
113pub struct SparseEmbeddingData {
114 /// Zero-based position of this document in the request's input array.
115 pub index: usize,
116 /// Non-zero vocabulary token weights for this document.
117 pub sparse_values: SparseValues,
118}
119
120/// Parallel arrays of token indices and their weights.
121#[derive(Debug, Serialize)]
122pub struct SparseValues {
123 /// Sorted vocabulary token IDs with non-zero ReLU-gated weight.
124 pub indices: Vec<u32>,
125 /// ReLU-gated weights corresponding to each index, in the same order.
126 pub values: Vec<f32>,
127}
128
129// ---------------------------------------------------------------------------
130// Dual embedding types (single forward pass yielding both dense and sparse)
131// ---------------------------------------------------------------------------
132
133/// Request body for the unified dense + sparse embeddings endpoint.
134#[derive(Debug, Deserialize)]
135pub struct DualRequest {
136 /// Input texts to generate dense and sparse embeddings for.
137 pub input: TextInput,
138 /// Accepted for `OpenAI` API compatibility; always uses BGE-M3.
139 pub model: Option<String>,
140}
141
142/// Top-level response for the unified dense + sparse embeddings endpoint.
143#[derive(Debug, Serialize)]
144pub struct DualResponse {
145 /// Always `"list"`.
146 pub object: &'static str,
147 /// Always `"bge-m3"`.
148 pub model: &'static str,
149 /// Per-document paired dense + sparse embedding entries.
150 pub data: Vec<DualEmbeddingData>,
151 /// Aggregate token usage estimates.
152 pub usage: Usage,
153}
154
155/// Per-document paired dense + sparse embedding entry.
156#[derive(Debug, Serialize)]
157pub struct DualEmbeddingData {
158 /// Zero-based position of this document in the request's input array.
159 pub index: usize,
160 /// L2-normalized 1024-dimensional dense embedding vector.
161 pub embedding: Vec<f32>,
162 /// Non-zero vocabulary token weights for this document.
163 pub sparse_values: SparseValues,
164}
165
166// ---------------------------------------------------------------------------
167// Models list types
168// ---------------------------------------------------------------------------
169
170/// Top-level response for GET /v1/models (OpenAI-compatible).
171#[derive(Debug, Serialize)]
172pub struct ModelsResponse {
173 /// Always `"list"`.
174 pub object: &'static str,
175 /// List of available model entries.
176 pub data: Vec<ModelEntry>,
177}
178
179/// A single model entry.
180#[derive(Debug, Serialize)]
181pub struct ModelEntry {
182 /// Model identifier — always `"bge-m3"`.
183 pub id: &'static str,
184 /// Always `"model"`.
185 pub object: &'static str,
186}
187
// Unit tests are declared here and defined in a separate `tests` module
// file; they are compiled only for test builds.
#[cfg(test)]
mod tests;