bge_m3_embedding_server/handler/
health.rs

1// Copyright (c) 2026 J. Patrick Fulton
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! `GET /health` handler — readiness status, worker counts, and tuning diagnostics.
16
17use std::sync::atomic::Ordering;
18use std::sync::Arc;
19
20use axum::{extract::State, http::StatusCode, response::IntoResponse, Json};
21
22use crate::state::{AppState, ProbeStatus};
23
24/// Handles `GET /health` — returns readiness status, worker counts, and tuning diagnostics.
25///
26/// Returns `503` while models are loading or if all workers have exited; returns
27/// `200 ok` (or `200 warn` when fewer workers are live than configured) with the
28/// current cost-model coefficients and probe status in the `tuning` block.
29pub async fn health(State(state): State<Arc<AppState>>) -> impl IntoResponse {
30    let ready = state.ready.load(Ordering::Acquire);
31    let live = state.pool.live_worker_count();
32    let loaded = state.pool.loaded_worker_count();
33    let total = state.total_workers;
34
35    if !ready {
36        return (
37            StatusCode::SERVICE_UNAVAILABLE,
38            Json(serde_json::json!({"status": "loading"})),
39        )
40            .into_response();
41    }
42
43    if live == 0 {
44        return (
45            StatusCode::SERVICE_UNAVAILABLE,
46            Json(serde_json::json!({
47                "status": "fail",
48                "workers": { "live": live, "total": total }
49            })),
50        )
51            .into_response();
52    }
53
54    if loaded == 0 {
55        return (
56            StatusCode::OK,
57            Json(serde_json::json!({
58                "status": "idle",
59                "workers": { "live": live, "total": total }
60            })),
61        )
62            .into_response();
63    }
64
65    let status = if live < total { "warn" } else { "ok" };
66
67    // Read the live cost model and probe status atomically.
68    let cm = state.cost_model.load();
69    let probe_status = ProbeStatus::from_u8(state.probe_status.load(Ordering::Acquire)).as_str();
70
71    let mut tuning = serde_json::json!({
72        "a_bytes_per_token": cm.a,
73        "b_bytes_per_token_sq": cm.b,
74        "max_workspace_bytes": cm.max_workspace_bytes,
75        "probe_status": probe_status,
76    });
77
78    // Add static memory fields when available (written before probe starts).
79    if let Some(ti) = state.tuning.get() {
80        tuning["memory_source"] = serde_json::Value::String(ti.memory_source.clone());
81        tuning["available_bytes"] =
82            serde_json::Value::Number(serde_json::Number::from(ti.available_bytes));
83        tuning["model_rss_bytes_per_worker"] =
84            serde_json::Value::Number(serde_json::Number::from(ti.model_rss_bytes_per_worker));
85    }
86
87    let body = serde_json::json!({
88        "status": status,
89        "workers": { "live": live, "total": total },
90        "max_seq_length": state.max_seq_length,
91        "tuning": tuning,
92    });
93
94    (StatusCode::OK, Json(body)).into_response()
95}
bge_m3_embedding_server/handler/health.rs

bge_m3_embedding_server/handler/
health.rs