// bge_m3_embedding_server/handler/health.rs
use std::sync::atomic::Ordering;
use std::sync::Arc;

use axum::{extract::State, http::StatusCode, response::IntoResponse, Json};

use crate::state::{AppState, ProbeStatus};
24pub async fn health(State(state): State<Arc<AppState>>) -> impl IntoResponse {
30 let ready = state.ready.load(Ordering::Acquire);
31 let live = state.pool.live_worker_count();
32 let loaded = state.pool.loaded_worker_count();
33 let total = state.total_workers;
34
35 if !ready {
36 return (
37 StatusCode::SERVICE_UNAVAILABLE,
38 Json(serde_json::json!({"status": "loading"})),
39 )
40 .into_response();
41 }
42
43 if live == 0 {
44 return (
45 StatusCode::SERVICE_UNAVAILABLE,
46 Json(serde_json::json!({
47 "status": "fail",
48 "workers": { "live": live, "total": total }
49 })),
50 )
51 .into_response();
52 }
53
54 if loaded == 0 {
55 return (
56 StatusCode::OK,
57 Json(serde_json::json!({
58 "status": "idle",
59 "workers": { "live": live, "total": total }
60 })),
61 )
62 .into_response();
63 }
64
65 let status = if live < total { "warn" } else { "ok" };
66
67 let cm = state.cost_model.load();
69 let probe_status = ProbeStatus::from_u8(state.probe_status.load(Ordering::Acquire)).as_str();
70
71 let mut tuning = serde_json::json!({
72 "a_bytes_per_token": cm.a,
73 "b_bytes_per_token_sq": cm.b,
74 "max_workspace_bytes": cm.max_workspace_bytes,
75 "probe_status": probe_status,
76 });
77
78 if let Some(ti) = state.tuning.get() {
80 tuning["memory_source"] = serde_json::Value::String(ti.memory_source.clone());
81 tuning["available_bytes"] =
82 serde_json::Value::Number(serde_json::Number::from(ti.available_bytes));
83 tuning["model_rss_bytes_per_worker"] =
84 serde_json::Value::Number(serde_json::Number::from(ti.model_rss_bytes_per_worker));
85 }
86
87 let body = serde_json::json!({
88 "status": status,
89 "workers": { "live": live, "total": total },
90 "max_seq_length": state.max_seq_length,
91 "tuning": tuning,
92 });
93
94 (StatusCode::OK, Json(body)).into_response()
95}