Struct CostModel

Source

pub struct CostModel {
    pub a: f64,
    pub b: f64,
    pub max_workspace_bytes: usize,
}

Expand description

Quadratic-aware workspace cost model for ONNX attention inference.

BGE-M3 uses multi-head attention whose intermediate tensor footprint scales as O(batch * seq^2) (attention score matrix) plus O(batch * seq) (FFN intermediates, projection matrices). The total peak workspace is approximately:

peak ≈ a * (batch * seq) + b * (batch * seq^2)

where a (bytes/token-position) captures the FFN / projection contribution and b (bytes/token-position^2) captures the attention contribution.

At sequence length 512, attention is small relative to FFN, so a linear approximation works. At 8192, b * N^2 dominates by ~16×, so budgeting with the linear term a alone would under-budget by roughly that same factor.

Coefficients are derived at startup by crate::probe or set conservatively from compile-time defaults when measurement is unavailable.

Fields§

§a: f64

Bytes per token-position (linear term: FFN intermediates, projections).

§b: f64

Bytes per token-position-squared (quadratic term: attention scores).

§max_workspace_bytes: usize

Maximum workspace bytes available per worker for a single session.run() call.

Struct CostModel Copy item path

Fields§

Implementations§

impl CostModel

pub const CONSERVATIVE_A: f64 = 16_384.0

pub const CONSERVATIVE_B: f64 = 8.0

pub const DEFAULT_MAX_WORKSPACE: usize

pub fn conservative(max_workspace_bytes: usize) -> Self

pub fn chunk_cost(&self, count: usize, max_seq: usize) -> u128

pub fn fits(&self, count: usize, max_seq: usize) -> bool

Trait Implementations§

impl Clone for CostModel

fn clone(&self) -> CostModel

fn clone_from(&mut self, source: &Self)

impl Debug for CostModel

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl Copy for CostModel

Auto Trait Implementations§

impl Freeze for CostModel

impl RefUnwindSafe for CostModel

impl Send for CostModel

impl Sync for CostModel

impl Unpin for CostModel

impl UnsafeUnpin for CostModel

impl UnwindSafe for CostModel

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> From<T> for T

fn from(t: T) -> T

impl<T> FromRef<T> for T where T: Clone,

fn from_ref(input: &T) -> T

impl<T> Instrument for T

fn instrument(self, span: Span) -> Instrumented<Self>

fn in_current_span(self) -> Instrumented<Self>

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> IntoEither for T

fn into_either(self, into_left: bool) -> Either<Self, Self>

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> where F: FnOnce(&Self) -> bool,

impl<T> Pointable for T

const ALIGN: usize

type Init = T

unsafe fn init(init: <T as Pointable>::Init) -> usize

unsafe fn deref<'a>(ptr: usize) -> &'a T

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

unsafe fn drop(ptr: usize)

impl<T> PolicyExt for T where T: ?Sized,

fn and<P, B, E>(self, other: P) -> And<T, P> where T: Policy<B, E>, P: Policy<B, E>,

fn or<P, B, E>(self, other: P) -> Or<T, P> where T: Policy<B, E>, P: Policy<B, E>,

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

fn vzip(self) -> V

impl<T> WithSubscriber for T

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> where S: Into<Dispatch>,

fn with_current_subscriber(self) -> WithDispatch<Self>

Struct CostModel

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T> FromRef<T> for T
where T: Clone,

impl<T, U> Into<U> for T
where U: From<T>,

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

impl<T> PolicyExt for T
where T: ?Sized,

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

impl<T> ToOwned for T
where T: Clone,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,