summaryrefslogblamecommitdiff
path: root/makima/src/server/messages.rs
blob: 401afb03f66b51abfa7fa922b76036c0230602ef (plain) (tree)


























                                                                  





                                                                        
































































                                                                             
//! WebSocket and API message types for the makima server.

use serde::{Deserialize, Serialize};
use utoipa::ToSchema;

/// Audio encoding format for WebSocket streaming.
///
/// Variants are (de)serialized by their lowercase names: `"pcm32f"`,
/// `"pcm16"` and `"raw"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize, ToSchema)]
#[serde(rename_all = "lowercase")]
pub enum AudioEncoding {
    /// 32-bit floating point PCM samples
    Pcm32f,
    /// 16-bit signed integer PCM samples
    Pcm16,
    /// Raw bytes (will be interpreted as PCM16)
    Raw,
}

/// Initial handshake message from client specifying audio format.
///
/// Field names are camelCase on the wire (`sampleRate`, `contractId`,
/// `authToken`). The two optional fields are left out of serialized output
/// when they are `None`.
#[derive(Clone, Deserialize, Serialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct StartMessage {
    /// Audio sample rate in Hz (e.g., 16000, 44100, 48000)
    pub sample_rate: u32,
    /// Number of audio channels (1 for mono, 2 for stereo)
    pub channels: u16,
    /// Audio encoding format
    pub encoding: AudioEncoding,
    /// Optional contract ID to save transcript to (requires auth_token)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub contract_id: Option<String>,
    /// Optional auth token (JWT) for authenticated sessions
    #[serde(skip_serializing_if = "Option::is_none")]
    pub auth_token: Option<String>,
}

// `Debug` is written by hand instead of derived so the bearer token can never
// end up in logs or panic messages. Every other field is printed the same way
// the derive would print it.
impl std::fmt::Debug for StartMessage {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Keep the Some/None distinction visible without revealing the secret.
        let auth_token = self.auth_token.as_ref().map(|_| "<redacted>");
        f.debug_struct("StartMessage")
            .field("sample_rate", &self.sample_rate)
            .field("channels", &self.channels)
            .field("encoding", &self.encoding)
            .field("contract_id", &self.contract_id)
            .field("auth_token", &auth_token)
            .finish()
    }
}

/// Stop message to terminate the session.
///
/// Accepted from clients as the `Stop` variant of `ClientMessage`.
#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct StopMessage {
    /// Optional reason for stopping
    // No `skip_serializing_if` here, so a `None` reason is serialized as an
    // explicit `null` rather than being omitted.
    pub reason: Option<String>,
}

/// Wrapper for all WebSocket messages from client to server.
///
/// Internally tagged: the `type` field selects the variant (`"start"` or
/// `"stop"`) and the remaining fields of the same JSON object form the
/// variant's payload.
#[derive(Debug, Clone, Deserialize)]
#[serde(tag = "type", rename_all = "camelCase")]
pub enum ClientMessage {
    /// Handshake carrying the audio format (`"type": "start"`).
    Start(StartMessage),
    /// Request to end the session (`"type": "stop"`).
    Stop(StopMessage),
}

/// Transcription result message sent from server to client.
///
/// Serialized with camelCase keys, so `is_final` appears as `isFinal`.
#[derive(Debug, Clone, Serialize, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct TranscriptMessage {
    /// Speaker identifier (e.g., "Speaker 0", "Speaker 1")
    pub speaker: String,
    /// Segment start time in seconds
    pub start: f32,
    /// Segment end time in seconds
    pub end: f32,
    /// Transcribed text
    pub text: String,
    /// Whether this is a final or interim result
    pub is_final: bool,
}

/// Wrapper for all WebSocket messages from server to client.
///
/// Internally tagged: every serialized message carries a `type` field holding
/// the camelCase variant name (`"ready"`, `"transcript"`, `"error"`,
/// `"stopped"`) next to the variant's own fields.
#[derive(Debug, Clone, Serialize)]
#[serde(tag = "type", rename_all = "camelCase")]
pub enum ServerMessage {
    // NOTE(review): the enum-level `rename_all` renames variants only, not the
    // fields inside struct variants, so this field is emitted as `session_id`
    // while `TranscriptMessage` uses camelCase keys (`isFinal`). Confirm that
    // clients expect `session_id` before changing it; it is part of the wire
    // contract.
    /// Session is ready for audio streaming
    Ready { session_id: String },
    /// Transcription result
    Transcript(TranscriptMessage),
    /// Error occurred during processing
    Error { code: String, message: String },
    /// Session has been stopped
    Stopped { reason: String },
}

/// Error response for HTTP API endpoints.
///
/// Serialized with the field names as written (`code`, `message`); no rename
/// rule is applied.
#[derive(Debug, Clone, Serialize, ToSchema)]
pub struct ApiError {
    /// Error code for programmatic handling
    pub code: String,
    /// Human-readable error message
    pub message: String,
}

impl ApiError {
    pub fn new(code: impl Into<String>, message: impl Into<String>) -> Self {
        Self {
            code: code.into(),
            message: message.into(),
        }
    }
}