From 01088f4f1915e36a7d0d8d8756f62f8207a48911 Mon Sep 17 00:00:00 2001
From: soryu
Date: Sat, 20 Dec 2025 15:36:04 +0000
Subject: Implement makima listen websockets server

---
 makima/src/server/messages.rs | 92 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 92 insertions(+)
 create mode 100644 makima/src/server/messages.rs

(limited to 'makima/src/server/messages.rs')

diff --git a/makima/src/server/messages.rs b/makima/src/server/messages.rs
new file mode 100644
index 0000000..0c92447
--- /dev/null
+++ b/makima/src/server/messages.rs
@@ -0,0 +1,92 @@
+//! WebSocket and API message types for the makima server.
+
+use serde::{Deserialize, Serialize};
+use utoipa::ToSchema;
+
+/// Audio encoding format for WebSocket streaming (variant names are lowercased on the wire).
+#[derive(Debug, Clone, Copy, Deserialize, Serialize, ToSchema)]
+#[serde(rename_all = "lowercase")]
+pub enum AudioEncoding {
+    /// 32-bit floating point PCM samples
+    Pcm32f,
+    /// 16-bit signed integer PCM samples
+    Pcm16,
+    /// Raw bytes (will be interpreted as PCM16)
+    Raw,
+}
+
+/// Initial handshake message from client specifying audio format (field names are camelCase on the wire).
+#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct StartMessage {
+    /// Audio sample rate in Hz (e.g., 16000, 44100, 48000)
+    pub sample_rate: u32,
+    /// Number of audio channels (1 for mono, 2 for stereo)
+    pub channels: u16,
+    /// Audio encoding format
+    pub encoding: AudioEncoding,
+}
+
+/// Stop message to terminate the session.
+#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct StopMessage {
+    /// Optional reason for stopping
+    pub reason: Option<String>,
+}
+
+/// Wrapper for all WebSocket messages from client to server, selected by a `type` tag (`start` / `stop`).
+#[derive(Debug, Clone, Deserialize)]
+#[serde(tag = "type", rename_all = "camelCase")]
+pub enum ClientMessage {
+    Start(StartMessage),
+    Stop(StopMessage),
+}
+
+/// Transcription result message sent from server to client.
+#[derive(Debug, Clone, Serialize, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct TranscriptMessage {
+    /// Speaker identifier (e.g., "Speaker 0", "Speaker 1")
+    pub speaker: String,
+    /// Segment start time in seconds
+    pub start: f32,
+    /// Segment end time in seconds
+    pub end: f32,
+    /// Transcribed text
+    pub text: String,
+    /// Whether this is a final or interim result (serialized as `isFinal`)
+    pub is_final: bool,
+}
+
+/// Wrapper for all WebSocket messages from server to client, distinguished by a `type` tag.
+#[derive(Debug, Clone, Serialize)]
+#[serde(tag = "type", rename_all = "camelCase")]
+pub enum ServerMessage {
+    /// Session is ready for audio streaming
+    Ready { session_id: String }, // NOTE(review): enum-level rename_all skips variant fields; wire key stays "session_id" — confirm
+    /// Transcription result
+    Transcript(TranscriptMessage),
+    /// Error occurred during processing
+    Error { code: String, message: String },
+    /// Session has been stopped
+    Stopped { reason: String },
+}
+
+/// Error response for HTTP API endpoints.
+#[derive(Debug, Clone, Serialize, ToSchema)]
+pub struct ApiError {
+    /// Error code for programmatic handling
+    pub code: String,
+    /// Human-readable error message
+    pub message: String,
+}
+
+impl ApiError {
+    pub fn new(code: impl Into<String>, message: impl Into<String>) -> Self {
+        Self {
+            code: code.into(),
+            message: message.into(),
+        }
+    }
+}
-- 
cgit v1.2.3