summaryrefslogtreecommitdiff
path: root/makima/src/server/messages.rs
diff options
context:
space:
mode:
authorsoryu <soryu@soryu.co>2025-12-20 15:36:04 +0000
committersoryu <soryu@soryu.co>2025-12-23 14:47:18 +0000
commit01088f4f1915e36a7d0d8d8756f62f8207a48911 (patch)
tree8fdbba900f3f4bba32bae76e2e0378848a90cf93 /makima/src/server/messages.rs
parentab9166170043ba5e0ce974e5b7accf0939d686e3 (diff)
downloadsoryu-01088f4f1915e36a7d0d8d8756f62f8207a48911.tar.gz
soryu-01088f4f1915e36a7d0d8d8756f62f8207a48911.zip
Implement makima listen websockets server
Diffstat (limited to 'makima/src/server/messages.rs')
-rw-r--r--makima/src/server/messages.rs92
1 files changed, 92 insertions, 0 deletions
diff --git a/makima/src/server/messages.rs b/makima/src/server/messages.rs
new file mode 100644
index 0000000..0c92447
--- /dev/null
+++ b/makima/src/server/messages.rs
@@ -0,0 +1,92 @@
+//! WebSocket and API message types for the makima server.
+
+use serde::{Deserialize, Serialize};
+use utoipa::ToSchema;
+
+/// Audio encoding format for WebSocket streaming.
+///
+/// Because of `rename_all = "lowercase"`, the variants are written on the
+/// wire as `"pcm32f"`, `"pcm16"` and `"raw"`.
+#[derive(Debug, Clone, Copy, Deserialize, Serialize, ToSchema)]
+#[serde(rename_all = "lowercase")]
+pub enum AudioEncoding {
+ /// 32-bit floating point PCM samples
+ Pcm32f,
+ /// 16-bit signed integer PCM samples
+ Pcm16,
+ /// Raw bytes (will be interpreted as PCM16)
+ // NOTE(review): the PCM16 interpretation happens in the consumer of this
+ // enum, not in this file -- confirm it still holds there.
+ Raw,
+}
+
+/// Initial handshake message from client specifying audio format.
+///
+/// Field names are camelCased on the wire, so the JSON keys are
+/// `sampleRate`, `channels` and `encoding`.
+#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct StartMessage {
+ /// Audio sample rate in Hz (e.g., 16000, 44100, 48000)
+ pub sample_rate: u32,
+ /// Number of audio channels (1 for mono, 2 for stereo)
+ pub channels: u16,
+ /// Audio encoding format
+ pub encoding: AudioEncoding,
+}
+
+/// Stop message to terminate the session.
+#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct StopMessage {
+ /// Optional reason for stopping.
+ ///
+ /// As an `Option` field under the serde derive, a missing or `null`
+ /// `reason` key deserializes to `None`; when serializing, `None` is
+ /// emitted as `null` (no `skip_serializing_if` is configured).
+ pub reason: Option<String>,
+}
+
+/// Wrapper for all WebSocket messages from client to server.
+///
+/// Uses serde's internally tagged representation: the JSON object carries a
+/// `"type"` key whose value selects the variant (`"start"` or `"stop"`), and
+/// the remaining keys of the same object are the fields of the wrapped
+/// message struct.
+#[derive(Debug, Clone, Deserialize)]
+#[serde(tag = "type", rename_all = "camelCase")]
+pub enum ClientMessage {
+ /// Handshake carrying the audio format (`"type": "start"`)
+ Start(StartMessage),
+ /// Request to end the session (`"type": "stop"`)
+ Stop(StopMessage),
+}
+
+/// Transcription result message sent from server to client.
+///
+/// Field names are camelCased on the wire, so `is_final` is emitted as
+/// `isFinal`; the other fields keep their single-word names.
+#[derive(Debug, Clone, Serialize, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct TranscriptMessage {
+ /// Speaker identifier (e.g., "Speaker 0", "Speaker 1")
+ pub speaker: String,
+ /// Segment start time in seconds
+ pub start: f32,
+ /// Segment end time in seconds
+ pub end: f32,
+ /// Transcribed text
+ pub text: String,
+ /// Whether this is a final or interim result
+ // NOTE(review): presumably `true` means final and `false` interim --
+ // confirm against the code that produces these messages.
+ pub is_final: bool,
+}
+
+/// Wrapper for all WebSocket messages from server to client.
+///
+/// Uses serde's internally tagged representation: each message is a JSON
+/// object with a `"type"` key (`"ready"`, `"transcript"`, `"error"` or
+/// `"stopped"`) alongside the variant's own fields.
+///
+/// NOTE(review): `rename_all = "camelCase"` on an enum renames the variant
+/// names only, not the fields inside struct variants. `Ready::session_id`
+/// is therefore serialized as `session_id`, whereas the struct-based
+/// messages in this module emit camelCase keys (e.g. `isFinal`). Confirm
+/// which spelling clients expect before changing it -- adjusting it alters
+/// the wire format.
+#[derive(Debug, Clone, Serialize)]
+#[serde(tag = "type", rename_all = "camelCase")]
+pub enum ServerMessage {
+ /// Session is ready for audio streaming
+ Ready { session_id: String },
+ /// Transcription result
+ Transcript(TranscriptMessage),
+ /// Error occurred during processing
+ Error { code: String, message: String },
+ /// Session has been stopped
+ Stopped { reason: String },
+}
+
+/// Error response for HTTP API endpoints.
+///
+/// Serialized with the field names as written (`code`, `message`); no
+/// rename attribute is applied to this struct.
+#[derive(Debug, Clone, Serialize, ToSchema)]
+pub struct ApiError {
+ /// Error code for programmatic handling
+ pub code: String,
+ /// Human-readable error message
+ pub message: String,
+}
+
+impl ApiError {
+    /// Builds an [`ApiError`] from an error code and a message.
+    ///
+    /// Both arguments accept any value convertible into a `String`, so
+    /// callers may pass string literals as well as owned strings.
+    pub fn new(code: impl Into<String>, message: impl Into<String>) -> Self {
+        let code: String = code.into();
+        let message: String = message.into();
+        ApiError { code, message }
+    }
+}