summary refs log tree commit diff
path: root/makima/src/server/messages.rs
diff options
context:
space:
mode:
Diffstat (limited to 'makima/src/server/messages.rs')
-rw-r--r-- makima/src/server/messages.rs 161
1 files changed, 161 insertions, 0 deletions
diff --git a/makima/src/server/messages.rs b/makima/src/server/messages.rs
index 9c50334..cecb622 100644
--- a/makima/src/server/messages.rs
+++ b/makima/src/server/messages.rs
@@ -103,3 +103,164 @@ impl ApiError {
}
}
}
+
+// =============================================================================
+// TTS (Text-to-Speech) Message Types
+// =============================================================================
+
+// Wire form is the lowercase variant name ("pcm16" / "pcm32f"), as set by the
+// `rename_all = "lowercase"` attribute below. `Pcm16` is the `Default`, so it
+// is the value a `#[serde(default)]` field of this type falls back to.
+// `Eq` is derived alongside `PartialEq`: the enum carries no data, so equality
+// is total and the type can satisfy `Eq` bounds.
+/// TTS audio encoding format for WebSocket streaming.
+#[derive(Debug, Clone, Copy, Deserialize, Serialize, ToSchema, PartialEq, Eq, Default)]
+#[serde(rename_all = "lowercase")]
+pub enum TtsAudioEncoding {
+ /// 16-bit signed integer PCM samples
+ #[default]
+ Pcm16,
+ /// 32-bit floating point PCM samples
+ Pcm32f,
+}
+
+// Wire form is the lowercase variant name ("low" / "normal" / "high"), as set
+// by the `rename_all = "lowercase"` attribute below. `Normal` is the `Default`.
+// `Eq` is derived alongside `PartialEq`: the enum carries no data, so equality
+// is total and the type can satisfy `Eq` bounds.
+// NOTE(review): the queueing/immediacy described on the variants is decided by
+// whatever consumes this value; nothing in this type enforces it.
+/// TTS synthesis priority level.
+#[derive(Debug, Clone, Copy, Deserialize, Serialize, ToSchema, PartialEq, Eq, Default)]
+#[serde(rename_all = "lowercase")]
+pub enum TtsPriority {
+ /// Low priority - may be queued
+ Low,
+ /// Normal priority (default)
+ #[default]
+ Normal,
+ /// High priority - processed immediately
+ High,
+}
+
+// Field names travel in camelCase (`sample_rate` is read and written as
+// `sampleRate`; the other fields are single words and keep their names).
+// Every field carries a serde default, so a start message may omit any or all
+// of them and still deserialize.
+/// TTS session start message from client.
+#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct TtsStartMessage {
+ /// Audio sample rate in Hz (default: 24000)
+ // Missing field -> value returned by `default_tts_sample_rate()`.
+ #[serde(default = "default_tts_sample_rate")]
+ pub sample_rate: u32,
+ /// Audio encoding format
+ // Missing field -> `TtsAudioEncoding`'s `Default` impl.
+ #[serde(default)]
+ pub encoding: TtsAudioEncoding,
+ /// Voice identifier (default: "makima")
+ // Missing field -> value returned by `default_tts_voice()`.
+ #[serde(default = "default_tts_voice")]
+ pub voice: String,
+ /// Language for synthesis (default: "English")
+ // Missing field -> value returned by `default_tts_language()`.
+ #[serde(default = "default_tts_language")]
+ pub language: String,
+}
+
+/// Serde fallback for `TtsStartMessage::sample_rate` when the client omits it:
+/// 24 000 Hz.
+fn default_tts_sample_rate() -> u32 {
+ 24_000
+}
+
+/// Serde fallback for `TtsStartMessage::voice` when the client omits it.
+fn default_tts_voice() -> String {
+ String::from("makima")
+}
+
+/// Serde fallback for `TtsStartMessage::language` when the client omits it.
+fn default_tts_language() -> String {
+ "English".to_owned()
+}
+
+// `text` is the only required field; `priority` may be left out.
+// NOTE(review): the 1000-character cap mentioned on `text` is not enforced by
+// this type (it is a plain `String`); presumably the handler validates it —
+// confirm against the caller.
+/// TTS speak request message from client.
+#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct TtsSpeakMessage {
+ /// Text to synthesize (max 1000 characters)
+ pub text: String,
+ /// Synthesis priority
+ // Missing field -> `TtsPriority`'s `Default` impl.
+ #[serde(default)]
+ pub priority: TtsPriority,
+}
+
+// The single field is an `Option`, which serde's derive treats as `None` when
+// the key is absent, so a stop message needs no payload beyond its tag.
+// No `skip_serializing_if` is set: when this struct is serialized, a `None`
+// reason is still written out rather than dropped.
+/// TTS stop request message from client.
+#[derive(Debug, Clone, Deserialize, Serialize, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct TtsStopMessage {
+ /// Optional reason for stopping
+ pub reason: Option<String>,
+}
+
+/// Wrapper for all TTS WebSocket messages from client to server.
+///
+/// Internally tagged: the `type` field selects the variant and the variant's
+/// own fields sit next to it in the same object. With `rename_all =
+/// "camelCase"` the accepted tag values are `"start"`, `"speak"` and `"stop"`
+/// (for example, in JSON: `{"type": "speak", "text": "hello"}`).
+///
+/// Only `Deserialize` is derived, so this type can be parsed but not
+/// serialized.
+#[derive(Debug, Clone, Deserialize)]
+#[serde(tag = "type", rename_all = "camelCase")]
+pub enum TtsClientMessage {
+ /// Start a new TTS session
+ Start(TtsStartMessage),
+ /// Request speech synthesis
+ Speak(TtsSpeakMessage),
+ /// Stop the current session
+ Stop(TtsStopMessage),
+}
+
+// Serialize-only (no `Deserialize` derive). Field names are emitted in
+// camelCase: `sessionId`, `sampleRate`, `encoding`, `voice`.
+// NOTE(review): "confirmed" values presumably echo the settings from the
+// client's start message after defaults are applied — verify in the handler.
+/// TTS session ready message sent from server to client.
+#[derive(Debug, Clone, Serialize, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct TtsReadyMessage {
+ /// Unique session identifier
+ pub session_id: String,
+ /// Confirmed sample rate
+ pub sample_rate: u32,
+ /// Confirmed encoding format
+ pub encoding: TtsAudioEncoding,
+ /// Confirmed voice
+ pub voice: String,
+}
+
+// Serialize-only (no `Deserialize` derive). Field names are emitted in
+// camelCase: `data`, `isFinal`, `timestamp`.
+// NOTE(review): `data` is a plain `String`; the Base64 encoding is a contract
+// with the producer, not something this type checks.
+/// TTS audio chunk message sent from server to client.
+#[derive(Debug, Clone, Serialize, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct TtsAudioChunkMessage {
+ /// Base64-encoded audio data
+ pub data: String,
+ /// Whether this is the final chunk
+ pub is_final: bool,
+ /// Timestamp in seconds from start of audio
+ pub timestamp: f64,
+}
+
+// Serialize-only (no `Deserialize` derive). Field names are emitted in
+// camelCase: `durationMs`, `totalChunks`, `textLength`.
+// NOTE(review): whether `text_length` counts bytes or characters cannot be
+// told from this definition — confirm where the value is computed.
+/// TTS synthesis complete message sent from server to client.
+#[derive(Debug, Clone, Serialize, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct TtsCompleteMessage {
+ /// Total synthesis duration in milliseconds
+ pub duration_ms: u64,
+ /// Total number of chunks sent
+ pub total_chunks: u32,
+ /// Length of input text
+ pub text_length: u32,
+}
+
+// Serialize-only (no `Deserialize` derive). Both field names are single words,
+// so the camelCase renaming leaves them as `code` and `message`.
+// NOTE(review): `code` is a free-form `String`; the set of valid codes is not
+// defined in this type.
+/// TTS error message sent from server to client.
+#[derive(Debug, Clone, Serialize, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct TtsErrorMessage {
+ /// Error code for programmatic handling
+ pub code: String,
+ /// Human-readable error message
+ pub message: String,
+}
+
+// Serialize-only (no `Deserialize` derive). Unlike the client's
+// `TtsStopMessage`, `reason` here is a required `String`, so the server always
+// has to supply one.
+/// TTS session stopped message sent from server to client.
+#[derive(Debug, Clone, Serialize, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct TtsStoppedMessage {
+ /// Reason for stopping
+ pub reason: String,
+}
+
+/// Wrapper for all TTS WebSocket messages from server to client.
+///
+/// Internally tagged: each serialized message carries a `type` field naming
+/// the variant, with the variant's own fields beside it in the same object.
+/// With `rename_all = "camelCase"` the emitted tag values are `"ready"`,
+/// `"audioChunk"`, `"complete"`, `"error"` and `"stopped"`.
+///
+/// Only `Serialize` is derived, so this type can be written out but not
+/// parsed back.
+#[derive(Debug, Clone, Serialize)]
+#[serde(tag = "type", rename_all = "camelCase")]
+pub enum TtsServerMessage {
+ /// Session is ready for synthesis requests
+ Ready(TtsReadyMessage),
+ /// Audio chunk (streamed during synthesis)
+ AudioChunk(TtsAudioChunkMessage),
+ /// Synthesis completed
+ Complete(TtsCompleteMessage),
+ /// Error occurred
+ Error(TtsErrorMessage),
+ /// Session has been stopped
+ Stopped(TtsStoppedMessage),
+}