{
  "openapi": "3.1.0",
  "info": {
    "title": "Kurdish TTS & STT API",
    "version": "1.0.0",
    "summary": "Text-to-speech and speech-to-text HTTP API for the Kurdish language (Sorani & Kurmanji).",
    "description": "Public developer API for Kurdish text-to-speech (TTS) and speech-to-text (STT), supporting both Sorani (Central Kurdish, Arabic script) and Kurmanji (Northern Kurdish, Latin script).\n\n**Authentication.** All endpoints except `GET /api/get-speakers` require an API key sent in the `x-api-key` header. TTS and STT use **separate key spaces** — a TTS key will not authenticate against the STT endpoints and vice-versa. Generate keys at https://www.kurdishtts.com/settings/api.\n\n**Gotchas worth knowing before you integrate:**\n- `speed` uses the industry-standard convention (higher = faster); it is clamped to 0.25–4.0.\n- `model_version` is strict: only `v3` or `v4`. Anything else is a 422.\n- For v4 generation controls, *omitting* a field means \"use the model default\". Sending `temperature: 0.0` is a hard 422 — omit it to get the default.\n- A v4 TTS response can be HTTP 200 with `generation.collapsed: true`; treat that as a failed generation (it is not billed).\n- The `websocket_url` returned by `/api/stt-stream-connect` is temporary and does not carry your API key.",
    "contact": {
      "name": "Kurdish TTS",
      "url": "https://www.kurdishtts.com/contact"
    }
  },
  "servers": [
    { "url": "https://www.kurdishtts.com", "description": "Production" }
  ],
  "security": [],
  "tags": [
    { "name": "TTS", "description": "Text-to-speech" },
    { "name": "STT", "description": "Speech-to-text" },
    { "name": "Voices", "description": "Voice catalog" }
  ],
  "paths": {
    "/api/tts-proxy": {
      "post": {
        "tags": ["TTS"],
        "summary": "Synthesize Kurdish speech from text",
        "operationId": "synthesizeSpeech",
        "description": "Convert Kurdish text to speech. Returns `audio/wav` by default, or a JSON payload with base64 audio and word-level timestamps when `include_timestamps` is true. The dialect is derived automatically from the `speaker_id` prefix (`sorani_…` / `kurmanji_…`).",
        "security": [{ "TtsApiKey": [] }],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": { "$ref": "#/components/schemas/TtsRequest" },
              "examples": {
                "basic": {
                  "summary": "Basic Sorani synthesis",
                  "value": { "text": "سڵاو، چۆنیت؟", "speaker_id": "sorani_1" }
                },
                "v4WithTimestamps": {
                  "summary": "v4 with word timestamps",
                  "value": { "text": "Silav, tu çawa yî?", "speaker_id": "kurmanji_236", "model_version": "v4", "include_timestamps": true }
                }
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Synthesized audio. `audio/wav` by default; `application/json` (with base64 audio + timestamps) when `include_timestamps` is true.",
            "content": {
              "audio/wav": { "schema": { "type": "string", "format": "binary" } },
              "application/json": { "schema": { "$ref": "#/components/schemas/TtsTimestampsResponse" } }
            }
          },
          "400": { "description": "Bad request (missing speaker_id, text too long, invalid speed).", "content": { "text/plain": { "schema": { "type": "string" } } } },
          "401": { "description": "Missing or invalid API key.", "content": { "text/plain": { "schema": { "type": "string" } } } },
          "403": { "description": "Plan inactive, character quota exceeded, or speaker/model not allowed on your plan.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/Error" } } } },
          "422": { "description": "Backend validation error (e.g. `temperature: 0.0`, an unknown model_version, or sending both `stability` and `temperature`). The `detail` array carries the specific messages.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ValidationError" } } } }
        }
      }
    },
    "/api/stt-proxy": {
      "post": {
        "tags": ["STT"],
        "summary": "Transcribe an uploaded Kurdish audio file",
        "operationId": "transcribeAudio",
        "description": "Transcribe a Kurdish audio file (WAV/MP3/FLAC/OGG/M4A). Max file size and transcript length depend on your plan (free: 10 MB / 500 chars; starter: 50 MB / unlimited; pro: 100 MB / unlimited). One credit is debited per successful transcription.",
        "security": [{ "SttApiKey": [] }],
        "requestBody": {
          "required": true,
          "content": {
            "multipart/form-data": {
              "schema": {
                "type": "object",
                "required": ["file", "dialect"],
                "properties": {
                  "file": { "type": "string", "format": "binary", "description": "Audio file (WAV/MP3/FLAC/OGG/M4A)." },
                  "dialect": { "type": "string", "enum": ["sorani", "kurmanji"], "description": "Kurdish dialect of the audio." }
                }
              }
            }
          }
        },
        "responses": {
          "200": { "description": "Transcription result.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/SttResponse" } } } },
          "400": { "description": "Missing file, invalid dialect, or file exceeds the plan size limit.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/Error" } } } },
          "401": { "description": "Missing or invalid STT API key.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/Error" } } } },
          "403": { "description": "No STT plan, plan inactive, or credit limit exceeded.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/Error" } } } }
        }
      }
    },
    "/api/stt-stream-connect": {
      "post": {
        "tags": ["STT"],
        "summary": "Open a live streaming transcription session",
        "operationId": "startStreamingTranscription",
        "description": "Returns a temporary WebSocket URL for real-time transcription. Connect to the URL within 5 minutes, then stream raw 16-bit PCM mono audio at 16 kHz as binary frames. Send `{\"type\":\"control\",\"event\":\"finalize\"}` to flush; the server streams `{\"text\":\"…\",\"is_final\":bool}` messages and `{\"type\":\"control\",\"event\":\"done\"}` when complete. One streaming session is debited per call.",
        "security": [{ "SttApiKey": [] }],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "type": "object",
                "required": ["dialect"],
                "properties": { "dialect": { "type": "string", "enum": ["sorani", "kurmanji"] } }
              }
            }
          }
        },
        "responses": {
          "200": { "description": "Streaming session created.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/StreamConnectResponse" } } } },
          "400": { "description": "Invalid body or dialect.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/Error" } } } },
          "401": { "description": "Missing or invalid STT API key.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/Error" } } } },
          "403": { "description": "No STT plan, plan inactive, or streaming session limit exceeded.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/Error" } } } }
        }
      }
    },
    "/api/get-speakers": {
      "get": {
        "tags": ["Voices"],
        "summary": "List available Kurdish voices",
        "operationId": "listVoices",
        "description": "Public, unauthenticated catalog of available voices. Use the returned `id` as the `speaker_id` for `/api/tts-proxy`. The dialect of each voice is encoded in its id prefix.",
        "parameters": [
          { "name": "model_version", "in": "query", "required": false, "schema": { "type": "string", "enum": ["v3", "v4"] }, "description": "Filter to a model version's voice set (v3 ≈ 198 voices, v4 ≈ 664 voices)." }
        ],
        "responses": {
          "200": { "description": "Voice catalog.", "content": { "application/json": { "schema": { "type": "object", "properties": { "speakers": { "type": "array", "items": { "$ref": "#/components/schemas/Speaker" } } } } } } },
          "500": { "description": "Failed to fetch speakers.", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/Error" } } } }
        }
      }
    }
  },
  "components": {
    "securitySchemes": {
      "TtsApiKey": { "type": "apiKey", "in": "header", "name": "x-api-key", "description": "TTS API key (key space: TTS). Generate at /settings/api." },
      "SttApiKey": { "type": "apiKey", "in": "header", "name": "x-api-key", "description": "STT API key (key space: STT — separate from TTS). Generate at /settings/api." }
    },
    "schemas": {
      "TtsRequest": {
        "type": "object",
        "required": ["text", "speaker_id"],
        "properties": {
          "text": { "type": "string", "description": "Text to synthesize. Max 500 chars on the free plan, 5000 on paid plans.", "maxLength": 5000 },
          "speaker_id": { "type": "string", "description": "Voice id from /api/get-speakers, e.g. `sorani_1` or `kurmanji_236`. The dialect is derived from the prefix.", "examples": ["sorani_1", "kurmanji_236"] },
          "model_version": { "type": "string", "enum": ["v3", "v4"], "default": "v3", "description": "Model version. Strict — any other value is a 422." },
          "include_timestamps": { "type": "boolean", "default": false, "description": "When true, return JSON with base64 audio + word-level timestamps instead of audio/wav." },
          "speed": { "type": "number", "minimum": 0.25, "maximum": 4.0, "description": "Playback speed, industry convention (higher = faster). Clamped to 0.25–4.0." },
          "temperature": { "type": "number", "description": "v4 only. Omit for the model default — sending 0.0 is a hard 422. Mutually exclusive with `stability`." },
          "stability": { "type": "number", "description": "v4 only. Mutually exclusive with `temperature`." },
          "top_p": { "type": "number", "description": "v4 only. Leniently clamped by the backend." },
          "repetition_penalty": { "type": "number", "description": "v4 only. Leniently clamped by the backend." },
          "seed": { "type": "integer", "description": "v4 only. For reproducible generations; the value used is echoed back in `generation.seed_used`." },
          "pitch": { "type": "number", "description": "Post-processing pitch shift." },
          "silence_trim": { "type": "boolean", "description": "Post-processing: trim leading/trailing silence." },
          "chunk_max_seconds": { "type": "number", "description": "v4 only. Max seconds per synthesis chunk." },
          "sentence_silence_ms": { "type": "integer", "description": "v4 only. Silence inserted between sentences, in milliseconds." }
        }
      },
      "TtsTimestampsResponse": {
        "type": "object",
        "description": "Returned when `include_timestamps` is true.",
        "properties": {
          "audio": { "type": "string", "description": "Base64-encoded PCM audio." },
          "timestamps": { "type": "array", "description": "Word-level timestamps.", "items": { "type": "object", "properties": { "word": { "type": "string" }, "start": { "type": "number" }, "end": { "type": "number" } } } },
          "sample_rate": { "type": "integer", "examples": [24000] },
          "audio_duration": { "type": "number" },
          "generation": {
            "type": "object",
            "description": "What the server actually did. Treat `collapsed: true` as a failed generation even on HTTP 200.",
            "properties": {
              "collapsed": { "type": "boolean" },
              "seed_used": { "type": "integer" },
              "temperature_used": { "type": "number" },
              "retries_used": { "type": "integer" },
              "chunk_count": { "type": "integer" }
            }
          }
        }
      },
      "SttResponse": {
        "type": "object",
        "properties": {
          "text": { "type": "string", "description": "Transcribed text." },
          "dialect_hint": { "type": "string" },
          "language": { "type": "string" },
          "detected_dialect": { "type": "string" },
          "detected_script": { "type": "string" },
          "truncated": { "type": "boolean", "description": "True when a free-plan transcript-length cap clipped the text." },
          "truncation_limit": { "type": "integer", "description": "The character cap applied when `truncated` is true." }
        }
      },
      "StreamConnectResponse": {
        "type": "object",
        "properties": {
          "websocket_url": { "type": "string", "format": "uri", "description": "Temporary wss:// URL. Connect within 5 minutes. Does not contain your API key." },
          "dialect": { "type": "string", "enum": ["sorani", "kurmanji"] },
          "plan": { "type": "string" },
          "streaming_sessions_used": { "type": "integer" },
          "streaming_sessions_total": { "type": "integer" },
          "streaming_sessions_purchased": { "type": "integer" },
          "streaming_sessions_remaining": { "type": "integer" },
          "max_session_duration_seconds": { "type": "integer" },
          "message": { "type": "string" }
        }
      },
      "Speaker": {
        "type": "object",
        "properties": {
          "id": { "type": "string", "description": "Voice id; use as `speaker_id` in /api/tts-proxy.", "examples": ["sorani_1"] },
          "name": { "type": "string", "examples": ["Female 1"] },
          "dialect": { "type": "string", "enum": ["sorani", "kurmanji"] },
          "gender": { "type": "string", "enum": ["male", "female"] },
          "speaker_id": { "type": "string" }
        }
      },
      "Error": {
        "type": "object",
        "properties": {
          "error": { "type": "string" },
          "detail": { "type": "string" },
          "upgrade_url": { "type": "string" }
        }
      },
      "ValidationError": {
        "type": "object",
        "description": "FastAPI-style validation error forwarded from the model backend.",
        "properties": {
          "detail": {
            "type": "array",
            "items": {
              "type": "object",
              "properties": {
                "loc": { "type": "array", "items": { "type": "string" } },
                "msg": { "type": "string" },
                "type": { "type": "string" }
              }
            }
          }
        }
      }
    }
  }
}
