diff --git a/.gitignore b/.gitignore index f3074d56..f9303614 100644 --- a/.gitignore +++ b/.gitignore @@ -171,3 +171,6 @@ cython_debug/ # PyPI configuration file .pypirc + +# Examples +**/output.wav \ No newline at end of file diff --git a/Makefile b/Makefile index 575f3514..7f69ce26 100644 --- a/Makefile +++ b/Makefile @@ -1,12 +1,12 @@ # Makefile for Speechmatics Python SDKs .PHONY: help -.PHONY: test-all test-rt test-batch test-flow -.PHONY: format-all format-rt format-batch format-flow -.PHONY: lint-all lint-rt lint-batch lint-flow -.PHONY: type-check-all type-check-rt type-check-batch type-check-flow -.PHONY: build-all build-rt build-batch build-flow -.PHONY: clean-all clean-rt clean-batch clean-flow clean-flow +.PHONY: test-all test-rt test-batch test-flow test-tts +.PHONY: format-all format-rt format-batch format-flow format-tts +.PHONY: lint-all lint-rt lint-batch lint-flow lint-tts +.PHONY: type-check-all type-check-rt type-check-batch type-check-flow type-check-tts +.PHONY: build-all build-rt build-batch build-flow build-tts +.PHONY: clean-all clean-rt clean-batch clean-flow clean-tts help: @echo "Available commands:" @@ -40,16 +40,18 @@ help: @echo " build-rt Build RT SDK" @echo " build-batch Build Batch SDK" @echo " build-flow Build Flow SDK" + @echo " build-tts Build TTS SDK" @echo "" @echo "Cleaning:" @echo " clean-all Clean all SDKs" @echo " clean-rt Clean RT SDK build artifacts" @echo " clean-batch Clean Batch SDK build artifacts" @echo " clean-flow Clean Flow SDK build artifacts" + @echo " clean-tts Clean TTS SDK build artifacts" @echo "" # Testing targets -test-all: test-rt test-batch test-flow +test-all: test-rt test-batch test-flow test-tts test-rt: pytest tests/rt/ -v @@ -61,7 +63,7 @@ test-flow: pytest tests/flow/ -v # Formatting targets -format-all: format-rt format-batch format-flow +format-all: format-rt format-batch format-flow format-tts format-rt: cd sdk/rt/speechmatics && black . 
@@ -75,8 +77,12 @@ format-flow: cd sdk/flow/speechmatics && black . cd sdk/flow/speechmatics && ruff check --fix . +format-tts: + cd sdk/tts/speechmatics && black . + cd sdk/tts/speechmatics && ruff check --fix . + # Linting targets -lint-all: lint-rt lint-batch lint-flow +lint-all: lint-rt lint-batch lint-flow lint-tts lint-rt: cd sdk/rt/speechmatics && ruff check . @@ -87,8 +93,11 @@ lint-batch: lint-flow: cd sdk/flow/speechmatics && ruff check . +lint-tts: + cd sdk/tts/speechmatics && ruff check . + # Type checking targets -type-check-all: type-check-rt type-check-batch type-check-flow +type-check-all: type-check-rt type-check-batch type-check-flow type-check-tts type-check-rt: cd sdk/rt/speechmatics && mypy . @@ -99,18 +108,22 @@ type-check-batch: type-check-flow: cd sdk/flow/speechmatics && mypy . +type-check-tts: + cd sdk/tts/speechmatics && mypy . + # Installation targets install-dev: python -m pip install --upgrade pip python -m pip install -e sdk/rt[dev] python -m pip install -e sdk/batch[dev] python -m pip install -e sdk/flow[dev] + python -m pip install -e sdk/tts[dev] install-build: python -m pip install --upgrade build # Building targets -build-all: build-rt build-batch build-flow +build-all: build-rt build-batch build-flow build-tts build-rt: install-build cd sdk/rt && python -m build @@ -121,8 +134,11 @@ build-batch: install-build build-flow: install-build cd sdk/flow && python -m build +build-tts: install-build + cd sdk/tts && python -m build + # Cleaning targets -clean-all: clean-rt clean-batch clean-flow +clean-all: clean-rt clean-batch clean-flow clean-tts clean-rt: rm -rf sdk/rt/dist sdk/rt/build sdk/rt/*.egg-info @@ -135,3 +151,7 @@ clean-batch: clean-flow: rm -rf sdk/flow/dist sdk/flow/build sdk/flow/*.egg-info find sdk/flow -name __pycache__ -exec rm -rf {} + 2>/dev/null || true + +clean-tts: + rm -rf sdk/tts/dist sdk/tts/build sdk/tts/*.egg-info + find sdk/tts -name __pycache__ -exec rm -rf {} + 2>/dev/null || true \ No newline at end of 
"""Example: generate speech with the Speechmatics TTS SDK and save it as a WAV file."""

import asyncio
import wave
from pathlib import Path

from speechmatics.tts import AsyncClient, OutputFormat, Voice

# Set configuration
TEXT = "Welcome to the future of audio generation from text!"
VOICE = Voice.SARAH
OUTPUT_FORMAT = OutputFormat.RAW_PCM_16000
OUTPUT_FILE = "output.wav"

# Format parameters written into the WAV header. They have to describe the raw
# audio requested through OUTPUT_FORMAT, so change them together.
SAMPLE_RATE = 16000  # Hz
SAMPLE_WIDTH = 2  # bytes per sample (16-bit audio)
CHANNELS = 1  # Mono audio


def _write_wav(audio_data: bytes, output_file_name: str) -> None:
    """Write raw audio frames to *output_file_name* with a WAV header (blocking)."""
    with wave.open(output_file_name, "wb") as wav_file:
        wav_file.setnchannels(CHANNELS)
        wav_file.setsampwidth(SAMPLE_WIDTH)
        wav_file.setframerate(SAMPLE_RATE)
        wav_file.writeframes(audio_data)


async def save_audio_to_wav(audio_data: bytes, output_file_name: str) -> None:
    """
    Save raw audio bytes to a WAV file without blocking the event loop.

    Args:
        audio_data: Raw audio frames matching SAMPLE_RATE, SAMPLE_WIDTH and CHANNELS.
        output_file_name: Path of the WAV file to create or overwrite.
    """
    # The wave module does synchronous file I/O, so run it in a worker thread.
    await asyncio.to_thread(_write_wav, audio_data, output_file_name)


async def main() -> None:
    """Generate speech for TEXT and store it in OUTPUT_FILE."""
    print(f"Generating speech from text: {TEXT}")

    try:
        async with AsyncClient() as client:
            async with await client.generate(
                text=TEXT,
                voice=VOICE,
                output_format=OUTPUT_FORMAT,
            ) as response:
                # Read the response body in chunks, then combine them.
                audio_chunks = [chunk async for chunk in response.content.iter_chunked(1024)]

            audio_data = b"".join(audio_chunks)
            await save_audio_to_wav(audio_data, OUTPUT_FILE)
            print(f"Speech saved to {Path(OUTPUT_FILE).resolve()}")
    except Exception as e:
        # Top-level boundary of an example script: report the failure and exit.
        print(f"An error occurred: {e}")


# Run the async main function
if __name__ == "__main__":
    asyncio.run(main())
+[![PyPI](https://img.shields.io/pypi/v/speechmatics-tts)](https://pypi.org/project/speechmatics-tts/) +![PythonSupport](https://img.shields.io/badge/Python-3.9%2B-green) + +Async Python client for Speechmatics TTS API. + +## Features + +- Async API client with comprehensive error handling +- Type hints throughout for better IDE support +- Environment variable support for credentials + +## Installation + +```bash +pip install speechmatics-tts +``` + +## Usage + +### Quick Start + +```python +import asyncio + +import wave +from pathlib import Path + +from speechmatics.tts import AsyncClient, Voice, OutputFormat + +async def save_audio(audio_data: bytes, filename: str) -> None: + with wave.open(filename, "wb") as wav: + wav.setnchannels(1) # Mono + wav.setsampwidth(2) # 16-bit + wav.setframerate(16000) # 16kHz + wav.writeframes(audio_data) + +# Generate speech data from text and save to WAV file +async def main(): + async with AsyncClient() as client: + async with await client.generate( + text="Welcome to the future of audio generation from text!", + voice=Voice.SARAH, + output_format=OutputFormat.RAW_PCM_16000 + ) as response: + audio = b''.join([chunk async for chunk in response.content.iter_chunked(1024)]) + await save_audio(audio, "output.wav") + + +# Run the async main function +if __name__ == "__main__": + asyncio.run(main()) + +``` + +### Error Handling + +```python +import asyncio +from speechmatics.tts import ( + AsyncClient, + AuthenticationError, + ConnectionError, + TimeoutError, + TransportError, +) + +async def main(): + try: + async with AsyncClient() as client: + response = await client.generate(text="Hello, this is the Speechmatics TTS API. 
We are excited to have you here!") + + except AuthenticationError: + print("Invalid API key") + except ConnectionError as e: + print(f"Could not reach the service: {e}") + except TimeoutError as e: + print(f"Request timed out: {e}") + except TransportError as e: + print(f"Speech generation failed: {e}") + +asyncio.run(main()) +``` + +### Connection Configuration + +```python +import asyncio +from speechmatics.tts import AsyncClient, ConnectionConfig + +async def main(): + # Custom connection timeouts (the URL and API key are AsyncClient arguments) + config = ConnectionConfig( + connect_timeout=30.0, + operation_timeout=600.0 + ) + + async with AsyncClient(api_key="your-api-key", url="https://preview.tts.speechmatics.com", conn_config=config) as client: + response = await client.generate(text="Hello World") + + +asyncio.run(main()) +``` + +## Logging + +The client supports logging with request id tracing for debugging. To increase logging verbosity, set the `DEBUG` level in your own code: + +```python +import logging +import sys + +logging.basicConfig( + level=logging.DEBUG, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + handlers=[ + logging.StreamHandler(sys.stdout) + ] +) +``` + +## Environment Variables + +The client supports the following environment variables: + +- `SPEECHMATICS_API_KEY`: Your Speechmatics API key +- `SPEECHMATICS_TTS_URL`: Custom API endpoint URL (optional) diff --git a/sdk/tts/pyproject.toml b/sdk/tts/pyproject.toml new file mode 100644 index 00000000..065ccecf --- /dev/null +++ b/sdk/tts/pyproject.toml @@ -0,0 +1,52 @@ +[build-system] +requires = ["setuptools>=61.0.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "speechmatics-tts" +dynamic = ["version"] +description = "Speechmatics TTS API Client" +readme = "README.md" +authors = [{ name = "Speechmatics", email = "support@speechmatics.com" }] +license = "MIT" +requires-python = ">=3.9" +dependencies = ["aiohttp", "aiofiles"] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Programming 
Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Operating System :: OS Independent", + "Topic :: Multimedia :: Sound/Audio :: Speech", + "Topic :: Software Development :: Libraries :: Python Modules", +] +keywords = ["speechmatics", "text-to-speech", "tts", "speech-synthesis", "api"] + +[project.optional-dependencies] +dev = [ + "black", + "ruff", + "mypy", + "types-aiofiles", + "pre-commit", + "pytest", + "pytest-asyncio", + "pytest-cov", + "pytest-mock", + "build", +] + +[project.urls] +homepage = "https://github.com/speechmatics/speechmatics-python-sdk" +documentation = "https://docs.speechmatics.com/" +repository = "https://github.com/speechmatics/speechmatics-python-sdk" +issues = "https://github.com/speechmatics/speechmatics-python-sdk/issues" + +[tool.setuptools.dynamic] +version = { attr = "speechmatics.tts.__version__" } + +[tool.setuptools.packages.find] +where = ["."] diff --git a/sdk/tts/speechmatics/tts/__init__.py b/sdk/tts/speechmatics/tts/__init__.py new file mode 100644 index 00000000..68e44e5f --- /dev/null +++ b/sdk/tts/speechmatics/tts/__init__.py @@ -0,0 +1,29 @@ +__version__ = "0.0.0" + +from ._async_client import AsyncClient +from ._auth import AuthBase +from ._auth import JWTAuth +from ._auth import StaticKeyAuth +from ._exceptions import AuthenticationError +from ._exceptions import ConfigurationError +from ._exceptions import ConnectionError +from ._exceptions import TimeoutError +from ._exceptions import TransportError +from ._models import ConnectionConfig +from ._models import OutputFormat +from ._models import Voice + +__all__ = [ + "AsyncClient", + "AuthBase", + "JWTAuth", + "StaticKeyAuth", + "ConfigurationError", + "AuthenticationError", + "ConnectionError", + "TransportError", + "TimeoutError", + "ConnectionConfig", + "Voice", + "OutputFormat", +] \ No newline at end of file diff 
class AsyncClient:
    """
    Asynchronous client for the Speechmatics text-to-speech (TTS) API.

    The client owns one HTTP ``Transport`` and exposes :meth:`generate`, which
    POSTs text to the TTS endpoint and returns the HTTP response so the caller
    can read or stream the synthesized audio. Use it as an async context
    manager (or call :meth:`close`) so the underlying transport is closed.

    Args:
        auth: Authentication instance. If not provided, a ``StaticKeyAuth`` is
            built from ``api_key`` or the SPEECHMATICS_API_KEY environment variable.
        api_key: Speechmatics API key (used only if ``auth`` is not provided).
        url: REST API endpoint URL. If not provided, uses the SPEECHMATICS_TTS_URL
            environment variable or ``https://preview.tts.speechmatics.com``.
        conn_config: Connection timeouts. Defaults to ``ConnectionConfig()``.

    Raises:
        ValueError: If ``auth`` is not provided and no API key is available
            (raised by ``StaticKeyAuth``).

    Examples:
        Basic usage:
            >>> async with AsyncClient(api_key="your-key") as client:
            ...     async with await client.generate(text="Hello world") as response:
            ...         audio = await response.read()

        With JWT authentication:
            >>> from speechmatics.tts import JWTAuth
            >>> auth = JWTAuth("your-api-key", ttl=3600)
            >>> async with AsyncClient(auth=auth) as client:
            ...     # Use client with JWT auth
            ...     pass
    """

    def __init__(
        self,
        auth: Optional[AuthBase] = None,
        *,
        api_key: Optional[str] = None,
        url: Optional[str] = None,
        conn_config: Optional[ConnectionConfig] = None,
    ) -> None:
        """
        Initialize the AsyncClient.

        Args:
            auth: Authentication method, it can be StaticKeyAuth or JWTAuth.
                If None, creates StaticKeyAuth with the api_key.
            api_key: Speechmatics API key. If None, uses SPEECHMATICS_API_KEY env var.
            url: REST API endpoint URL. If None, uses SPEECHMATICS_TTS_URL env var
                or ``https://preview.tts.speechmatics.com``.
            conn_config: Connection timeouts; ``ConnectionConfig()`` when None.

        Raises:
            ValueError: If auth is None and API key is not provided/found.
        """
        self._auth = auth or StaticKeyAuth(api_key)
        self._url = url or os.environ.get("SPEECHMATICS_TTS_URL") or "https://preview.tts.speechmatics.com"
        self._conn_config = conn_config or ConnectionConfig()
        # One id per client instance; it is handed to the transport for request tracking.
        self._request_id = str(uuid.uuid4())
        self._transport = Transport(self._url, self._conn_config, self._auth, self._request_id)

        self._logger = get_logger(__name__)
        self._logger.debug("AsyncClient initialized (request_id=%s, url=%s)", self._request_id, self._url)

    async def __aenter__(self) -> AsyncClient:
        """
        Async context manager entry.

        Returns:
            Self for use in async with statements.

        Examples:
            >>> async with AsyncClient(api_key="key") as client:
            ...     async with await client.generate(text="Hello world") as response:
            ...         audio = await response.read()
        """
        return self

    async def generate(
        self,
        *,
        text: str = "",
        voice: Voice = Voice.SARAH,
        output_format: OutputFormat = OutputFormat.RAW_PCM_16000,
    ) -> aiohttp.ClientResponse:
        """
        Convert text to speech audio.

        Sends ``{"text": text}`` as JSON to
        ``/generate/<voice>?output_format=<output_format>``.

        Args:
            text: Text to convert to speech.
            voice: Member of ``Voice`` selecting the voice used for synthesis.
            output_format: Member of ``OutputFormat`` selecting the audio encoding.

        Returns:
            The HTTP response returned by the transport. The audio is the
            response body: read it with ``await response.read()`` or stream it
            via ``response.content``. Release the response when done, e.g. with
            ``async with await client.generate(...) as response:``.

        Raises:
            AuthenticationError: If authentication fails.
            TransportError: If the request fails.

        Examples:
            >>> async with await client.generate(text="Hello world") as response:
            ...     audio_data = await response.read()
            >>> with open("output.raw", "wb") as f:
            ...     f.write(audio_data)
        """
        # Voice and format travel in the URL; only the text goes in the body.
        request_data = {
            "text": text,
        }

        response = await self._transport.post(
            f"/generate/{voice.value}?output_format={output_format.value}", json_data=request_data
        )
        return response

    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """
        Async context manager exit with automatic cleanup.

        Closes the transport when leaving the ``async with`` block.

        Args:
            exc_type: Exception type if an exception occurred.
            exc_val: Exception value if an exception occurred.
            exc_tb: Exception traceback if an exception occurred.
        """
        await self.close()

    async def close(self) -> None:
        """
        Close the client and its transport.

        Errors raised while closing are logged at DEBUG level and otherwise
        ignored, so cleanup never masks the caller's own exception.

        Examples:
            >>> client = AsyncClient(api_key="key")
            >>> try:
            ...     response = await client.generate(text="Hello world")
            ... finally:
            ...     await client.close()
        """
        try:
            await self._transport.close()
        except Exception:
            # Best effort cleanup, but leave a trace instead of failing silently.
            self._logger.debug("Ignoring error while closing transport", exc_info=True)
async def _generate_token(self) -> str:
    """
    Request a temporary key (JWT) from the management platform.

    POSTs ``ttl``, ``region`` and, when set, ``client_ref`` to
    ``<mp_url>/v1/api_keys?type=tts`` using the main API key as bearer token,
    and returns the ``key_value`` field of the JSON reply.

    Returns:
        The temporary key as a string.

    Raises:
        ImportError: If aiohttp cannot be imported.
        AuthenticationError: If the platform answers with a status other than
            201, the request times out, a network error occurs, or the reply
            cannot be parsed.
    """
    try:
        import aiohttp
    except ImportError as e:
        # aiohttp is a regular dependency of this package; there is no "jwt" extra.
        raise ImportError(
            "aiohttp is required for JWT authentication. Please install it with `pip install aiohttp`"
        ) from e

    endpoint = f"{self._mp_url}/v1/api_keys"
    params = {"type": "tts"}
    payload = {"ttl": self._ttl, "region": str(self._region)}

    if self._client_ref:
        payload["client_ref"] = self._client_ref

    headers = {
        "Authorization": f"Bearer {self._api_key}",
        "Content-Type": "application/json",
    }

    if self._request_id:
        headers["X-Request-Id"] = self._request_id

    try:
        async with aiohttp.ClientSession() as session:
            async with session.post(
                endpoint,
                params=params,
                json=payload,
                headers=headers,
                timeout=aiohttp.ClientTimeout(total=10),
            ) as response:
                if response.status != 201:
                    text = await response.text()
                    raise AuthenticationError(f"Failed to generate JWT: HTTP {response.status}: {text}")

                data = await response.json()
                return str(data["key_value"])

    except AuthenticationError:
        # Already carries the HTTP status and body; do not re-wrap it as "unexpected".
        raise
    except asyncio.TimeoutError as e:
        # str(e) is empty for timeouts, so give the error an explicit message.
        raise AuthenticationError("Timed out generating JWT") from e
    except aiohttp.ClientError as e:
        raise AuthenticationError(f"Network error generating JWT: {e}") from e
    except Exception as e:
        raise AuthenticationError(f"Unexpected error generating JWT: {e}") from e
def get_version() -> str:
    """
    Return the version string of this SDK.

    Looks up the installed ``speechmatics-tts`` distribution first. When the
    distribution metadata is missing, it falls back to the package's
    ``__version__`` attribute, and finally to ``"0.0.0"``.

    Returns:
        The version string.
    """
    try:
        # Must match the distribution name declared in pyproject.toml.
        return importlib.metadata.version("speechmatics-tts")
    except importlib.metadata.PackageNotFoundError:
        try:
            from . import __version__
        except ImportError:
            return "0.0.0"
        return __version__
+ + The logger uses Python's standard logging module and includes NullHandler + by default to avoid unwanted output. Users can configure logging levels + and handlers as needed. + + Args: + name: Logger name, typically __name__ from the calling module. + + Returns: + Configured logger instance. + + Examples: + Basic usage in SDK modules: + logger = get_logger(__name__) + logger.debug("HTTP request sent %s %s", method, url) + logger.info("Job submitted (job_id=%s)", job_id) + logger.warning("Job failed (job_id=%s): %s", job_id, error) + logger.error("Connection failed: %s", e) + + Enable debug logging in user code: + import logging + logging.basicConfig(level=logging.DEBUG) + # Now all SDK debug messages will be visible + + Custom logging configuration: + import logging + logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' + ) + + # Or for specific components: + logging.getLogger('speechmatics.batch').setLevel(logging.DEBUG) + """ + module_logger = logging.getLogger(name) + module_logger.addHandler(logging.NullHandler()) + return module_logger + + +__all__ = ["get_logger"] diff --git a/sdk/tts/speechmatics/tts/_models.py b/sdk/tts/speechmatics/tts/_models.py new file mode 100644 index 00000000..fdbca0e6 --- /dev/null +++ b/sdk/tts/speechmatics/tts/_models.py @@ -0,0 +1,57 @@ +""" +Models for the Speechmatics TTS SDK. + +This module contains all data models, enums, and configuration classes used +throughout the Speechmatics TTS SDK. These models +provide type-safe interfaces for configuration, job management, and +result handling based on the official Speechmatics API schema. +""" + +from __future__ import annotations + +from dataclasses import dataclass +from enum import Enum + + +@dataclass +class ConnectionConfig: + """ + Configuration for HTTP connection parameters. + + This class defines connection-related settings and timeouts. 
class OutputFormat(str, Enum):
    """
    Output format for the generated audio.

    Each member is also a ``str``; the member's value is the identifier sent
    to the API as the ``output_format`` query parameter.

    Attributes:
        WAV_16000: WAV audio format with 16kHz sample rate.
        RAW_PCM_16000: Raw audio format with 16kHz sample rate. Note that the
            value sent to the API is "pcm_16000", without the "raw_" prefix.
    """

    WAV_16000 = "wav_16000"
    RAW_PCM_16000 = "pcm_16000"
def __init__(
    self,
    url: str,
    conn_config: ConnectionConfig,
    auth: AuthBase,
    request_id: Optional[str] = None,
) -> None:
    """
    Set up the transport; no HTTP session is opened yet.

    Args:
        url: Base URL that request paths are appended to.
        conn_config: Connection configuration holding the timeouts.
        auth: Authentication instance that supplies the request headers.
        request_id: Optional identifier for request tracking. A random
            UUID4 string is generated when it is missing or empty.
    """
    if not request_id:
        request_id = str(uuid.uuid4())

    self._logger = get_logger(__name__)

    self._url, self._conn_config, self._auth = url, conn_config, auth
    self._request_id = request_id

    # The session is created on first use; _closed marks a transport that was shut down.
    self._session: Optional[aiohttp.ClientSession] = None
    self._closed = False

    self._logger.debug("Transport initialized (request_id=%s, url=%s)", self._request_id, self._url)
async def _request(
    self,
    method: str,
    path: str,
    params: Optional[dict[str, Any]] = None,
    json_data: Optional[dict[str, Any]] = None,
    multipart_data: Optional[dict[str, Any]] = None,
    timeout: Optional[float] = None,
) -> aiohttp.ClientResponse:
    """
    Send an HTTP request to the API and return the handled response.

    Args:
        method: HTTP method (GET, POST, DELETE)
        path: API endpoint path, appended to the base URL
        params: Optional query parameters
        json_data: Optional JSON data for request body; takes precedence
            over ``multipart_data``
        multipart_data: Optional multipart form data. A 3-tuple value is
            treated as ``(filename, file_data, content_type)``; dict values
            are JSON-encoded.
        timeout: Optional total timeout in seconds for this request only.
            When None, the session's configured timeouts apply.

    Returns:
        HTTP response object

    Raises:
        AuthenticationError: If authentication fails
        ConnectionError: If the session cannot be created or the HTTP
            client reports an error
        TransportError: On timeout and for any other unexpected error
    """
    import json  # local import: this module has no top-level ``import json``

    self._ensure_session()

    if self._session is None:
        raise ConnectionError("Failed to create HTTP session")

    url = f"{self._url.rstrip('/')}{path}"
    headers = await self._prepare_headers()

    self._logger.debug(
        "Sending HTTP request %s %s (json=%s, multipart=%s)",
        method,
        url,
        json_data is not None,
        multipart_data is not None,
    )

    kwargs: dict[str, Any] = {
        "headers": headers,
        "params": params,
    }

    # Pass a per-request timeout only when the caller asked for one. Handing
    # aiohttp an explicit ``timeout=None`` may replace the session timeouts
    # with "no timeout" -- NOTE(review): confirm for the supported aiohttp versions.
    effective_timeout = self._conn_config.operation_timeout
    if timeout is not None:
        kwargs["timeout"] = aiohttp.ClientTimeout(total=timeout)
        effective_timeout = timeout

    try:
        if json_data is not None:
            kwargs["json"] = json_data
        elif multipart_data:
            # Force multipart encoding even when no files are present (for fetch_data support)
            form_data = aiohttp.FormData(default_to_multipart=True)
            for key, value in multipart_data.items():
                if isinstance(value, tuple) and len(value) == 3:
                    # File data: (filename, file_data, content_type)
                    filename, file_data, content_type = value
                    # aiohttp cannot serialize io.BytesIO directly; convert to bytes
                    if isinstance(file_data, io.BytesIO):
                        file_payload = file_data.getvalue()
                    else:
                        file_payload = file_data
                    form_data.add_field(key, file_payload, filename=filename, content_type=content_type)
                else:
                    # Regular form field
                    if isinstance(value, dict):
                        value = json.dumps(value)
                    form_data.add_field(key, value)
            kwargs["data"] = form_data

        response = await self._session.request(method, url, **kwargs)
        try:
            return await self._handle_response(response)
        except Exception:
            # Release the connection before the error propagates.
            response.close()
            raise

    except (AuthenticationError, ConnectionError, TransportError):
        # Errors that already use the SDK's own types keep their type, so
        # callers can catch AuthenticationError as documented.
        raise
    except asyncio.TimeoutError:
        self._logger.error("Request timeout %s %s (timeout=%.1fs)", method, path, effective_timeout)
        raise TransportError(f"Request timeout for {method} {path}") from None
    except aiohttp.ClientError as e:
        self._logger.error("Request failed %s %s: %s", method, path, e)
        raise ConnectionError(f"Request failed: {e}") from e
    except Exception as e:
        self._logger.error("Unexpected error %s %s: %s", method, path, e)
        raise TransportError(f"Unexpected error: {e}") from e

async def _prepare_headers(self) -> dict[str, str]:
    """
    Prepare HTTP headers for requests.

    Returns:
        Headers dictionary with the authentication headers, a User-Agent
        identifying this SDK and the Python version, and X-Request-Id when
        a request id is set.
    """
    # Copy so the dict returned by the auth provider is never mutated.
    headers = dict(await self._auth.get_auth_headers())
    headers["User-Agent"] = (
        f"speechmatics-tts-v{get_version()} python/{sys.version_info.major}.{sys.version_info.minor}"
    )

    if self._request_id:
        headers["X-Request-Id"] = self._request_id

    return headers
+ + Args: + response: HTTP response object + + Returns: + HTTP response object + + Raises: + AuthenticationError: For 401/403 responses + TransportError: For other error responses + """ + try: + if response.status == 401: + raise AuthenticationError("Invalid API key - authentication failed") + elif response.status == 403: + raise AuthenticationError("Access forbidden - check API key permissions") + elif response.status >= 400: + error_text = await response.text() + self._logger.error("HTTP error %d %s: %s", response.status, response.reason, error_text) + raise TransportError(f"HTTP {response.status}: {response.reason} - {error_text}") + return response + + except aiohttp.ContentTypeError as e: + self._logger.error("Failed to parse JSON response: %s", e) + raise TransportError(f"Failed to parse response: {e}") from e + except Exception as e: + self._logger.error("Error handling response: %s", e) + raise TransportError(f"Error handling response: {e}") from e diff --git a/tests/tts/async_http_test.py b/tests/tts/async_http_test.py new file mode 100644 index 00000000..72b5eff5 --- /dev/null +++ b/tests/tts/async_http_test.py @@ -0,0 +1,19 @@ +import os + +import pytest + +from speechmatics.tts import AsyncClient + + +@pytest.mark.asyncio +@pytest.mark.skipif(os.getenv("SPEECHMATICS_API_KEY") is None, reason="Skipping test if API key is not set") +async def test_async_http(): + async with AsyncClient() as client: + async with await client.generate(text="Hello world") as response: + start_length = response.content.total_raw_bytes + assert response.status == 200 + async for chunk in response.content.iter_chunked(1024): + assert chunk + end_length = response.content.total_raw_bytes + # Assert that bytes are streamed async from the socket rather than awaited + assert start_length <= end_length