Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions sdk/batch/speechmatics/batch/_transport.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,9 +297,9 @@ async def _prepare_headers(self) -> dict[str, str]:
Headers dictionary with authentication and tracking info
"""
auth_headers = await self._auth.get_auth_headers()
auth_headers[
"User-Agent"
] = f"speechmatics-batch-v{get_version()} python/{sys.version_info.major}.{sys.version_info.minor}"
auth_headers["User-Agent"] = (
f"speechmatics-batch-v{get_version()} python/{sys.version_info.major}.{sys.version_info.minor}"
)

if self._request_id:
auth_headers["X-Request-Id"] = self._request_id
Expand Down
2 changes: 2 additions & 0 deletions sdk/rt/speechmatics/rt/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from ._models import ServerMessageType
from ._models import SessionInfo
from ._models import SpeakerDiarizationConfig
from ._models import SpeakerIdentifier
from ._models import TranscriptionConfig
from ._models import TranscriptResult
from ._models import TranslationConfig
Expand Down Expand Up @@ -53,6 +54,7 @@
"SessionError",
"SessionInfo",
"SpeakerDiarizationConfig",
"SpeakerIdentifier",
"StaticKeyAuth",
"TimeoutError",
"TranscriptResult",
Expand Down
4 changes: 3 additions & 1 deletion sdk/rt/speechmatics/rt/_base_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from typing import Any
from typing import Optional

from typing_extensions import Self

from ._auth import AuthBase
from ._auth import StaticKeyAuth
from ._events import EventEmitter
Expand Down Expand Up @@ -96,7 +98,7 @@ async def _ws_connect(self, ws_headers: Optional[dict] = None) -> None:
await self._transport.connect(ws_headers)
self._recv_task = asyncio.create_task(self._recv_loop())

async def __aenter__(self) -> _BaseClient:
async def __aenter__(self) -> Self:
return self

async def __aexit__(self, *args: Any) -> None:
Expand Down
44 changes: 42 additions & 2 deletions sdk/rt/speechmatics/rt/_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from dataclasses import asdict
from dataclasses import dataclass
from dataclasses import field
from enum import Enum
from typing import Any
from typing import Optional
Expand Down Expand Up @@ -61,7 +62,7 @@ class ClientMessageType(str, Enum):
EndOfStream: Signals that no more audio data will be sent.
SetRecognitionConfig: Updates transcription configuration during
an active session (advanced use).
GetSpeakers: Internal, Speechmatics only message. Allows the client to request speaker data.
GetSpeakers: Allows the client to request speaker data.

Examples:
>>> # Starting a recognition session
Expand Down Expand Up @@ -110,7 +111,7 @@ class ServerMessageType(str, Enum):
change for the given audio segment.
AddPartialTranslation: Provides interim translation results that
may change as more context becomes available.
SpeakerResult: Internal, Speechmatics only message containing the speakers data.
SpeakerResult: Provides the speaker identification data.
Info: Informational messages from the server.
Warning: Warning messages that don't stop transcription.
Error: Error messages indicating transcription failure.
Expand Down Expand Up @@ -245,19 +246,58 @@ class SpeakerDiarizationConfig:
is a close enough match, even if other speakers may be closer. This is useful
for cases where we can flip incorrectly between similar speakers during a single
speaker section.
speakers: (Optional) Add speaker identifiers to your session to identify specific speakers.
This is a list of SpeakerIdentifier objects generated in previous transcription sessions.
You can provide multiple identifiers for a single speaker to help the engine identify
the speaker more accurately.

Examples:
>>> config = SpeakerDiarizationConfig(
max_speakers=2,
speaker_sensitivity=0.8,
prefer_current_speaker=True,
speakers=[
Comment thread
TudorCRL marked this conversation as resolved.
SpeakerIdentifier(label="Agent", speaker_identifiers=["agent_1"]),
SpeakerIdentifier(label="Customer", speaker_identifiers=["cust_1"]),
],
)

"""

max_speakers: Optional[int] = None
speaker_sensitivity: Optional[float] = None
prefer_current_speaker: Optional[bool] = None
speakers: Optional[list[SpeakerIdentifier]] = None


@dataclass
class SpeakerIdentifier:
    """Labeled group of speaker identifiers used to recognise a known speaker.

    Pairs a human-readable label with one or more speaker identifiers. Pass
    instances in `SpeakerDiarizationConfig.speakers`; as documented on that
    field, the identifiers are ones generated in previous transcription
    sessions, and several may be supplied for a single speaker to help the
    engine identify that speaker more accurately.

    Attributes:
        label: Human-readable label to assign to this speaker
            (e.g., "Agent", "Customer", "Alice"). Defaults to an empty string.
        speaker_identifiers: Identifier strings associated with this speaker.
            Defaults to a fresh empty list for every instance.

    Examples:
        >>> agent = SpeakerIdentifier(label="Agent", speaker_identifiers=["agent_1"])
        >>> agent.label
        'Agent'
        >>> agent.speaker_identifiers
        ['agent_1']

        >>> config = SpeakerDiarizationConfig(
        ...     max_speakers=2,
        ...     speakers=[
        ...         SpeakerIdentifier(label="Agent", speaker_identifiers=["agent_1"]),
        ...         SpeakerIdentifier(label="Customer", speaker_identifiers=["cust_1"]),
        ...     ],
        ... )
    """

    label: str = ""
    # default_factory gives each instance its own list instead of one shared default.
    speaker_identifiers: list[str] = field(default_factory=list)


@dataclass
Expand Down