Add remove-disfluencies arg (#96)

TeriDSpeech · web-flow · commit 9c370ce3df3e · 2024-05-03T11:01:56.000+01:00
* Add remove-disfluencies arg

* Fix all_options test

* Add remove-disfluency arg parsing

* lint

* Nest remove_disfluencies within a transcript_filtering_config

* lint

* Fix dicts to use optional type

* Undo unnecessary formatting change

* Remove unnecessary remove_disfluencies param

* Fix copy paste error for transcript_filtering_config
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [1.14.6] - 2024-04-26
+
+## Added
+
+- Support for removing words tagged as disfluency.
+
 ## [1.14.5] - 2024-03-20
 
 ## Added
diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-1.14.5
+1.14.6
diff --git a/speechmatics/cli.py b/speechmatics/cli.py
@@ -235,6 +235,12 @@ def get_transcription_config(
             "volume_threshold": args.get("volume_threshold")
         }
 
+    if args.get("remove_disfluencies") is not None:
+        config["transcript_filtering_config"] = {}
+        config["transcript_filtering_config"]["remove_disfluencies"] = args.get(
+            "remove_disfluencies"
+        )
+
     if args.get("ctrl"):
         LOGGER.warning(f"Using internal dev control command: {args['ctrl']}")
         config["ctrl"] = json.loads(args["ctrl"])
diff --git a/speechmatics/cli_parser.py b/speechmatics/cli_parser.py
@@ -486,6 +486,12 @@ def get_arg_parser():
         required=False,
         help="Comma-separated list of whitelisted event types for audio events.",
     )
+    rt_transcribe_command_parser.add_argument(
+        "--remove-disfluencies",
+        default=False,
+        action="store_true",
+        help="Removes words tagged as disfluency.",
+    )
 
     # Parent parser for batch auto-chapters argument
     batch_audio_events_parser = argparse.ArgumentParser(add_help=False)
diff --git a/speechmatics/models.py b/speechmatics/models.py
@@ -128,9 +128,12 @@ def asdict(self) -> Dict[Any, Any]:
     enable_entities: bool = None
     """Indicates if inverse text normalization entity output is enabled."""
 
-    audio_filtering_config: dict = None
+    audio_filtering_config: Optional[dict] = None
     """Configuration for limiting the transcription of quiet audio."""
 
+    transcript_filtering_config: Optional[dict] = None
+    """Configuration for applying filtering to the transcription."""
+
 
 @dataclass
 class RTSpeakerDiarizationConfig:
diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -623,6 +623,7 @@ def test_rt_main_with_all_options(mock_server, tmp_path):
         str(chunk_size),
         "--auth-token=xyz",
         audio_path,
+        "--remove-disfluencies",
     ]
 
     cli.main(vars(cli.parse_args(args)))
@@ -660,6 +661,12 @@ def test_rt_main_with_all_options(mock_server, tmp_path):
     assert msg["transcription_config"]["max_delay_mode"] == "fixed"
     assert msg["transcription_config"]["speaker_change_sensitivity"] == 0.8
     assert msg["transcription_config"].get("operating_point") is None
+    assert (
+        msg["transcription_config"]["transcript_filtering_config"][
+            "remove_disfluencies"
+        ]
+        is True
+    )
 
     # Check that the chunk size argument is respected
     add_audio_messages = mock_server.find_add_audio_messages()