Skip to content
This repository was archived by the owner on Sep 25, 2023. It is now read-only.

Commit cbd00b2

Browse files
Implement silence detection
1 parent fb6cd5c commit cbd00b2

File tree

7 files changed

+274
-23
lines changed

7 files changed

+274
-23
lines changed

ConfusedPolarBear.Plugin.IntroSkipper.Tests/TestAudioFingerprinting.cs

+23
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,29 @@ public void TestIntroDetection()
108108
Assert.Equal(22.912, rhs.IntroEnd);
109109
}
110110

111+
/// <summary>
/// Verifies that the silencedetect wrapper reports the expected silent ranges
/// for the first 60 seconds of the bundled audio clip.
/// </summary>
[FactSkipFFmpegTests]
public void TestSilenceDetection()
{
    // Arrange: the clip to analyze and the ranges the wrapper is expected to report for it.
    var episode = queueEpisode("audio/big_buck_bunny_clip.mp3");
    TimeRange[] expectedRanges =
    {
        new TimeRange(44.6310, 44.8072),
        new TimeRange(53.5905, 53.8070),
        new TimeRange(53.8458, 54.2024),
        new TimeRange(54.2611, 54.5935),
        new TimeRange(54.7098, 54.9293),
        new TimeRange(54.9294, 55.2590),
    };

    // Act: only analyze the first 60 seconds of audio.
    var detectedRanges = FFmpegWrapper.DetectSilence(episode, 60);

    // Assert
    Assert.Equal(expectedRanges, detectedRanges);
}
133+
111134
private QueuedEpisode queueEpisode(string path)
112135
{
113136
return new QueuedEpisode()

ConfusedPolarBear.Plugin.IntroSkipper.Tests/TestContiguous.cs

+18
Original file line numberDiff line numberDiff line change
@@ -71,4 +71,22 @@ public void TestFuturama()
7171

7272
Assert.Equal(expected, actual);
7373
}
74+
75+
/// <summary>
/// Checks TimeRange intersection detection by comparing each supplied range
/// against a fixed reference range spanning 5 to 10 seconds.
/// </summary>
[Theory]
[InlineData(1, 4, false)] // too early
[InlineData(4, 6, true)] // intersects on the left
[InlineData(7, 8, true)] // in the middle
[InlineData(9, 12, true)] // intersects on the right
[InlineData(13, 15, false)] // too late
public void TestTimeRangeIntersection(int start, int end, bool expected)
{
    // Fixed reference range that every candidate is compared against.
    var reference = new TimeRange(5, 10);
    var candidate = new TimeRange(start, end);

    var intersects = reference.Intersects(candidate);

    Assert.Equal(expected, intersects);
}
7492
}

ConfusedPolarBear.Plugin.IntroSkipper/AutoSkip.cs

+3-1
Original file line numberDiff line numberDiff line change
@@ -169,14 +169,16 @@ private void PlaybackTimer_Elapsed(object? sender, ElapsedEventArgs e)
169169
// Send the seek command
170170
_logger.LogDebug("Sending seek command to {Session}", deviceId);
171171

172+
var introEnd = (long)intro.IntroEnd - Plugin.Instance!.Configuration.AmountOfIntroToPlay;
173+
172174
_sessionManager.SendPlaystateCommand(
173175
session.Id,
174176
session.Id,
175177
new PlaystateRequest
176178
{
177179
Command = PlaystateCommand.Seek,
178180
ControllingUserId = session.UserId.ToString("N"),
179-
SeekPositionTicks = (long)intro.IntroEnd * TimeSpan.TicksPerSecond,
181+
SeekPositionTicks = introEnd * TimeSpan.TicksPerSecond,
180182
},
181183
CancellationToken.None);
182184

ConfusedPolarBear.Plugin.IntroSkipper/Configuration/PluginConfiguration.cs

+12-1
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,17 @@ public PluginConfiguration()
6262
/// </summary>
6363
public int MinimumIntroDuration { get; set; } = 15;
6464

65+
/// <summary>
66+
/// Gets or sets the maximum amount of noise (in dB) that is considered silent.
67+
/// Lowering this number will increase the filter's sensitivity to noise.
68+
/// </summary>
69+
public int SilenceDetectionMaximumNoise { get; set; } = -50;
70+
71+
/// <summary>
72+
/// Gets or sets the minimum duration of audio (in seconds) that is considered silent.
73+
/// </summary>
74+
public double SilenceDetectionMinimumDuration { get; set; } = 0.50;
75+
6576
// ===== Playback settings =====
6677

6778
/// <summary>
@@ -83,5 +94,5 @@ public PluginConfiguration()
8394
/// Gets or sets the amount of intro to play (in seconds).
8495
/// TODO: rename.
8596
/// </summary>
86-
public int AmountOfIntroToPlay { get; set; } = 5;
97+
public int AmountOfIntroToPlay { get; set; } = 2;
8798
}

ConfusedPolarBear.Plugin.IntroSkipper/Data/TimeRange.cs

+12
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,18 @@ public int CompareTo(object? obj)
6969

7070
return tr.Duration.CompareTo(Duration);
7171
}
72+
73+
/// <summary>
/// Tests if this TimeRange object intersects the provided TimeRange.
/// Two ranges intersect when they share any span of time. This includes partial overlap
/// on either side, one range fully containing the other, and identical ranges.
/// Ranges that only touch at a single endpoint are not considered intersecting.
/// </summary>
/// <param name="tr">Second TimeRange object to test.</param>
/// <returns>true if tr intersects the current TimeRange, false otherwise.</returns>
public bool Intersects(TimeRange tr)
{
    // Standard interval overlap test: each range must begin before the other one ends.
    // Checking only whether tr's endpoints fall inside this range misses the cases where
    // tr completely contains this range or where both ranges are identical.
    // NOTE(review): assumes Start <= End holds for both ranges - confirm against the constructors.
    return Start < tr.End && tr.Start < End;
}
7284
}
7385

7486
#pragma warning restore CA1036

ConfusedPolarBear.Plugin.IntroSkipper/FFmpegWrapper.cs

+120-16
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
using System.Globalization;
55
using System.IO;
66
using System.Text;
7+
using System.Text.RegularExpressions;
78
using Microsoft.Extensions.Logging;
89

910
namespace ConfusedPolarBear.Plugin.IntroSkipper;
@@ -13,6 +14,16 @@ namespace ConfusedPolarBear.Plugin.IntroSkipper;
1314
/// </summary>
1415
public static class FFmpegWrapper
1516
{
17+
// FFmpeg logs lines similar to the following:
18+
// [silencedetect @ 0x000000000000] silence_start: 12.34
19+
// [silencedetect @ 0x000000000000] silence_end: 56.123 | silence_duration: 43.783
20+
21+
/// <summary>
22+
/// Used with FFmpeg's silencedetect filter to extract the start and end times of silence.
23+
/// </summary>
24+
private static readonly Regex SilenceDetectionExpression = new(
25+
"silence_(?<type>start|end): (?<time>[0-9\\.]+)");
26+
1627
/// <summary>
1728
/// Gets or sets the logger.
1829
/// </summary>
@@ -31,11 +42,11 @@ public static bool CheckFFmpegVersion()
3142
try
3243
{
3344
// Log the output of "ffmpeg -version".
34-
ChromaprintLogs["version"] = Encoding.UTF8.GetString(GetOutput("-version", 2000));
45+
ChromaprintLogs["version"] = Encoding.UTF8.GetString(GetOutput("-version", string.Empty, false, 2000));
3546
Logger?.LogDebug("ffmpeg version information: {Version}", ChromaprintLogs["version"]);
3647

3748
// First, validate that the installed version of ffmpeg supports chromaprint at all.
38-
var muxers = Encoding.UTF8.GetString(GetOutput("-muxers", 2000));
49+
var muxers = Encoding.UTF8.GetString(GetOutput("-muxers", string.Empty, false, 2000));
3950
ChromaprintLogs["muxer list"] = muxers;
4051
Logger?.LogTrace("ffmpeg muxers: {Muxers}", muxers);
4152

@@ -47,7 +58,7 @@ public static bool CheckFFmpegVersion()
4758
}
4859

4960
// Second, validate that ffmpeg understands the "-fp_format raw" option.
50-
var muxerHelp = Encoding.UTF8.GetString(GetOutput("-h muxer=chromaprint", 2000));
61+
var muxerHelp = Encoding.UTF8.GetString(GetOutput("-h muxer=chromaprint", string.Empty, false, 2000));
5162
ChromaprintLogs["muxer options"] = muxerHelp;
5263
Logger?.LogTrace("ffmpeg chromaprint help: {MuxerHelp}", muxerHelp);
5364

@@ -90,10 +101,9 @@ public static uint[] Fingerprint(QueuedEpisode episode)
90101
}
91102

92103
Logger?.LogDebug(
93-
"Fingerprinting {Duration} seconds from \"{File}\" (length {Length}, id {Id})",
104+
"Fingerprinting {Duration} seconds from \"{File}\" (id {Id})",
94105
episode.FingerprintDuration,
95106
episode.Path,
96-
episode.Path.Length,
97107
episode.EpisodeId);
98108

99109
var args = string.Format(
@@ -103,7 +113,7 @@ public static uint[] Fingerprint(QueuedEpisode episode)
103113
episode.FingerprintDuration);
104114

105115
// Returns all fingerprint points as raw 32 bit unsigned integers (little endian).
106-
var rawPoints = GetOutput(args);
116+
var rawPoints = GetOutput(args, string.Empty);
107117
if (rawPoints.Length == 0 || rawPoints.Length % 4 != 0)
108118
{
109119
Logger?.LogWarning("Chromaprint returned {Count} points for \"{Path}\"", rawPoints.Length, episode.Path);
@@ -153,34 +163,116 @@ public static Dictionary<uint, int> CreateInvertedIndex(Guid id, uint[] fingerpr
153163
}
154164

155165
/// <summary>
156-
/// Runs ffmpeg and returns standard output.
166+
/// Detect ranges of silence in the provided episode.
167+
/// </summary>
168+
/// <param name="episode">Queued episode.</param>
169+
/// <param name="limit">Maximum amount of audio (in seconds) to detect silence in.</param>
170+
/// <returns>Array of TimeRange objects that are silent in the queued episode.</returns>
171+
public static TimeRange[] DetectSilence(QueuedEpisode episode, int limit)
{
    Logger?.LogTrace(
        "Detecting silence in \"{File}\" (limit {Limit}, id {Id})",
        episode.Path,
        limit,
        episode.EpisodeId);

    // TODO: select the audio track that matches the user's preferred language, falling
    // back to the first track if nothing matches

    // Noise threshold (in dB) passed to the filter; -50 is used when Plugin.Instance is null.
    var maximumNoise = Plugin.Instance?.Configuration.SilenceDetectionMaximumNoise ?? -50;

    // -vn, -sn, -dn: ignore video, subtitle, and data tracks
    var ffmpegArgs = string.Format(
        CultureInfo.InvariantCulture,
        "-vn -sn -dn -i \"{0}\" -to {1} -af \"silencedetect=noise={2}dB:duration=0.1\" -f null -",
        episode.Path,
        limit,
        maximumNoise);

    // The output of this command is cached under the key "GUID-intro-silence-v1".
    var cacheKey = episode.EpisodeId.ToString("N") + "-intro-silence-v1";

    // A single range object is reused while scanning; a copy is stored each time a range closes.
    var pending = new TimeRange();
    var found = new List<TimeRange>();

    // Request standard error (third argument) and decode it as UTF-8 text.
    var output = Encoding.UTF8.GetString(GetOutput(ffmpegArgs, cacheKey, true));

    // Every match carries a type ("start" or "end") and a timecode parsed as a double.
    foreach (Match entry in SilenceDetectionExpression.Matches(output))
    {
        var timestamp = double.Parse(entry.Groups["time"].Value, CultureInfo.InvariantCulture);

        switch (entry.Groups["type"].Value)
        {
            case "start":
                pending.Start = timestamp;
                break;

            default:
                // Anything other than "start" closes the current range.
                pending.End = timestamp;
                found.Add(new TimeRange(pending));
                break;
        }
    }

    return found.ToArray();
}
217+
218+
/// <summary>
219+
/// Runs ffmpeg and returns standard output (or error).
220+
/// If caching is enabled, will use cacheFilename to cache the output of this command.
157221
/// </summary>
158222
/// <param name="args">Arguments to pass to ffmpeg.</param>
159-
/// <param name="timeout">Timeout (in seconds) to wait for ffmpeg to exit.</param>
160-
private static ReadOnlySpan<byte> GetOutput(string args, int timeout = 60 * 1000)
223+
/// <param name="cacheFilename">Filename to cache the output of this command to, or string.Empty if this command should not be cached.</param>
224+
/// <param name="stderr">If standard error should be returned.</param>
225+
/// <param name="timeout">Timeout (in milliseconds) to wait for ffmpeg to exit.</param>
226+
private static ReadOnlySpan<byte> GetOutput(
227+
string args,
228+
string cacheFilename,
229+
bool stderr = false,
230+
int timeout = 60 * 1000)
161231
{
162232
var ffmpegPath = Plugin.Instance?.FFmpegPath ?? "ffmpeg";
163233

234+
var cacheOutput =
235+
(Plugin.Instance?.Configuration.CacheFingerprints ?? false) &&
236+
!string.IsNullOrEmpty(cacheFilename);
237+
238+
// If caching is enabled, try to load the output of this command from the cached file.
239+
if (cacheOutput)
240+
{
241+
// Calculate the absolute path to the cached file.
242+
cacheFilename = Path.Join(Plugin.Instance!.FingerprintCachePath, cacheFilename);
243+
244+
// If the cached file exists, return whatever it holds.
245+
if (File.Exists(cacheFilename))
246+
{
247+
Logger?.LogTrace("Returning contents of cache {Cache}", cacheFilename);
248+
return File.ReadAllBytes(cacheFilename);
249+
}
250+
251+
Logger?.LogTrace("Not returning contents of cache {Cache} (not found)", cacheFilename);
252+
}
253+
164254
// Prepend some flags to prevent FFmpeg from logging its banner and progress information
165255
// for each file that is fingerprinted.
166-
var info = new ProcessStartInfo(ffmpegPath, args.Insert(0, "-hide_banner -loglevel warning "))
256+
var info = new ProcessStartInfo(ffmpegPath, args.Insert(0, "-hide_banner -loglevel info "))
167257
{
168258
WindowStyle = ProcessWindowStyle.Hidden,
169259
CreateNoWindow = true,
170260
UseShellExecute = false,
171261
ErrorDialog = false,
172262

173-
// We only consume standardOutput.
174-
RedirectStandardOutput = true,
175-
RedirectStandardError = false
263+
RedirectStandardOutput = !stderr,
264+
RedirectStandardError = stderr
176265
};
177266

178267
var ffmpeg = new Process
179268
{
180269
StartInfo = info
181270
};
182271

183-
Logger?.LogDebug("Starting ffmpeg with the following arguments: {Arguments}", ffmpeg.StartInfo.Arguments);
272+
Logger?.LogDebug(
273+
"Starting ffmpeg with the following arguments: {Arguments}",
274+
ffmpeg.StartInfo.Arguments);
275+
184276
ffmpeg.Start();
185277

186278
using (MemoryStream ms = new MemoryStream())
@@ -190,19 +282,29 @@ private static ReadOnlySpan<byte> GetOutput(string args, int timeout = 60 * 1000
190282

191283
do
192284
{
193-
bytesRead = ffmpeg.StandardOutput.BaseStream.Read(buf, 0, buf.Length);
285+
var streamReader = stderr ? ffmpeg.StandardError : ffmpeg.StandardOutput;
286+
bytesRead = streamReader.BaseStream.Read(buf, 0, buf.Length);
194287
ms.Write(buf, 0, bytesRead);
195288
}
196289
while (bytesRead > 0);
197290

198291
ffmpeg.WaitForExit(timeout);
199292

200-
return ms.ToArray().AsSpan();
293+
var output = ms.ToArray();
294+
295+
// If caching is enabled, cache the output of this command.
296+
if (cacheOutput)
297+
{
298+
File.WriteAllBytes(cacheFilename, output);
299+
}
300+
301+
return output;
201302
}
202303
}
203304

204305
/// <summary>
205306
/// Tries to load an episode's fingerprint from cache. If caching is not enabled, calling this function is a no-op.
307+
/// This function was created before the unified caching mechanism was introduced (in v0.1.7).
206308
/// </summary>
207309
/// <param name="episode">Episode to try to load from cache.</param>
208310
/// <param name="fingerprint">Array to store the fingerprint in.</param>
@@ -256,6 +358,7 @@ private static bool LoadCachedFingerprint(QueuedEpisode episode, out uint[] fing
256358

257359
/// <summary>
258360
/// Cache an episode's fingerprint to disk. If caching is not enabled, calling this function is a no-op.
361+
/// This function was created before the unified caching mechanism was introduced (in v0.1.7).
259362
/// </summary>
260363
/// <param name="episode">Episode to store in cache.</param>
261364
/// <param name="fingerprint">Fingerprint of the episode to store.</param>
@@ -280,6 +383,7 @@ private static void CacheFingerprint(QueuedEpisode episode, List<uint> fingerpri
280383

281384
/// <summary>
282385
/// Determines the path an episode should be cached at.
386+
/// This function was created before the unified caching mechanism was introduced (in v0.1.7).
283387
/// </summary>
284388
/// <param name="episode">Episode.</param>
285389
private static string GetFingerprintCachePath(QueuedEpisode episode)

0 commit comments

Comments
 (0)