From 9aac8e08dec5e5b6e5747ba53557fd3aa5b57d66 Mon Sep 17 00:00:00 2001 From: Jonathan de Boyne Pollard Date: Mon, 13 Mar 2017 12:32:34 +0000 Subject: [PATCH] Reworked silence interpolation to work around duplicated silence problem. --- extractaudio/decoder.c | 82 ++++++++++++++++++++++++++++++------------ extractaudio/decoder.h | 2 ++ 2 files changed, 61 insertions(+), 23 deletions(-) diff --git a/extractaudio/decoder.c b/extractaudio/decoder.c index 09dd2298b..04ca3d825 100644 --- a/extractaudio/decoder.c +++ b/extractaudio/decoder.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "config.h" @@ -70,6 +71,7 @@ decoder_new(struct session *sp, int dflags) dp->stime = dp->pp->pkt->time; dp->nticks = dp->sticks = dp->pp->parsed.ts; dp->dticks = 0; + dp->silence_at_receiver = dp->silence_from_sender = 0; dp->lpt = RTP_PCMU; dp->dflags = dflags; /* dp->f = fopen(i, "w"); */ @@ -78,13 +80,47 @@ decoder_new(struct session *sp, int dflags) return (void *)dp; } +static +unsigned int +extract_some_pending_silence(struct decoder_stream *dp) +{ + unsigned int t; + /* Return the next slice (at most 4000 ticks) of pending silence to generate, or 0 if none is pending. */ + if (dp->silence_at_receiver > 0) { + /* Handle overlapping silences. + ** If the sender has a gap in the timestamps AND packets arrive later + ** than playout time, we do not want to double up the generated silence. + ** So we elide any silence caused by late arrivals that is covered by + ** the period of sender-indicated silence.
+ */ + if (dp->silence_from_sender > 0) { + warnx("Silence overlap: explicit sender gap of %u ticks, inferred silence of %u ticks at receiver", dp->silence_from_sender, dp->silence_at_receiver); + /* Merge into one counter so the elision is applied once, not again on every 4000-tick slice. */ + if (dp->silence_at_receiver > dp->silence_from_sender) + dp->silence_from_sender = dp->silence_at_receiver; + dp->silence_at_receiver = 0; + } + t = dp->silence_at_receiver; + if (t > 4000) + t = 4000; + dp->silence_at_receiver -= t; + if (t > 0) return t; + } + + t = dp->silence_from_sender; + if (t > 4000) + t = 4000; + dp->silence_from_sender -= t; + return t; +} + int32_t decoder_get(struct decoder_stream *dp) { unsigned int cticks, t; int j; - if (dp->oblen == 0) { + while (dp->oblen <= 0) { if (dp->pp == NULL) return DECODER_EOF; cticks = dp->pp->parsed.ts; @@ -97,30 +133,16 @@ decoder_get(struct decoder_stream *dp) dp->nticks = cticks; dp->sticks = cticks - (dp->pp->pkt->time - dp->stime) * 8000; } + /* Calculate sender-indicated silence between the expected timestamp and the current one.
*/ if (dp->nticks < cticks) { t = cticks - dp->nticks; - if (t > 4000) - t = 4000; - if ((dp->dflags & D_FLAG_NOSYNC) != 0) { - dp->nticks += t; - dp->dticks += t; - return (DECODER_SKIP); - } - j = generate_silence(dp, dp->obuf, t); - if (j <= 0) - return DECODER_ERROR; dp->nticks += t; - dp->dticks += t; - dp->oblen = j / 2; - dp->obp = dp->obuf; - } else if ((dp->pp->pkt->time - dp->stime - (double)dp->dticks / 8000.0) > 0.2) { - t = (((dp->pp->pkt->time - dp->stime) * 8000) - dp->dticks) / 2; - if (t > 4000) - t = 4000; - if ((dp->dflags & D_FLAG_NOSYNC) != 0) { - dp->dticks += t; + if ((dp->dflags & D_FLAG_NOSYNC) != 0) return (DECODER_SKIP); - } + dp->silence_from_sender += t; + } + t = extract_some_pending_silence(dp); + if (t > 0) { j = generate_silence(dp, dp->obuf, t); if (j <= 0) return DECODER_ERROR; @@ -128,13 +150,27 @@ decoder_get(struct decoder_stream *dp) dp->oblen = j / 2; dp->obp = dp->obuf; } else { + /* Calculate receiver-detected silence from packet arrival time + ** compared to playout time. + ** We do not generate silence immediately, because we want the + ** generated silence to follow the audio, and possibly be + ** subsumed into any sender-indicated silence. + ** We do not infer silence if there is no audio payload, because + ** we want the silence to prefix the next actual audio data.
+ */ + if (RPLEN(dp->pp) > 0 && (dp->pp->pkt->time - dp->stime - dp->dticks / 8000.0) > 0.2) { + t = (((dp->pp->pkt->time - dp->stime) * 8000) - dp->dticks) / 2; + if ((dp->dflags & D_FLAG_NOSYNC) != 0) { + dp->dticks += t; + return (DECODER_SKIP); + } + dp->silence_at_receiver += t; + } j = decode_frame(dp, dp->obuf, RPLOAD(dp->pp), RPLEN(dp->pp), \ sizeof(dp->obuf)); if (j > 0) dp->lpt = dp->pp->rpkt->pt; dp->pp = MYQ_NEXT(dp->pp); - if (j <= 0) - return decoder_get(dp); dp->oblen = j / 2; dp->obp = dp->obuf; } diff --git a/extractaudio/decoder.h b/extractaudio/decoder.h index 7d8ea7c96..19279452e 100644 --- a/extractaudio/decoder.h +++ b/extractaudio/decoder.h @@ -72,6 +72,8 @@ struct decoder_stream { #endif double stime; double dticks; + unsigned int silence_at_receiver; /* in ticks */ + unsigned int silence_from_sender; /* in ticks; also carries receiver silence merged in on overlap */ /* FILE *f; */ int dflags; };