Skip to content

Commit d54a30f

Browse files
author
Paulo Morgado
committed
WIP
1 parent f4fcd2a commit d54a30f

File tree

86 files changed

+5634
-2088
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

86 files changed

+5634
-2088
lines changed

src/SIPSorcery.csproj

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,18 +21,33 @@
2121
<PackageReference Include="Concentus" Version="2.2.2" />
2222
<PackageReference Include="BouncyCastle.Cryptography" Version="2.5.1" />
2323
<PackageReference Include="DnsClient" Version="1.8.0" />
24+
<PackageReference Include="Microsoft.Extensions.ObjectPool" Version="8.0.16" />
2425
<PackageReference Include="SIPSorcery.WebSocketSharp" Version="0.0.1" />
2526
<PackageReference Include="SIPSorceryMedia.Abstractions" Version="8.0.10" />
2627
<PackageReference Include="System.Net.WebSockets.Client" Version="4.3.2" />
2728
<PackageReference Include="Microsoft.SourceLink.GitHub" Version="8.0.0" PrivateAssets="All" />
2829

2930
<!-- The packages below are transitive references included to overcome vulnerabilities in a top level package. -->
3031
<PackageReference Include="System.Net.Security" Version="4.3.2" /> <!-- Vuln version referenced by System.Net.WebSockets.Client. -->
32+
33+
<PackageReference Include="NetEscapades.EnumGenerators" Version="1.0.0-beta13" /><!-- Source generators for enums -->
34+
</ItemGroup>
35+
36+
<ItemGroup Condition="'$(TargetFramework)' != 'net8.0'">
37+
<PackageReference Include="System.Collections.Immutable" Version="8.0.0" />
3138
</ItemGroup>
3239

3340
<ItemGroup Condition="'$(TargetFramework)' == 'net462' OR '$(TargetFramework)' == 'netstandard2.0'">
3441
<PackageReference Include="System.Memory" Version="4.6.3" />
3542
<PackageReference Include="Microsoft.Bcl.HashCode" Version="6.0.0" />
43+
<PackageReference Include="Microsoft.Bcl.Memory" Version="9.0.5" />
44+
</ItemGroup>
45+
46+
<ItemGroup Condition="'$(TargetFramework)' == 'net462'">
47+
<PackageReference Include="Microsoft.NETFramework.ReferenceAssemblies.net462" Version="1.0.3">
48+
<PrivateAssets>all</PrivateAssets>
49+
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
50+
</PackageReference>
3651
</ItemGroup>
3752

3853
<PropertyGroup>
@@ -44,7 +59,6 @@
4459
<WarningsNotAsErrors>$(WarningsNotAsErrors);CS0809;CS0618;CS8632</WarningsNotAsErrors>
4560
<GenerateDocumentationFile>true</GenerateDocumentationFile>
4661
<!-- Disable warning for missing XML doc comments. -->
47-
<NoWarn>$(NoWarn);CS1591;CS1573;CS1587</NoWarn>
4862
<Authors>Aaron Clauson, Christophe Irles, Rafael Soares &amp; Contributors</Authors>
4963
<Copyright>Copyright © 2010-2025 Aaron Clauson</Copyright>
5064
<PackageLicenseExpression>BSD-3-Clause</PackageLicenseExpression>

src/app/Media/Codecs/AudioEncoder.cs

Lines changed: 70 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -13,22 +13,38 @@
1313
// BSD 3-Clause "New" or "Revised" License, see included LICENSE.md file.
1414
//-----------------------------------------------------------------------------
1515

16-
using Concentus;
1716
using System;
1817
using System.Collections.Generic;
1918
using System.Linq;
20-
using SIPSorceryMedia.Abstractions;
21-
using SIPSorcery.Sys;
19+
using Concentus;
2220
using Concentus.Enums;
21+
using Microsoft.Extensions.Logging;
22+
using SIPSorcery.Sys;
23+
using SIPSorceryMedia.Abstractions;
2324

2425
namespace SIPSorcery.Media
2526
{
26-
public class AudioEncoder : IAudioEncoder
27+
public class AudioEncoder : IAudioEncoder, IDisposable
2728
{
2829
private const int G722_BIT_RATE = 64000; // G722 sampling rate is 16KHz with bits per sample of 16.
2930
private const int OPUS_SAMPLE_RATE = 48000; // Opus codec sampling rate, 48KHz.
3031
private const int OPUS_CHANNELS = 2; // Opus codec number of channels.
31-
private const int OPUS_MAXIMUM_FRAME_SIZE = 5760;
32+
33+
/// <summary>
34+
/// The max frame size that the OPUS encoder will accept is 2880 samples per channel (see IOpusEncoder.Encode).
35+
/// 2880 samples per channel corresponds to a frame duration of 60ms at 48kHz. Therefore
36+
/// the max frame duration supported by OPUS at 48kHz is 60ms.
37+
/// </summary>
38+
private const int OPUS_MAXIMUM_INPUT_SAMPLES_PER_CHANNEL = 2880;
39+
40+
/// <summary>
41+
/// Maximum size, in bytes, of a single encoded OPUS frame (see IOpusEncoder.Encode).
42+
/// </summary>
43+
private const int OPUS_MAXIMUM_ENCODED_FRAME_SIZE = 1275;
44+
45+
private static ILogger logger = Log.Logger;
46+
47+
private bool _disposedValue = false;
3248

3349
private G722Codec _g722Codec;
3450
private G722CodecState _g722CodecState;
@@ -57,8 +73,8 @@ public class AudioEncoder : IAudioEncoder
5773
new AudioFormat(SDPWellKnownMediaFormatsEnum.G722),
5874
new AudioFormat(SDPWellKnownMediaFormatsEnum.G729),
5975

60-
// Need more testing befoer adding OPUS by default. 24 Dec 2024 AC.
61-
//new AudioFormat(111, "OPUS", OPUS_SAMPLE_RATE, OPUS_CHANNELS, "useinbandfec=1")
76+
// Need more testing before adding OPUS by default. 24 Dec 2024 AC.
77+
//new AudioFormat(111, AudioCodecsEnum.OPUS.ToString(), OPUS_SAMPLE_RATE, OPUS_CHANNELS, "useinbandfec=1")
6278
};
6379

6480
public List<AudioFormat> SupportedFormats
@@ -81,10 +97,15 @@ public AudioEncoder(bool includeLinearFormats = false, bool includeOpus = false)
8197

8298
if(includeOpus)
8399
{
84-
_supportedFormats.Add(new AudioFormat(111, "OPUS", OPUS_SAMPLE_RATE, OPUS_CHANNELS, "useinbandfec=1"));
100+
_supportedFormats.Add(new AudioFormat(111, AudioCodecsEnum.OPUS.ToString(), OPUS_SAMPLE_RATE, OPUS_CHANNELS, "useinbandfec=1"));
85101
}
86102
}
87103

104+
public AudioEncoder(params AudioFormat[] supportedFormats)
105+
{
106+
_supportedFormats = supportedFormats.ToList();
107+
}
108+
88109
public byte[] EncodeAudio(short[] pcm, AudioFormat format)
89110
{
90111
if (format.Codec == AudioCodecsEnum.G722)
@@ -122,31 +143,36 @@ public byte[] EncodeAudio(short[] pcm, AudioFormat format)
122143
}
123144
else if (format.Codec == AudioCodecsEnum.L16)
124145
{
146+
// When netstandard2.1 can be used.
147+
//return MemoryMarshal.Cast<short, byte>(pcm)
148+
125149
// Put on the wire in network byte order (big endian).
126-
return MemoryOperations.ToBigEndianBytes(pcm);
150+
return pcm.SelectMany(x => new byte[] { (byte)(x >> 8), (byte)(x) }).ToArray();
127151
}
128152
else if (format.Codec == AudioCodecsEnum.PCM_S16LE)
129153
{
130154
// Put on the wire as little endian.
131-
return MemoryOperations.ToLittleEndianBytes(pcm);
155+
return pcm.SelectMany(x => new byte[] { (byte)(x), (byte)(x >> 8) }).ToArray();
132156
}
133157
else if (format.Codec == AudioCodecsEnum.OPUS)
134158
{
135159
if (_opusEncoder == null)
136160
{
137-
_opusEncoder = OpusCodecFactory.CreateEncoder(format.ClockRate, format.ChannelCount, OpusApplication.OPUS_APPLICATION_VOIP);
161+
var channelCount = format.ChannelCount > 0 ? format.ChannelCount : OPUS_CHANNELS;
162+
_opusEncoder = OpusCodecFactory.CreateEncoder(format.ClockRate, channelCount, OpusApplication.OPUS_APPLICATION_VOIP);
138163
}
139164

140-
// Opus expects PCM data in float format [-1.0, 1.0].
141-
float[] pcmFloat = new float[pcm.Length];
142-
for (int i = 0; i < pcm.Length; i++)
165+
if (pcm.Length > _opusEncoder.NumChannels * OPUS_MAXIMUM_INPUT_SAMPLES_PER_CHANNEL)
143166
{
144-
pcmFloat[i] = pcm[i] / 32768f; // Convert to float range [-1.0, 1.0]
167+
logger.LogWarning("{audioEncoder} input sample of length {inputSize} supplied to OPUS encoder exceeded maximum limit of {maxLimit}. Reduce sampling period.", nameof(AudioEncoder), pcm.Length, _opusEncoder.NumChannels * OPUS_MAXIMUM_INPUT_SAMPLES_PER_CHANNEL);
168+
return [];
169+
}
170+
else
171+
{
172+
Span<byte> encodedSample = stackalloc byte[OPUS_MAXIMUM_ENCODED_FRAME_SIZE];
173+
int encodedLength = _opusEncoder.Encode(pcm, pcm.Length / _opusEncoder.NumChannels, encodedSample, encodedSample.Length);
174+
return encodedSample.Slice(0, encodedLength).ToArray();
145175
}
146-
147-
byte[] encodedSample = new byte[pcm.Length];
148-
int encodedLength = _opusEncoder.Encode(pcmFloat, pcmFloat.Length / format.ChannelCount, encodedSample, encodedSample.Length);
149-
return encodedSample.AsSpan(0, encodedLength).ToArray();
150176
}
151177
else
152178
{
@@ -172,7 +198,7 @@ public short[] DecodeAudio(byte[] encodedSample, AudioFormat format)
172198
short[] decodedPcm = new short[encodedSample.Length * 2];
173199
int decodedSampleCount = _g722Decoder.Decode(_g722DecoderState, decodedPcm, encodedSample, encodedSample.Length);
174200

175-
return decodedPcm.AsSpan(0, decodedSampleCount).ToArray();
201+
return decodedPcm.Take(decodedSampleCount).ToArray();
176202
}
177203
if (format.Codec == AudioCodecsEnum.G729)
178204
{
@@ -209,10 +235,11 @@ public short[] DecodeAudio(byte[] encodedSample, AudioFormat format)
209235
{
210236
if (_opusDecoder == null)
211237
{
212-
_opusDecoder = OpusCodecFactory.CreateDecoder(format.ClockRate, format.ChannelCount);
238+
var channelCount = format.ChannelCount > 0 ? format.ChannelCount : OPUS_CHANNELS;
239+
_opusDecoder = OpusCodecFactory.CreateDecoder(format.ClockRate, channelCount);
213240
}
214241

215-
int maxSamples = OPUS_MAXIMUM_FRAME_SIZE * format.ChannelCount;
242+
int maxSamples = OPUS_MAXIMUM_INPUT_SAMPLES_PER_CHANNEL * _opusDecoder.NumChannels;
216243
float[] floatBuf = new float[maxSamples];
217244

218245
// Decode returns the number of samples per channel.
@@ -222,7 +249,7 @@ public short[] DecodeAudio(byte[] encodedSample, AudioFormat format)
222249
floatBuf.Length,
223250
false);
224251

225-
int totalFloats = samplesPerChannel * format.ChannelCount;
252+
int totalFloats = samplesPerChannel * _opusDecoder.NumChannels;
226253

227254
// Convert to 16-bit interleaved PCM.
228255
short[] pcm16 = new short[totalFloats];
@@ -246,24 +273,33 @@ public short[] Resample(short[] pcm, int inRate, int outRate)
246273
return PcmResampler.Resample(pcm, inRate, outRate);
247274
}
248275

249-
private short ClampToShort(float value)
250-
{
251-
if (value > short.MaxValue)
276+
private float ClampToFloat(float value, float min, float max)
252277
{
253-
return short.MaxValue;
278+
if (value < min) { return min; }
279+
if (value > max) { return max; }
280+
return value;
254281
}
255-
if (value < short.MinValue)
282+
283+
protected virtual void Dispose(bool disposing)
284+
{
285+
if (!_disposedValue)
256286
{
257-
return short.MinValue;
287+
if (disposing)
288+
{
289+
(_opusEncoder as IDisposable)?.Dispose();
290+
(_opusDecoder as IDisposable)?.Dispose();
291+
(_g729Encoder as IDisposable)?.Dispose();
292+
(_g729Decoder as IDisposable)?.Dispose();
293+
}
294+
295+
_disposedValue = true;
258296
}
259-
return (short)value;
260297
}
261298

262-
private float ClampToFloat(float value, float min, float max)
299+
public void Dispose()
263300
{
264-
if (value < min) { return min; }
265-
if (value > max) { return max; }
266-
return value;
301+
Dispose(disposing: true);
302+
GC.SuppressFinalize(this);
267303
}
268304
}
269305
}

src/app/Media/Codecs/G722Codec.cs

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,20 @@ public int Decode(G722CodecState state, short[] outputBuffer, byte[] inputG722Da
363363
/// <param name="inputBuffer">PCM 16 bit samples to encode</param>
364364
/// <param name="inputBufferCount">Number of samples in the input buffer to encode</param>
365365
/// <returns>Number of encoded bytes written into output buffer</returns>
366+
[Obsolete("Use Encode(G722CodecState, Span<byte>, ReadOnlySpan<byte>) instead.", false)]
366367
public int Encode(G722CodecState state, byte[] outputBuffer, short[] inputBuffer, int inputBufferCount)
368+
{
369+
return Encode(state, outputBuffer.AsSpan(), inputBuffer.AsSpan(0 , inputBufferCount));
370+
}
371+
372+
/// <summary>
373+
/// Encodes a buffer of 16 bit PCM samples to G722.
374+
/// </summary>
375+
/// <param name="state">Codec state</param>
376+
/// <param name="outputBuffer">Output buffer (to contain encoded G722)</param>
377+
/// <param name="inputBuffer">PCM 16 bit samples to encode</param>
378+
/// <returns>Number of encoded bytes written into output buffer</returns>
379+
public int Encode(G722CodecState state, Span<byte> outputBuffer, ReadOnlySpan<short> inputBuffer)
367380
{
368381
int dlow;
369382
int dhigh;
@@ -392,7 +405,7 @@ public int Encode(G722CodecState state, byte[] outputBuffer, short[] inputBuffer
392405

393406
g722_bytes = 0;
394407
xhigh = 0;
395-
for (j = 0; j < inputBufferCount;)
408+
for (j = 0; j < inputBuffer.Length;)
396409
{
397410
if (state.ItuTestMode)
398411
{
@@ -413,8 +426,10 @@ public int Encode(G722CodecState state, byte[] outputBuffer, short[] inputBuffer
413426
{
414427
state.QmfSignalHistory[i] = state.QmfSignalHistory[i + 2];
415428
}
429+
416430
state.QmfSignalHistory[22] = inputBuffer[j++];
417-
if (j < inputBufferCount)
431+
432+
if (j < inputBuffer.Length)
418433
{
419434
state.QmfSignalHistory[23] = inputBuffer[j++];
420435
}
@@ -432,10 +447,12 @@ public int Encode(G722CodecState state, byte[] outputBuffer, short[] inputBuffer
432447
sumodd += state.QmfSignalHistory[2 * i] * qmf_coeffs[i];
433448
sumeven += state.QmfSignalHistory[2 * i + 1] * qmf_coeffs[11 - i];
434449
}
450+
435451
xlow = (sumeven + sumodd) >> 14;
436452
xhigh = (sumeven - sumodd) >> 14;
437453
}
438454
}
455+
439456
// Block 1L, SUBTRA
440457
el = Saturate(xlow - state.Band[0].s);
441458

@@ -450,17 +467,18 @@ public int Encode(G722CodecState state, byte[] outputBuffer, short[] inputBuffer
450467
break;
451468
}
452469
}
470+
453471
ilow = (el < 0) ? iln[i] : ilp[i];
454472

455473
// Block 2L, INVQAL
456474
ril = ilow >> 2;
457475
wd2 = qm4[ril];
458476
dlow = (state.Band[0].det * wd2) >> 15;
459477

460-
// Block 3L, LOGSCL
461478
il4 = rl42[ril];
462479
wd = (state.Band[0].nb * 127) >> 7;
463480
state.Band[0].nb = wd + wl[il4];
481+
464482
if (state.Band[0].nb < 0)
465483
{
466484
state.Band[0].nb = 0;
@@ -470,7 +488,7 @@ public int Encode(G722CodecState state, byte[] outputBuffer, short[] inputBuffer
470488
state.Band[0].nb = 18432;
471489
}
472490

473-
// Block 3L, SCALEL
491+
474492
wd1 = (state.Band[0].nb >> 6) & 31;
475493
wd2 = 8 - (state.Band[0].nb >> 11);
476494
wd3 = (wd2 < 0) ? (ilb[wd1] << -wd2) : (ilb[wd1] >> wd2);
@@ -502,6 +520,7 @@ public int Encode(G722CodecState state, byte[] outputBuffer, short[] inputBuffer
502520
ih2 = rh2[ihigh];
503521
wd = (state.Band[1].nb * 127) >> 7;
504522
state.Band[1].nb = wd + wh[ih2];
523+
505524
if (state.Band[1].nb < 0)
506525
{
507526
state.Band[1].nb = 0;
@@ -518,6 +537,7 @@ public int Encode(G722CodecState state, byte[] outputBuffer, short[] inputBuffer
518537
state.Band[1].det = wd3 << 2;
519538

520539
Block4(state, 1, dhigh);
540+
521541
code = ((ihigh << 6) | ilow) >> (8 - state.BitsPerSample);
522542
}
523543

@@ -526,6 +546,7 @@ public int Encode(G722CodecState state, byte[] outputBuffer, short[] inputBuffer
526546
// Pack the code bits
527547
state.OutBuffer |= (uint)(code << state.OutBits);
528548
state.OutBits += state.BitsPerSample;
549+
529550
if (state.OutBits >= 8)
530551
{
531552
outputBuffer[g722_bytes++] = (byte)(state.OutBuffer & 0xFF);
@@ -536,8 +557,9 @@ public int Encode(G722CodecState state, byte[] outputBuffer, short[] inputBuffer
536557
else
537558
{
538559
outputBuffer[g722_bytes++] = (byte)code;
539-
}
540560
}
561+
}
562+
541563
return g722_bytes;
542564
}
543565
}

src/app/Media/Sources/VideoTestPatternSource.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -234,15 +234,15 @@ private void GenerateTestPattern(object state)
234234
GenerateRawSample(TEST_PATTERN_WIDTH, TEST_PATTERN_HEIGHT, _testI420Buffer);
235235
}
236236

237-
if (_videoEncoder != null && OnVideoSourceEncodedSample != null && !_formatManager.SelectedFormat.IsEmpty())
237+
if (_videoEncoder != null && OnVideoSourceEncodedSample is { } onVideoSourceEncodedSample && !_formatManager.SelectedFormat.IsEmpty())
238238
{
239239
var encodedBuffer = _videoEncoder.EncodeVideo(TEST_PATTERN_WIDTH, TEST_PATTERN_HEIGHT, _testI420Buffer, VideoPixelFormatsEnum.I420, _formatManager.SelectedFormat.Codec);
240240

241241
if (encodedBuffer != null)
242242
{
243243
uint fps = (_frameSpacing > 0) ? 1000 / (uint)_frameSpacing : DEFAULT_FRAMES_PER_SECOND;
244244
uint durationRtpTS = VIDEO_SAMPLING_RATE / fps;
245-
OnVideoSourceEncodedSample.Invoke(durationRtpTS, encodedBuffer);
245+
onVideoSourceEncodedSample.Invoke(durationRtpTS, encodedBuffer);
246246
}
247247
}
248248

0 commit comments

Comments
 (0)