Skip to content

SVE microbenchmarks with string operations #4841

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
218 changes: 218 additions & 0 deletions src/benchmarks/micro/sve/StrCmp.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
using System;
using System.Numerics;
using System.Linq;
using System.Diagnostics;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Extensions;
using BenchmarkDotNet.Configs;
using BenchmarkDotNet.Filters;
using MicroBenchmarks;

namespace SveBenchmarks
{
[BenchmarkCategory(Categories.Runtime)]
[OperatingSystemsArchitectureFilter(allowed: true, System.Runtime.InteropServices.Architecture.Arm64)]
[Config(typeof(Config))]
public class StrCmp
{
/// <summary>
/// Benchmark configuration that registers a filter whose predicate is
/// <see cref="Sve.IsSupported"/>, independent of the individual benchmark case.
/// </summary>
private class Config : ManualConfig
{
    public Config()
    {
        // The predicate ignores its argument: the decision depends only on
        // whether SVE is available on the current machine.
        var sveOnly = new SimpleFilter(benchmarkCase => Sve.IsSupported);
        AddFilter(sveOnly);
    }
}

[Params(15, 127, 527, 10015)]
public int Size;

[Params("Middle", "End", "None")]
public string Modify;

private byte[] _arr1, _arr2;

/// <summary>
/// Allocates the two input buffers of <see cref="Size"/> bytes and, depending on
/// <see cref="Modify"/>, bumps a single byte so the buffers differ at a known position.
/// </summary>
[GlobalSetup]
public virtual void Setup()
{
    _arr1 = ValuesGenerator.Array<byte>(Size);
    _arr2 = ValuesGenerator.Array<byte>(Size);
    // NOTE(review): the "None" case relies on both calls above producing the same
    // contents (presumably ValuesGenerator is deterministically seeded) - confirm.

    if (Modify == "Middle")
    {
        // Change the first buffer at its midpoint.
        _arr1[Size / 2]++;
    }
    else if (Modify == "End")
    {
        // Change the second buffer at its last element.
        _arr2[Size - 1]++;
    }
    // "None" (or any other value): leave both buffers untouched.
}

[Benchmark]
public int Scalar()
{
if (_arr1.Length == _arr2.Length)
{
for (int i = 0; i < Size; i++)
{
if (_arr1[i] != _arr2[i] )
return _arr1[i] - _arr2[i];
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The benchmarks that are part of this repo are used to determine whether there is any performance regression in .NET. Running this scalar benchmark multiple times every day is unlikely to catch any regression, so I would focus purely on the ones that use Sve directly and indirectly (via Vector types, if possible).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thoughts...

  • They give a comparison point, so we can easily show the advantage gained by using intrinsics. Knowing that an SVE loop is slightly slower than Vector128, but still massively faster than scalar, is useful, I think.
  • If C# started to add loop optimisations / auto vectorisation then the gap between scalar and intrinsics will start to close.
  • There are some loops (not in this PR) that cannot easily be optimised via vector128 (eg the partition used by a quicksort). For those we definitely want scalar versions.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with @a74nh. The point of adding the scalar version is not to catch any regression in that code, but to compare the improvements we make using the Vector128/Sve APIs.

}

return 0;
}

Debug.Assert(false, "Different array lengths are not expected");
return 0;
}

/// <summary>
/// Compares the two buffers 16 bytes at a time with <see cref="Vector128"/>,
/// then finishes (or pinpoints the mismatch) with a byte-by-byte loop.
/// </summary>
/// <returns>
/// The difference between the first pair of bytes that differ, or 0 when the
/// first <see cref="Size"/> bytes are equal.
/// </returns>
[Benchmark]
public int Vector128StrCmp()
{
    // Guard: the benchmark only ever expects equally sized buffers.
    if (_arr1.Length != _arr2.Length)
    {
        Debug.Assert(false, "Different array lengths are not expected");
        return 0;
    }

    int lanes = Vector128<byte>.Count;
    int pos = 0;

    // Vector phase: advance while whole blocks are identical.
    while (pos <= Size - lanes)
    {
        Vector128<byte> left = Vector128.LoadUnsafe(ref _arr1[pos]);
        Vector128<byte> right = Vector128.LoadUnsafe(ref _arr2[pos]);

        if (!Vector128.EqualsAll(left, right))
        {
            // The differing byte is inside this block; the scalar loop below finds it.
            break;
        }

        pos += lanes;
    }

    // Scalar phase: handles the tail and locates the exact mismatch position.
    while (pos < Size)
    {
        int diff = _arr1[pos] - _arr2[pos];
        if (diff != 0)
        {
            return diff;
        }

        pos++;
    }

    return 0;
}


/// <summary>
/// Compares the two buffers using SVE with a while-less-than loop predicate, so the
/// final partial vector is handled by the same loop as the full ones.
/// </summary>
/// <returns>
/// The difference between the first pair of bytes that differ, or 0 when the first
/// <see cref="Size"/> bytes are equal, the array lengths differ, or SVE is unsupported.
/// </returns>
[Benchmark]
public unsafe long SveStrCmp()
{
    if (Sve.IsSupported)
    {
        int i = 0;
        int elemsInVector = (int)Sve.Count8BitElements();

        Vector<byte> ptrue = Sve.CreateTrueMaskByte();
        Vector<byte> pLoop = (Vector<byte>)Sve.CreateWhileLessThanMask8Bit(i, Size);
        // Stays all-zero unless an iteration finds a mismatch (also covers Size == 0).
        Vector<byte> cmp = Vector<byte>.Zero;
        Vector<byte> arr1_data, arr2_data;

        if (_arr1.Length == _arr2.Length)
        {
            fixed (byte* arr1_ptr = _arr1, arr2_ptr = _arr2)
            {
                // Loop while the predicate still covers at least its first lane.
                while (Sve.TestFirstTrue(ptrue, pLoop))
                {
                    // Loads are governed by pLoop.
                    // NOTE(review): this relies on lanes at or beyond Size comparing
                    // as equal (presumably loaded as zero in both vectors) - confirm.
                    arr1_data = Sve.LoadVector(pLoop, arr1_ptr + i);
                    arr2_data = Sve.LoadVector(pLoop, arr2_ptr + i);

                    // stop if any values aren't equal
                    cmp = Sve.CompareNotEqualTo(arr1_data, arr2_data);

                    if (Sve.TestAnyTrue(ptrue, cmp))
                        break;

                    i += elemsInVector;

                    pLoop = (Vector<byte>)Sve.CreateWhileLessThanMask8Bit(i, Size);
                }

                // Find the first lane flagged as different. Scanning the lanes directly
                // (instead of packing them into a 32-bit mask) stays correct when the
                // vector holds more than 32 bytes, where `1 << j` would wrap and an
                // all-equal result could be misread as a mismatch at lane 32.
                for (int j = 0; j < elemsInVector; j++)
                {
                    if (cmp.GetElement(j) != 0)
                        return _arr1[i + j] - _arr2[i + j];
                }

                return 0;
            }
        }

        Debug.Assert(false, "Different array lengths are not expected");
        return 0;
    }
    return 0;
}

/// <summary>
/// Compares the two buffers using SVE over whole vectors only (all-true predicate),
/// then finishes (or pinpoints the mismatch) with a scalar byte-by-byte tail loop.
/// </summary>
/// <returns>
/// The difference between the first pair of bytes that differ, or 0 when the first
/// <see cref="Size"/> bytes are equal, the array lengths differ, or SVE is unsupported.
/// </returns>
[Benchmark]
public unsafe long SveTail()
{
    if (Sve.IsSupported)
    {
        Vector<byte> ptrue = Sve.CreateTrueMaskByte();
        Vector<byte> cmp;
        Vector<byte> arr1_data, arr2_data;

        int i = 0;
        int elemsInVector = (int)Sve.Count8BitElements();

        if (_arr1.Length == _arr2.Length)
        {
            fixed (byte* arr1_ptr = _arr1, arr2_ptr = _arr2)
            {
                // Vector phase: only full vectors, so every lane is in range.
                for (; i <= Size - elemsInVector; i += elemsInVector)
                {
                    arr1_data = Sve.LoadVector(ptrue, arr1_ptr + i);
                    arr2_data = Sve.LoadVector(ptrue, arr2_ptr + i);

                    cmp = Sve.CompareNotEqualTo(arr1_data, arr2_data);

                    // A non-zero horizontal sum means at least one lane differs.
                    // The sum is tested at full width: narrowing it to a byte first
                    // could wrap to zero when many lanes differ and hide the mismatch.
                    if (Sve.AddAcross(cmp).ToScalar() != 0)
                    {
                        break;
                    }
                }

                // Scalar phase: handles the tail and locates the exact mismatch position.
                for (; i < Size; i++)
                {
                    if (_arr1[i] != _arr2[i])
                        return _arr1[i] - _arr2[i];
                }

                return 0;
            }
        }

        Debug.Assert(false, "Different array lengths are not expected");
        return 0;
    }

    return 0;
}
}
}
Loading
Loading