diff --git a/src/benchmarks/micro/sve/StrCmp.cs b/src/benchmarks/micro/sve/StrCmp.cs
new file mode 100644
index 00000000000..4e43c996e4a
--- /dev/null
+++ b/src/benchmarks/micro/sve/StrCmp.cs
@@ -0,0 +1,237 @@
+#pragma warning disable SYSLIB5003
+
+using System;
+using System.Numerics;
+using System.Linq;
+using System.Diagnostics;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.Arm;
+using BenchmarkDotNet.Attributes;
+using BenchmarkDotNet.Extensions;
+using BenchmarkDotNet.Configs;
+using BenchmarkDotNet.Filters;
+using MicroBenchmarks;
+
+namespace SveBenchmarks
+{
+    /// <summary>
+    /// Compares two byte arrays strcmp-style with scalar, Vector128 and SVE code.
+    /// Each benchmark returns 0 when no byte differs, otherwise the difference of the first mismatching bytes.
+    /// </summary>
+    [BenchmarkCategory(Categories.Runtime)]
+    [OperatingSystemsArchitectureFilter(allowed: true, System.Runtime.InteropServices.Architecture.Arm64)]
+    [Config(typeof(Config))]
+    public class StrCmp
+    {
+        // Filters the benchmarks out when SVE is not supported.
+        private class Config : ManualConfig
+        {
+            public Config()
+            {
+                AddFilter(new SimpleFilter(_ => Sve.IsSupported));
+            }
+        }
+
+        [Params(15, 127, 527, 10015)]
+        public int Size;
+
+        [Params("Middle", "End", "None")]
+        public string Modify;
+
+        private byte[] _arr1, _arr2;
+
+        /// <summary>Creates both input arrays and applies the change selected by <see cref="Modify"/>.</summary>
+        [GlobalSetup]
+        public virtual void Setup()
+        {
+            _arr1 = ValuesGenerator.Array<byte>(Size);
+            _arr2 = ValuesGenerator.Array<byte>(Size);
+
+            switch (Modify)
+            {
+                case "Middle":
+                    // modify arr1 value in the middle of the array
+                    _arr1[Size / 2] += 1;
+                    break;
+
+                case "End":
+                    // modify the last arr2 value
+                    _arr2[Size - 1] += 1;
+                    break;
+
+                case "None":
+                    // leave both arrays as generated
+                    break;
+            }
+        }
+
+        /// <summary>Byte-by-byte reference implementation.</summary>
+        [Benchmark]
+        public int Scalar()
+        {
+            if (_arr1.Length == _arr2.Length)
+            {
+                for (int i = 0; i < Size; i++)
+                {
+                    if (_arr1[i] != _arr2[i])
+                        return _arr1[i] - _arr2[i];
+                }
+
+                return 0;
+            }
+
+            Debug.Assert(false, "Different array lengths are not expected");
+            return 0;
+        }
+
+        /// <summary>Compares whole Vector128 blocks, then finishes with a scalar loop.</summary>
+        [Benchmark]
+        public int Vector128StrCmp()
+        {
+            int incr = Vector128<byte>.Count;
+            int i = 0;
+
+            if (_arr1.Length == _arr2.Length)
+            {
+                for (; i <= Size - incr; i += incr)
+                {
+                    Vector128<byte> arr1_vals = Vector128.LoadUnsafe(ref _arr1[i]);
+                    Vector128<byte> arr2_vals = Vector128.LoadUnsafe(ref _arr2[i]);
+
+                    bool allEqual = Vector128.EqualsAll(arr1_vals, arr2_vals);
+
+                    if (!allEqual)
+                    {
+                        break;
+                    }
+                }
+
+                // fall back to scalar for the mismatching block and the remaining values
+                for (; i < Size; i++)
+                {
+                    if (_arr1[i] != _arr2[i])
+                        return _arr1[i] - _arr2[i];
+                }
+
+                return 0;
+            }
+
+            Debug.Assert(false, "Different array lengths are not expected");
+            return 0;
+        }
+
+        /// <summary>Compares one predicated SVE vector per iteration; the loop predicate also covers the tail.</summary>
+        [Benchmark]
+        public unsafe long SveStrCmp()
+        {
+            if (Sve.IsSupported)
+            {
+                int i = 0;
+                int elemsInVector = (int)Sve.Count8BitElements();
+
+                Vector<byte> ptrue = Sve.CreateTrueMaskByte();
+                Vector<byte> pLoop = (Vector<byte>)Sve.CreateWhileLessThanMask8Bit(i, Size);
+                Vector<byte> cmp = Vector<byte>.Zero;
+                Vector<byte> arr1_data, arr2_data;
+                bool mismatchFound = false;
+
+                if (_arr1.Length == _arr2.Length)
+                {
+                    fixed (byte* arr1_ptr = _arr1, arr2_ptr = _arr2)
+                    {
+                        while (Sve.TestFirstTrue(ptrue, pLoop))
+                        {
+                            arr1_data = Sve.LoadVector(pLoop, arr1_ptr + i);
+                            arr2_data = Sve.LoadVector(pLoop, arr2_ptr + i);
+
+                            // stop if any values aren't equal
+                            cmp = Sve.CompareNotEqualTo(arr1_data, arr2_data);
+
+                            if (Sve.TestAnyTrue(ptrue, cmp))
+                            {
+                                mismatchFound = true;
+                                break;
+                            }
+
+                            i += elemsInVector;
+
+                            pLoop = (Vector<byte>)Sve.CreateWhileLessThanMask8Bit(i, Size);
+                        }
+
+                        if (mismatchFound)
+                        {
+                            // Scan the lanes directly instead of packing them into an integer
+                            // bitmask: an SVE vector can hold more byte lanes than an int has
+                            // bits, and the explicit flag keeps the "no mismatch" case separate.
+                            for (int j = 0; j < elemsInVector; j++)
+                            {
+                                if (cmp.GetElement(j) != 0)
+                                    return _arr1[i + j] - _arr2[i + j];
+                            }
+                        }
+
+                        return 0;
+                    }
+                }
+
+                Debug.Assert(false, "Different array lengths are not expected");
+                return 0;
+            }
+            return 0;
+        }
+
+        /// <summary>Compares full SVE vectors with an all-true predicate, then finishes with a scalar loop.</summary>
+        [Benchmark]
+        public unsafe long SveTail()
+        {
+            if (Sve.IsSupported)
+            {
+                Vector<byte> ptrue = Sve.CreateTrueMaskByte();
+                Vector<byte> cmp;
+                Vector<byte> arr1_data, arr2_data;
+
+                int i = 0;
+                int elemsInVector = (int)Sve.Count8BitElements();
+
+                if (_arr1.Length == _arr2.Length)
+                {
+                    fixed (byte* arr1_ptr = _arr1, arr2_ptr = _arr2)
+                    {
+                        for (; i <= Size - elemsInVector; i += elemsInVector)
+                        {
+                            arr1_data = Sve.LoadVector(ptrue, arr1_ptr + i);
+                            arr2_data = Sve.LoadVector(ptrue, arr2_ptr + i);
+
+                            cmp = Sve.CompareNotEqualTo(arr1_data, arr2_data);
+
+                            // Non-zero when at least one lane differs. Kept at full width:
+                            // truncating the lane sum to a byte could wrap it back to zero.
+                            ulong mismatchSum = Sve.AddAcross(cmp).ToScalar();
+
+                            if (mismatchSum != 0)
+                            {
+                                break;
+                            }
+                        }
+
+                        // fall back to scalar for the mismatching vector and the remaining values
+                        for (; i < Size; i++)
+                        {
+                            if (_arr1[i] != _arr2[i])
+                                return _arr1[i] - _arr2[i];
+                        }
+
+                        return 0;
+                    }
+                }
+
+                Debug.Assert(false, "Different array lengths are not expected");
+                return 0;
+            }
+
+            return 0;
+        }
+    }
+}
+
+#pragma warning restore SYSLIB5003
\ No newline at end of file
diff --git a/src/benchmarks/micro/sve/StrIndexOf.cs b/src/benchmarks/micro/sve/StrIndexOf.cs
new file mode 100644
index 00000000000..214d90e6b4f
--- /dev/null
+++ b/src/benchmarks/micro/sve/StrIndexOf.cs
@@ -0,0 +1,204 @@
+#pragma warning disable SYSLIB5003
+
+using System;
+using System.Numerics;
+using System.Linq;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.Arm;
+using BenchmarkDotNet.Attributes;
+using BenchmarkDotNet.Extensions;
+using BenchmarkDotNet.Configs;
+using BenchmarkDotNet.Filters;
+using MicroBenchmarks;
+
+namespace SveBenchmarks
+{
+    /// <summary>
+    /// Finds the index of a character in a char array with scalar, Vector128 and SVE code.
+    /// Each benchmark returns the index of the first match, or -1 when there is none.
+    /// </summary>
+    [BenchmarkCategory(Categories.Runtime)]
+    [OperatingSystemsArchitectureFilter(allowed: true, System.Runtime.InteropServices.Architecture.Arm64)]
+    [Config(typeof(Config))]
+    public class StrIndexOf
+    {
+        // Filters the benchmarks out when SVE is not supported.
+        private class Config : ManualConfig
+        {
+            public Config()
+            {
+                AddFilter(new SimpleFilter(_ => Sve.IsSupported));
+            }
+        }
+
+        [Params(15, 127, 527, 10015)]
+        public int Size;
+
+        private char[] _array;
+        private char _searchValue;
+
+        /// <summary>Fills the array with the characters 1..Size and picks the middle one as search value.</summary>
+        [GlobalSetup]
+        public virtual void Setup()
+        {
+            _array = Enumerable.Range(1, Size)
+                               .Select(i => (char) i)
+                               .ToArray();
+            _searchValue = _array[Size / 2];
+        }
+
+        /// <summary>Element-by-element reference implementation.</summary>
+        [Benchmark]
+        public int Scalar()
+        {
+            for (int i = 0; i < _array.Length; i++)
+            {
+                if (_array[i] == _searchValue)
+                {
+                    return i;
+                }
+            }
+            return -1;
+        }
+
+        /// <summary>Searches whole Vector128 blocks, then finishes with a scalar loop.</summary>
+        [Benchmark]
+        public unsafe int Vector128IndexOf()
+        {
+            int incr = Vector128<ushort>.Count;
+            int i = 0;
+
+            fixed (char* arr_ptr = _array)
+            {
+                Vector128<ushort> target = Vector128.Create((ushort)_searchValue);
+
+                for (; i <= Size - incr; i += incr)
+                {
+                    Vector128<ushort> vals = Vector128.Load(((ushort*)arr_ptr) + i);
+
+                    // Compare each vector value with the target
+                    Vector128<ushort> cmp = Vector128.Equals(vals, target);
+
+                    ushort cmpSum = Vector128.Sum(cmp);
+
+                    if (cmpSum > 0)
+                    {
+                        // find index of matching item
+                        for (int j = 0; j < incr; j++)
+                        {
+                            if (cmp.GetElement(j) == ushort.MaxValue)
+                            {
+                                return i + j;
+                            }
+                        }
+                    }
+                }
+
+                // Search the remaining values
+                for (; i < Size; i++)
+                {
+                    if (_array[i] == _searchValue)
+                        return i;
+                }
+
+                return -1;
+            }
+        }
+
+        /// <summary>Searches one predicated SVE vector per iteration; the loop predicate also covers the tail.</summary>
+        [Benchmark]
+        public unsafe int SveIndexOf()
+        {
+            if (Sve.IsSupported)
+            {
+                int i = 0;
+                int elemsInVector = (int)Sve.Count16BitElements();
+
+                fixed (char* arr_ptr = _array)
+                {
+                    Vector<ushort> target = new Vector<ushort>((ushort)_searchValue);
+                    Vector<ushort> ptrue = Sve.CreateTrueMaskUInt16();
+                    var pLoop = (Vector<ushort>)Sve.CreateWhileLessThanMask16Bit(i, Size);
+
+                    while (Sve.TestFirstTrue(ptrue, pLoop))
+                    {
+                        Vector<ushort> vals = Sve.LoadVector(pLoop, ((ushort*)arr_ptr) + i);
+                        Vector<ushort> cmpVec = Sve.CompareEqual(vals, target);
+
+                        ulong cmpSum = Sve.AddAcross(cmpVec).ToScalar();
+
+                        if (cmpSum != 0)
+                        {
+                            // find index of matching item (any non-zero lane is a match)
+                            for (int j = 0; j < Vector<ushort>.Count; j++)
+                            {
+                                if (cmpVec.GetElement(j) != 0)
+                                {
+                                    return i + j;
+                                }
+                            }
+                        }
+
+                        // Advance first, then rebuild the predicate for the new offset, so the
+                        // next load never uses a predicate computed for the previous block.
+                        i += elemsInVector;
+                        pLoop = (Vector<ushort>)Sve.CreateWhileLessThanMask16Bit(i, Size);
+                    }
+                }
+            }
+
+            return -1;
+        }
+
+        /// <summary>Searches full SVE vectors with an all-true predicate, then finishes with a scalar loop.</summary>
+        [Benchmark]
+        public unsafe int SveTail()
+        {
+            if (Sve.IsSupported)
+            {
+                int i = 0;
+                int elemsInVector = (int)Sve.Count16BitElements();
+
+                fixed (char* arr_ptr = _array)
+                {
+                    Vector<ushort> target = new Vector<ushort>((ushort)_searchValue);
+                    var pLoop = (Vector<ushort>)Sve.CreateTrueMaskInt16();
+
+                    // "<=" keeps a final, exactly full vector in this loop instead of the scalar one
+                    for (; i <= Size - elemsInVector; i += elemsInVector)
+                    {
+                        Vector<ushort> vals = Sve.LoadVector(pLoop, ((ushort*)arr_ptr) + i);
+                        Vector<ushort> cmpVec = Sve.CompareEqual(vals, target);
+
+                        ulong cmpSum = Sve.AddAcross(cmpVec).ToScalar();
+
+                        if (cmpSum != 0)
+                        {
+                            // find index of matching item (any non-zero lane is a match)
+                            for (int j = 0; j < Vector<ushort>.Count; j++)
+                            {
+                                if (cmpVec.GetElement(j) != 0)
+                                {
+                                    return i + j;
+                                }
+                            }
+                        }
+                    }
+
+                    // Search the remaining values
+                    for (; i < Size; i++)
+                    {
+                        if (_array[i] == _searchValue)
+                            return i;
+                    }
+
+                    return -1;
+                }
+            }
+
+            return -1;
+        }
+    }
+}
+
+#pragma warning restore SYSLIB5003
\ No newline at end of file
diff --git a/src/benchmarks/micro/sve/StrLen.cs b/src/benchmarks/micro/sve/StrLen.cs
new file mode 100644
index 00000000000..246bc64d834
--- /dev/null
+++ b/src/benchmarks/micro/sve/StrLen.cs
@@ -0,0 +1,176 @@
+#pragma warning disable SYSLIB5003
+
+using System;
+using System.Numerics;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.Arm;
+using BenchmarkDotNet.Attributes;
+using BenchmarkDotNet.Extensions;
+using BenchmarkDotNet.Configs;
+using BenchmarkDotNet.Filters;
+using MicroBenchmarks;
+
+namespace SveBenchmarks
+{
+    /// <summary>
+    /// Measures the length of a zero-terminated byte string with scalar, Vector128 and SVE code.
+    /// Each benchmark returns the number of bytes in front of the terminator.
+    /// </summary>
+    [BenchmarkCategory(Categories.Runtime)]
+    [OperatingSystemsArchitectureFilter(allowed: true, System.Runtime.InteropServices.Architecture.Arm64)]
+    [Config(typeof(Config))]
+    public class StrLen
+    {
+        // Filters the benchmarks out when SVE is not supported.
+        private class Config : ManualConfig
+        {
+            public Config()
+            {
+                AddFilter(new SimpleFilter(_ => Sve.IsSupported));
+            }
+        }
+
+        [Params(15, 127, 527, 10015)]
+        public int Size;
+
+        private byte[] _array;
+
+        /// <summary>Builds an array of Size non-zero bytes followed by a single zero terminator.</summary>
+        [GlobalSetup]
+        public virtual void Setup()
+        {
+            _array = ValuesGenerator.Array<byte>(Size + 1);
+
+            // fixed seed so that every run measures the same input
+            var random = new Random(12345);
+            for (int i = 0; i < _array.Length; i++)
+            {
+                // Replaces any zero elements with a random non-zero value
+                if (_array[i] == 0)
+                {
+                    _array[i] = (byte)random.Next(1, byte.MaxValue);
+                }
+            }
+
+            _array[Size] = 0; // add zero to the end to simulate a terminated string
+        }
+
+        /// <summary>Byte-by-byte reference implementation.</summary>
+        [Benchmark]
+        public unsafe ulong Scalar()
+        {
+            // Counted in a local: a field would keep growing across benchmark invocations.
+            ulong length = 0;
+
+            fixed (byte* arr_ptr = _array)
+            {
+                if (arr_ptr == null)
+                    return 0;
+
+                byte* ptr = arr_ptr;
+
+                while (*ptr != 0)
+                {
+                    length++;
+                    ptr++;
+                }
+            }
+
+            return length;
+        }
+
+        /// <summary>Checks the first 16 bytes one by one, then scans 16-byte blocks with AdvSimd.</summary>
+        [Benchmark]
+        public unsafe ulong Vector128StrLen()
+        {
+            Vector128<byte> data = Vector128<byte>.Zero;
+            ulong cmp = 0;
+            ulong i = 0;
+
+            fixed (byte* ptr = _array)
+            {
+                byte* arr_ptr = ptr;
+
+                // Check for a zero in first 16 bytes
+                for (i = 0; i < 16; i++)
+                {
+                    if (arr_ptr[i] == 0)
+                    {
+                        return i;
+                    }
+                }
+
+                // look for a zero in the next 16 byte block
+                // NOTE(review): whole blocks are loaded without a bounds check; every Size value
+                // above gives an array length that is a multiple of 16 - confirm before adding sizes.
+                while (cmp == 0)
+                {
+                    data = Vector128.Load(arr_ptr + i);
+                    Vector128<byte> min = AdvSimd.Arm64.MinPairwise(data, data);
+                    Vector64<byte> cmpVec = Vector64.Equals(min.GetLower(), Vector64<byte>.Zero);
+
+                    cmp = cmpVec.AsUInt64().ToScalar();
+
+                    i = i + (ulong)(sizeof(Vector128<byte>) / sizeof(byte));
+                }
+
+                // once a zero is found, go back one 16-byte block and find location of the zero
+                i = i - (ulong)(sizeof(Vector128<byte>) / sizeof(byte));
+
+                Vector128<byte> cmpVecLoc = AdvSimd.CompareEqual(data, Vector128<byte>.Zero);
+
+                // shift each 16-bit lane right by 4 and narrow it to 8 bits: 4 result bits per input byte
+                Vector64<byte> shifted = AdvSimd.ShiftRightLogicalNarrowingLower(
+                    cmpVecLoc.AsUInt16(),
+                    4
+                );
+
+                ulong syncd = shifted.AsUInt64().ToScalar();
+                int count = BitOperations.TrailingZeroCount(syncd);
+
+                return i + (ulong)(count / 4);
+            }
+        }
+
+        /// <summary>Scans one predicated SVE vector per iteration until a zero byte is seen.</summary>
+        [Benchmark]
+        public unsafe ulong SveStrLen()
+        {
+            if (Sve.IsSupported)
+            {
+                Vector<byte> ptrue = Sve.CreateTrueMaskByte();
+                Vector<byte> cmp, data;
+
+                ulong i = 0;
+                ulong elemsInVector = Sve.Count8BitElements();
+
+                // NOTE(review): the predicate is bounded by Size, so the terminator at index Size is never
+                // in an active lane; presumably the loop ends on lanes outside the predicate - confirm.
+                Vector<byte> pLoop = (Vector<byte>)Sve.CreateWhileLessThanMask8Bit((int)i, Size);
+
+                fixed (byte* arr_ptr = _array)
+                {
+                    while (true)
+                    {
+                        data = Sve.LoadVector(pLoop, arr_ptr + i);
+                        cmp = Sve.CompareEqual(data, Vector<byte>.Zero);
+
+                        if (Sve.TestAnyTrue(ptrue, cmp))
+                            break;
+                        else
+                        {
+                            i += elemsInVector;
+                            pLoop = (Vector<byte>)Sve.CreateWhileLessThanMask8Bit((int)i, Size);
+                        }
+                    }
+
+                    i += Sve.GetActiveElementCount(pLoop, data);
+                    return i;
+                }
+            }
+            return 0;
+        }
+    }
+}
+
+#pragma warning restore SYSLIB5003
\ No newline at end of file