-
Notifications
You must be signed in to change notification settings - Fork 280
SVE microbenchmarks with string operations #4841
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
4ab6c50
SVE microbenchmarks with string operations
jacob-crawley 7ca4047
Filtering string benchmarks to only run on aarch64
jacob-crawley fd85f63
Add check for SVE support in string benchmarks
jacob-crawley aca921b
Add filter to benchmarks for SVE support
jacob-crawley ee149b5
Naming changes to string benchmarks
jacob-crawley 98f535e
Suppress SYSLIB5003 warning
jacob-crawley c774554
Remove sve tests from unsupported versions of .NET (<9.0)
jacob-crawley File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,222 @@ | ||
#pragma warning disable SYSLIB5003 | ||
|
||
using System; | ||
using System.Numerics; | ||
using System.Linq; | ||
using System.Diagnostics; | ||
using System.Runtime.Intrinsics; | ||
using System.Runtime.Intrinsics.Arm; | ||
using BenchmarkDotNet.Attributes; | ||
using BenchmarkDotNet.Extensions; | ||
using BenchmarkDotNet.Configs; | ||
using BenchmarkDotNet.Filters; | ||
using MicroBenchmarks; | ||
|
||
namespace SveBenchmarks | ||
{ | ||
[BenchmarkCategory(Categories.Runtime)] | ||
[OperatingSystemsArchitectureFilter(allowed: true, System.Runtime.InteropServices.Architecture.Arm64)] | ||
[Config(typeof(Config))] | ||
public class StrCmp | ||
{ | ||
/// <summary>
/// Benchmark configuration for <see cref="StrCmp"/>: registers a single filter whose
/// predicate is <c>Sve.IsSupported</c>.
/// </summary>
private class Config : ManualConfig
{
    /// <summary>Adds the SVE-support filter to this configuration.</summary>
    public Config() => AddFilter(new SimpleFilter(_ => Sve.IsSupported));
}
|
||
[Params(15, 127, 527, 10015)] | ||
public int Size; | ||
|
||
[Params("Middle", "End", "None")] | ||
public string Modify; | ||
|
||
private byte[] _arr1, _arr2; | ||
|
||
/// <summary>
/// Allocates the two input arrays with <see cref="Size"/> elements each and, depending on
/// <see cref="Modify"/>, increments one element so the arrays differ at a known position.
/// </summary>
[GlobalSetup]
public virtual void Setup()
{
    // NOTE(review): the "None" case presumably relies on both calls producing identical
    // contents — confirm against ValuesGenerator.
    _arr1 = ValuesGenerator.Array<byte>(Size);
    _arr2 = ValuesGenerator.Array<byte>(Size);

    if (Modify == "Middle")
    {
        // Alter the element of _arr1 at the midpoint of the array.
        _arr1[Size / 2] += 1;
    }
    else if (Modify == "End")
    {
        // Alter the last element of _arr2.
        _arr2[Size - 1] += 1;
    }

    // "None" (or any other value): neither array is touched.
}
|
||
/// <summary>
/// Scalar baseline: walks the first <see cref="Size"/> bytes of both arrays one element at a time.
/// </summary>
/// <returns>
/// The difference <c>_arr1[k] - _arr2[k]</c> at the first differing index <c>k</c>,
/// or 0 when no difference is found (or when the array lengths differ).
/// </returns>
[Benchmark]
public int Scalar()
{
    if (_arr1.Length != _arr2.Length)
    {
        Debug.Assert(false, "Different array lengths are not expected");
        return 0;
    }

    int index = 0;
    while (index < Size)
    {
        int difference = _arr1[index] - _arr2[index];
        if (difference != 0)
        {
            return difference;
        }

        index++;
    }

    return 0;
}
|
||
/// <summary>
/// Compares the arrays in <see cref="Vector128{T}"/>-sized blocks, then finishes with a
/// scalar pass over whatever remains.
/// </summary>
/// <returns>
/// The difference <c>_arr1[k] - _arr2[k]</c> at the first differing index <c>k</c>,
/// or 0 when no difference is found (or when the array lengths differ).
/// </returns>
[Benchmark]
public int Vector128StrCmp()
{
    if (_arr1.Length != _arr2.Length)
    {
        Debug.Assert(false, "Different array lengths are not expected");
        return 0;
    }

    int lanes = Vector128<byte>.Count;
    int lastBlockStart = Size - lanes;
    int offset = 0;

    // Advance one full vector at a time while the blocks match.
    while (offset <= lastBlockStart)
    {
        Vector128<byte> left = Vector128.LoadUnsafe(ref _arr1[offset]);
        Vector128<byte> right = Vector128.LoadUnsafe(ref _arr2[offset]);

        if (!Vector128.EqualsAll(left, right))
        {
            // Leave offset at the start of the mismatching block; the scalar pass locates the byte.
            break;
        }

        offset += lanes;
    }

    // Scalar pass: covers the tail and, after an early break, the mismatching block.
    while (offset < Size)
    {
        int difference = _arr1[offset] - _arr2[offset];
        if (difference != 0)
        {
            return difference;
        }

        offset++;
    }

    return 0;
}
|
||
|
||
/// <summary>
/// Compares the arrays using SVE with a while-less-than loop predicate, so the final
/// partial vector is handled by the same loop as the full ones.
/// </summary>
/// <returns>
/// The difference <c>_arr1[k] - _arr2[k]</c> at the first differing index <c>k</c>,
/// or 0 when no difference is found, the array lengths differ, or SVE is not supported.
/// </returns>
[Benchmark]
public unsafe long SveStrCmp()
{
    if (Sve.IsSupported)
    {
        int i = 0;
        int elemsInVector = (int)Sve.Count8BitElements();

        Vector<byte> ptrue = Sve.CreateTrueMaskByte();
        Vector<byte> pLoop = (Vector<byte>)Sve.CreateWhileLessThanMask8Bit(i, Size);
        Vector<byte> cmp = Vector<byte>.Zero;
        Vector<byte> arr1_data, arr2_data;

        if (_arr1.Length == _arr2.Length)
        {
            fixed (byte* arr1_ptr = _arr1, arr2_ptr = _arr2)
            {
                while (Sve.TestFirstTrue(ptrue, pLoop))
                {
                    arr1_data = Sve.LoadVector(pLoop, arr1_ptr + i);
                    arr2_data = Sve.LoadVector(pLoop, arr2_ptr + i);

                    // stop if any values aren't equal
                    cmp = Sve.CompareNotEqualTo(arr1_data, arr2_data);

                    if (Sve.TestAnyTrue(ptrue, cmp))
                        break;

                    i += elemsInVector;

                    pLoop = (Vector<byte>)Sve.CreateWhileLessThanMask8Bit(i, Size);
                }

                // Locate the first lane flagged by the last comparison. The lanes are scanned
                // directly rather than packed into a 32-bit mask: an int mask cannot represent
                // more than 32 lanes, and an empty mask would otherwise yield a lane index of 32.
                for (int j = 0; j < elemsInVector; j++)
                {
                    if (cmp.GetElement(j) != 0)
                        return _arr1[i + j] - _arr2[i + j];
                }

                // No lane was flagged: the loop ran to completion without a mismatch.
                return 0;
            }
        }

        Debug.Assert(false, "Different array lengths are not expected");
        return 0;
    }
    return 0;
}
|
||
/// <summary>
/// Compares the arrays using SVE over full vectors only, then finishes with a scalar
/// pass over whatever remains.
/// </summary>
/// <returns>
/// The difference <c>_arr1[k] - _arr2[k]</c> at the first differing index <c>k</c>,
/// or 0 when no difference is found, the array lengths differ, or SVE is not supported.
/// </returns>
[Benchmark]
public unsafe long SveTail()
{
    if (Sve.IsSupported)
    {
        Vector<byte> ptrue = Sve.CreateTrueMaskByte();
        Vector<byte> cmp;
        Vector<byte> arr1_data, arr2_data;

        int i = 0;
        int elemsInVector = (int)Sve.Count8BitElements();

        if (_arr1.Length == _arr2.Length)
        {
            fixed (byte* arr1_ptr = _arr1, arr2_ptr = _arr2)
            {
                for (; i <= Size - elemsInVector; i += elemsInVector)
                {
                    arr1_data = Sve.LoadVector(ptrue, arr1_ptr + i);
                    arr2_data = Sve.LoadVector(ptrue, arr2_ptr + i);

                    cmp = Sve.CompareNotEqualTo(arr1_data, arr2_data);

                    // Sum the comparison result across the vector; a non-zero total means the
                    // block contains a mismatch. The total is kept at full width so it cannot
                    // wrap around to zero.
                    ulong mismatchSum = Sve.AddAcross(cmp).ToScalar();

                    if (mismatchSum != 0)
                    {
                        // Leave i at the start of the mismatching block; the scalar pass locates the byte.
                        break;
                    }
                }

                // Scalar pass: covers the tail and, after an early break, the mismatching block.
                for (; i < Size; i++)
                {
                    if (_arr1[i] != _arr2[i])
                        return _arr1[i] - _arr2[i];
                }

                return 0;
            }
        }

        Debug.Assert(false, "Different array lengths are not expected");
        return 0;
    }

    return 0;
}
} | ||
} | ||
|
||
#pragma warning restore SYSLIB5003 |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The benchmarks that are part of this repo are used to determine whether there is any performance regression in .NET. Running this scalar benchmark multiple times every day is unlikely to catch any regression, so I would focus purely on the ones that use
Sve
directly and indirectly (via Vector types if possible).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thoughts...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree with @a74nh. The point of adding the scalar version is not to catch any regression in that code, but to compare the improvements we get from using the Vector128/Sve APIs.