-
Notifications
You must be signed in to change notification settings - Fork 280
SVE microbenchmarks with string operations #4841
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
jacob-crawley
wants to merge
5
commits into
dotnet:main
Choose a base branch
from
jacob-crawley:github-sve-benchmarks
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
4ab6c50
SVE microbenchmarks with string operations
jacob-crawley 7ca4047
Filtering string benchmarks to only run on aarch64
jacob-crawley fd85f63
Add check for SVE support in string benchmarks
jacob-crawley aca921b
Add filter to benchmarks for SVE support
jacob-crawley ee149b5
Naming changes to string benchmarks
jacob-crawley File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,218 @@ | ||
using System; | ||
using System.Numerics; | ||
using System.Linq; | ||
using System.Diagnostics; | ||
using System.Runtime.Intrinsics; | ||
using System.Runtime.Intrinsics.Arm; | ||
using BenchmarkDotNet.Attributes; | ||
using BenchmarkDotNet.Extensions; | ||
using BenchmarkDotNet.Configs; | ||
using BenchmarkDotNet.Filters; | ||
using MicroBenchmarks; | ||
|
||
namespace SveBenchmarks | ||
{ | ||
/// <summary>
/// Micro-benchmarks comparing several implementations of a byte-wise,
/// strcmp-style comparison of two equally sized arrays: a scalar loop,
/// a <see cref="Vector128{T}"/> loop and two SVE variants.
/// Every benchmark returns the difference of the first pair of bytes that
/// differ, or 0 when the first <see cref="Size"/> bytes are identical.
/// </summary>
[BenchmarkCategory(Categories.Runtime)]
[OperatingSystemsArchitectureFilter(allowed: true, System.Runtime.InteropServices.Architecture.Arm64)]
[Config(typeof(Config))]
public class StrCmp
{
    /// <summary>
    /// Benchmark configuration whose filter predicate is <c>Sve.IsSupported</c>,
    /// so the benchmarks are only selected on hardware that reports SVE support.
    /// </summary>
    private class Config : ManualConfig
    {
        public Config()
        {
            AddFilter(new SimpleFilter(_ => Sve.IsSupported));
        }
    }

    /// <summary>Number of bytes in each of the compared arrays.</summary>
    [Params(15, 127, 527, 10015)]
    public int Size;

    /// <summary>
    /// Where the two arrays are made to differ: "Middle", "End" or "None".
    /// </summary>
    [Params("Middle", "End", "None")]
    public string Modify;

    private byte[] _arr1, _arr2;

    /// <summary>
    /// Allocates both input arrays and, depending on <see cref="Modify"/>,
    /// bumps a single byte so that the arrays differ at a known position.
    /// </summary>
    [GlobalSetup]
    public virtual void Setup()
    {
        // NOTE(review): the "None" case relies on ValuesGenerator.Array producing
        // identical content for two calls with the same size - confirm.
        _arr1 = ValuesGenerator.Array<byte>(Size);
        _arr2 = ValuesGenerator.Array<byte>(Size);

        switch (Modify)
        {
            case "Middle":
                // Change the element in the middle of the first array.
                _arr1[Size / 2] += 1;
                break;

            case "End":
                // Change the last element of the second array.
                _arr2[Size - 1] += 1;
                break;

            case "None":
                // Leave both arrays untouched.
                break;
        }
    }

    /// <summary>Baseline: compares the arrays one byte at a time.</summary>
    /// <returns>
    /// Difference of the first mismatching pair of bytes, or 0 if none differ.
    /// </returns>
    [Benchmark]
    public int Scalar()
    {
        if (_arr1.Length != _arr2.Length)
        {
            Debug.Assert(false, "Different array lengths are not expected");
            return 0;
        }

        for (int i = 0; i < Size; i++)
        {
            if (_arr1[i] != _arr2[i])
            {
                return _arr1[i] - _arr2[i];
            }
        }

        return 0;
    }

    /// <summary>
    /// Compares the arrays one <see cref="Vector128{T}"/> of bytes at a time and
    /// finishes with a scalar loop.
    /// </summary>
    /// <returns>
    /// Difference of the first mismatching pair of bytes, or 0 if none differ.
    /// </returns>
    [Benchmark]
    public int Vector128StrCmp()
    {
        if (_arr1.Length != _arr2.Length)
        {
            Debug.Assert(false, "Different array lengths are not expected");
            return 0;
        }

        int incr = Vector128<byte>.Count;
        int i = 0;

        // Skip over whole vectors for as long as they compare equal.
        for (; i <= Size - incr; i += incr)
        {
            Vector128<byte> arr1_vals = Vector128.LoadUnsafe(ref _arr1[i]);
            Vector128<byte> arr2_vals = Vector128.LoadUnsafe(ref _arr2[i]);

            if (!Vector128.EqualsAll(arr1_vals, arr2_vals))
            {
                break;
            }
        }

        // The scalar loop both locates the mismatch inside the vector that
        // stopped the loop above and handles the remaining tail elements.
        for (; i < Size; i++)
        {
            if (_arr1[i] != _arr2[i])
            {
                return _arr1[i] - _arr2[i];
            }
        }

        return 0;
    }

    /// <summary>
    /// Compares the arrays with SVE using a while-less-than loop predicate, so
    /// the final partial vector is handled by the same loop.
    /// </summary>
    /// <returns>
    /// Difference of the first mismatching pair of bytes, or 0 if none differ
    /// (also 0 when SVE is not supported).
    /// </returns>
    [Benchmark]
    public unsafe long SveStrCmp()
    {
        if (!Sve.IsSupported)
        {
            return 0;
        }

        if (_arr1.Length != _arr2.Length)
        {
            Debug.Assert(false, "Different array lengths are not expected");
            return 0;
        }

        int i = 0;
        int elemsInVector = (int)Sve.Count8BitElements();

        Vector<byte> ptrue = Sve.CreateTrueMaskByte();
        Vector<byte> pLoop = (Vector<byte>)Sve.CreateWhileLessThanMask8Bit(i, Size);
        Vector<byte> cmp = Vector<byte>.Zero;

        fixed (byte* arr1_ptr = _arr1, arr2_ptr = _arr2)
        {
            while (Sve.TestFirstTrue(ptrue, pLoop))
            {
                Vector<byte> arr1_data = Sve.LoadVector(pLoop, arr1_ptr + i);
                Vector<byte> arr2_data = Sve.LoadVector(pLoop, arr2_ptr + i);

                // Stop as soon as any lane differs; 'i' then still points at
                // the start of the vector that contains the mismatch.
                cmp = Sve.CompareNotEqualTo(arr1_data, arr2_data);

                if (Sve.TestAnyTrue(ptrue, cmp))
                {
                    break;
                }

                i += elemsInVector;

                pLoop = (Vector<byte>)Sve.CreateWhileLessThanMask8Bit(i, Size);
            }

            // Locate the first differing lane directly. A 32-bit bitmask built
            // with (1 << j) cannot be used here: the number of byte lanes may
            // exceed 32 and C# masks int shift counts to five bits, which would
            // alias high lanes onto low ones. When the loop finished without a
            // mismatch, cmp holds no set lane and the scan falls through.
            for (int j = 0; j < elemsInVector; j++)
            {
                if (cmp.GetElement(j) != 0)
                {
                    return _arr1[i + j] - _arr2[i + j];
                }
            }

            return 0;
        }
    }

    /// <summary>
    /// Compares the arrays with SVE one full vector at a time and handles the
    /// remaining elements with a scalar tail loop.
    /// </summary>
    /// <returns>
    /// Difference of the first mismatching pair of bytes, or 0 if none differ
    /// (also 0 when SVE is not supported).
    /// </returns>
    [Benchmark]
    public unsafe long SveTail()
    {
        if (!Sve.IsSupported)
        {
            return 0;
        }

        if (_arr1.Length != _arr2.Length)
        {
            Debug.Assert(false, "Different array lengths are not expected");
            return 0;
        }

        Vector<byte> ptrue = Sve.CreateTrueMaskByte();

        int i = 0;
        int elemsInVector = (int)Sve.Count8BitElements();

        fixed (byte* arr1_ptr = _arr1, arr2_ptr = _arr2)
        {
            for (; i <= Size - elemsInVector; i += elemsInVector)
            {
                Vector<byte> arr1_data = Sve.LoadVector(ptrue, arr1_ptr + i);
                Vector<byte> arr2_data = Sve.LoadVector(ptrue, arr2_ptr + i);

                Vector<byte> cmp = Sve.CompareNotEqualTo(arr1_data, arr2_data);

                // A non-zero horizontal sum means at least one lane differs.
                // Compare the full-width sum: narrowing it to a byte first
                // could wrap to zero when many lanes differ at once.
                if (Sve.AddAcross(cmp).ToScalar() != 0)
                {
                    break;
                }
            }

            // The scalar loop both locates the mismatch inside the vector that
            // stopped the loop above and handles the remaining tail elements.
            for (; i < Size; i++)
            {
                if (_arr1[i] != _arr2[i])
                {
                    return _arr1[i] - _arr2[i];
                }
            }

            return 0;
        }
    }
}
} |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The benchmarks that are part of this repo are used to determine whether there is any performance regression in .NET. Running this scalar benchmark multiple times every day is unlikely to catch any regression, so I would focus purely on the ones that use `Sve` directly and indirectly (via Vector types, if possible).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thoughts...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree with @a74nh. The point of adding the scalar version is not to catch any regression in that code, but to compare the improvements we make using the Vector128/Sve APIs.