|
| 1 | +using System.Globalization; |
| 2 | +using System.Runtime.InteropServices; |
| 3 | +using BenchmarkDotNet.Attributes; |
| 4 | +using Umbraco.Cms.Core; |
| 5 | + |
| 6 | +namespace Umbraco.Tests.Benchmarks; |
| 7 | + |
/// <summary>
/// Benchmarks different ways of parsing a comma separated string of integers
/// into an <see cref="int"/> array holding the numbers in reversed order.
/// </summary>
/// <remarks>
/// Every public benchmark runs its private helper of the same name over the same
/// <see cref="Size"/> pre-generated inputs and returns the total number of parsed ids,
/// so the work has an observable result and is not optimized away.
/// </remarks>
[MemoryDiagnoser]
public class StringExtensionsBenchmarks
{
    private const int Seed = 60;
    private const int Size = 100;
    private static readonly string[] _stringsWithCommaSeparatedNumbers = new string[Size];

    /// <summary>
    /// Generates the shared input data once, before any benchmark runs.
    /// </summary>
    static StringExtensionsBenchmarks()
    {
        // Fixed seed: every run and every benchmark method sees exactly the same inputs.
        // The generator is only needed here, so it is a local rather than a static field.
        Random seededRandom = new(Seed);
        for (var i = 0; i < Size; i++)
        {
            // guess on path lengths in normal use (upper bound is exclusive: 2..9 numbers)
            int countOfNumbers = seededRandom.Next(2, 10);
            int[] randomIds = new int[countOfNumbers];
            for (var j = 0; j < countOfNumbers; j++)
            {
                // upper bound is exclusive, lower bound -1 is inclusive
                randomIds[j] = seededRandom.Next(-1, int.MaxValue);
            }

            _stringsWithCommaSeparatedNumbers[i] = string.Join(',', randomIds);
        }
    }

    /// <summary>
    /// The original LINQ based implementation, as it was before 2025-02-01
    /// (see https://github.com/umbraco/Umbraco-CMS/pull/18048).
    /// </summary>
    /// <returns>A number so the compiler/runtime doesn't optimize it away.</returns>
    [Benchmark]
    public int Linq()
    {
        var totalNumberOfIds = 0; // a number to operate on so it is not optimized away
        foreach (string stringWithCommaSeparatedNumbers in _stringsWithCommaSeparatedNumbers)
        {
            totalNumberOfIds += Linq(stringWithCommaSeparatedNumbers).Length;
        }

        return totalNumberOfIds;
    }

    /// <summary>
    /// Splits <paramref name="path"/> into strings, wraps every parse result in an
    /// <c>Attempt&lt;int&gt;</c>, keeps the successful ones and reverses them.
    /// </summary>
    /// <param name="path">Comma separated numbers.</param>
    /// <returns>The successfully parsed numbers in reversed order.</returns>
    private static int[] Linq(string path)
    {
        int[] nodeIds = path.Split(Constants.CharArrays.Comma, StringSplitOptions.RemoveEmptyEntries)
            .Select(x =>
                int.TryParse(x, NumberStyles.Integer, CultureInfo.InvariantCulture, out var output)
                    ? Attempt<int>.Succeed(output)
                    : Attempt<int>.Fail())
            .Where(x => x.Success)
            .Select(x => x.Result)
            .Reverse()
            .ToArray();
        return nodeIds;
    }

    /// <summary>
    /// Here we are allocating strings for the separated values,
    /// BUT we know the count of segments, so the list can be created with a capacity it never outgrows.
    /// </summary>
    /// <returns>A number so the compiler/runtime doesn't optimize it away.</returns>
    [Benchmark]
    public int SplitToHeapStrings()
    {
        var totalNumberOfIds = 0; // a number to operate on so it is not optimized away
        foreach (string stringWithCommaSeparatedNumbers in _stringsWithCommaSeparatedNumbers)
        {
            totalNumberOfIds += SplitToHeapStrings(stringWithCommaSeparatedNumbers).Length;
        }

        return totalNumberOfIds;
    }

    /// <summary>
    /// Splits <paramref name="path"/> into strings and parses them from last to first.
    /// </summary>
    /// <param name="path">Comma separated numbers.</param>
    /// <returns>The successfully parsed numbers in reversed order.</returns>
    private static int[] SplitToHeapStrings(string path)
    {
        string[] pathSegments = path.Split(Constants.CharArrays.Comma, StringSplitOptions.RemoveEmptyEntries);

        // The segment count is an upper bound for the number of ids (only successful parses are added),
        // so with this capacity the list never has to grow.
        List<int> nodeIds = new(pathSegments.Length);

        // Walking backwards yields the reversed order directly.
        for (int i = pathSegments.Length - 1; i >= 0; i--)
        {
            if (int.TryParse(pathSegments[i], NumberStyles.Integer, CultureInfo.InvariantCulture, out int pathSegment))
            {
                nodeIds.Add(pathSegment);
            }
        }

        return nodeIds.ToArray(); // allocates a new array
    }

    /// <summary>
    /// Here we avoid allocating strings for the separated values,
    /// BUT we do not know the count of numbers, so the list may have to be resized while we add numbers to it.
    /// </summary>
    /// <remarks>
    /// Same parsing as <see cref="SplitToStackSpansWithoutEmptyCheck()"/>; the difference is that the
    /// reversing copy reads the list through <see cref="CollectionsMarshal.AsSpan{T}(List{T})"/>
    /// instead of through the list indexer.
    /// </remarks>
    /// <returns>A number so the compiler/runtime doesn't optimize it away.</returns>
    [Benchmark]
    public int SplitToStackSpansWithoutEmptyCheckReversingListAsSpan()
    {
        var totalNumberOfIds = 0; // a number to operate on so it is not optimized away
        foreach (string stringWithCommaSeparatedNumbers in _stringsWithCommaSeparatedNumbers)
        {
            totalNumberOfIds += SplitToStackSpansWithoutEmptyCheckReversingListAsSpan(stringWithCommaSeparatedNumbers).Length;
        }

        return totalNumberOfIds;
    }

    /// <summary>
    /// Parses the segments of <paramref name="path"/> as spans, then copies the list content
    /// into the result array back to front via a span over the list.
    /// </summary>
    /// <param name="path">Comma separated numbers.</param>
    /// <returns>The successfully parsed numbers in reversed order.</returns>
    private static int[] SplitToStackSpansWithoutEmptyCheckReversingListAsSpan(string path)
    {
        ReadOnlySpan<char> pathSpan = path.AsSpan();
        MemoryExtensions.SpanSplitEnumerator<char> pathSegments = pathSpan.Split(Constants.CharArrays.Comma);

        // Here we do NOT know up front how many numbers there will be, so the list starts empty.
        // Default empty List<> internal array capacity on add is currently 4.
        // If the count of numbers is less than 4, we overallocate a little.
        // If the count of numbers is more than 4, the list will be resized, resulting in a copy from the initial array to a new double size array.
        // If the count of numbers is more than 8, another new array is allocated and copied to.
        List<int> nodeIds = [];
        foreach (Range rangeOfPathSegment in pathSegments)
        {
            // this is only a span of the string, a string is not allocated on the heap
            ReadOnlySpan<char> pathSegmentSpan = pathSpan[rangeOfPathSegment];
            if (int.TryParse(pathSegmentSpan, NumberStyles.Integer, CultureInfo.InvariantCulture, out int pathSegment))
            {
                nodeIds.Add(pathSegment);
            }
        }

        // Read the list through a span; the list is not modified while the span is in use.
        Span<int> nodeIdsSpan = CollectionsMarshal.AsSpan(nodeIds);
        var result = new int[nodeIdsSpan.Length];
        var resultIndex = 0;
        for (int i = nodeIdsSpan.Length - 1; i >= 0; i--)
        {
            result[resultIndex++] = nodeIdsSpan[i];
        }

        return result;
    }

    /// <summary>
    /// Here we avoid allocating strings for the separated values,
    /// BUT we do not know the count of numbers, so the list may have to be resized while we add numbers to it.
    /// </summary>
    /// <returns>A number so the compiler/runtime doesn't optimize it away.</returns>
    [Benchmark]
    public int SplitToStackSpansWithoutEmptyCheck()
    {
        var totalNumberOfIds = 0; // a number to operate on so it is not optimized away
        foreach (string stringWithCommaSeparatedNumbers in _stringsWithCommaSeparatedNumbers)
        {
            totalNumberOfIds += SplitToStackSpansWithoutEmptyCheck(stringWithCommaSeparatedNumbers).Length;
        }

        return totalNumberOfIds;
    }

    /// <summary>
    /// Parses the segments of <paramref name="path"/> as spans, then copies the list content
    /// into the result array back to front via the list indexer.
    /// </summary>
    /// <param name="path">Comma separated numbers.</param>
    /// <returns>The successfully parsed numbers in reversed order.</returns>
    private static int[] SplitToStackSpansWithoutEmptyCheck(string path)
    {
        ReadOnlySpan<char> pathSpan = path.AsSpan();
        MemoryExtensions.SpanSplitEnumerator<char> pathSegments = pathSpan.Split(Constants.CharArrays.Comma);

        // Here we do NOT know up front how many numbers there will be, so the list starts empty.
        // Default empty List<> internal array capacity on add is currently 4.
        // If the count of numbers is less than 4, we overallocate a little.
        // If the count of numbers is more than 4, the list will be resized, resulting in a copy from the initial array to a new double size array.
        // If the count of numbers is more than 8, another new array is allocated and copied to.
        List<int> nodeIds = [];
        foreach (Range rangeOfPathSegment in pathSegments)
        {
            // this is only a span of the string, a string is not allocated on the heap
            ReadOnlySpan<char> pathSegmentSpan = pathSpan[rangeOfPathSegment];
            if (int.TryParse(pathSegmentSpan, NumberStyles.Integer, CultureInfo.InvariantCulture, out int pathSegment))
            {
                nodeIds.Add(pathSegment);
            }
        }

        var result = new int[nodeIds.Count];
        var resultIndex = 0;
        for (int i = nodeIds.Count - 1; i >= 0; i--)
        {
            result[resultIndex++] = nodeIds[i];
        }

        return result;
    }

    /// <summary>
    /// Here we avoid allocating strings for the separated values,
    /// BUT we do not know the count of numbers, so the list may have to be resized while we add numbers to it.
    /// </summary>
    /// <remarks>Here with an empty check, unlikely in umbraco use case.</remarks>
    /// <returns>A number so the compiler/runtime doesn't optimize it away.</returns>
    [Benchmark]
    public int SplitToStackSpansWithEmptyCheck()
    {
        var totalNumberOfIds = 0; // a number to operate on so it is not optimized away
        foreach (string stringWithCommaSeparatedNumbers in _stringsWithCommaSeparatedNumbers)
        {
            totalNumberOfIds += SplitToStackSpansWithEmptyCheck(stringWithCommaSeparatedNumbers).Length;
        }

        return totalNumberOfIds;
    }

    /// <summary>
    /// Parses the segments of <paramref name="path"/> as spans, skipping empty segments before
    /// attempting to parse, then copies the list content into the result array back to front.
    /// </summary>
    /// <param name="path">Comma separated numbers.</param>
    /// <returns>The successfully parsed numbers in reversed order.</returns>
    private static int[] SplitToStackSpansWithEmptyCheck(string path)
    {
        ReadOnlySpan<char> pathSpan = path.AsSpan();
        MemoryExtensions.SpanSplitEnumerator<char> pathSegments = pathSpan.Split(Constants.CharArrays.Comma);

        // Here we do NOT know up front how many numbers there will be, so the list starts empty.
        // Default empty List<> internal array capacity on add is currently 4.
        // If the count of numbers is less than 4, we overallocate a little.
        // If the count of numbers is more than 4, the list will be resized, resulting in a copy from the initial array to a new double size array.
        // If the count of numbers is more than 8, another new array is allocated and copied to.
        List<int> nodeIds = [];
        foreach (Range rangeOfPathSegment in pathSegments)
        {
            // this is only a span of the string, a string is not allocated on the heap
            ReadOnlySpan<char> pathSegmentSpan = pathSpan[rangeOfPathSegment];

            // Skip empty segments explicitly instead of letting TryParse reject them.
            if (pathSegmentSpan.IsEmpty)
            {
                continue;
            }

            if (int.TryParse(pathSegmentSpan, NumberStyles.Integer, CultureInfo.InvariantCulture, out int pathSegment))
            {
                nodeIds.Add(pathSegment);
            }
        }

        var result = new int[nodeIds.Count];
        var resultIndex = 0;
        for (int i = nodeIds.Count - 1; i >= 0; i--)
        {
            result[resultIndex++] = nodeIds[i];
        }

        return result;
    }

    // Recorded results:
    //
    // BenchmarkDotNet v0.14.0, Windows 11 (10.0.26100.2894)
    // Intel Core i7-10750H CPU 2.60GHz, 1 CPU, 12 logical and 6 physical cores
    // .NET Core SDK 3.1.426 [C:\Program Files\dotnet\sdk]
    //   [Host] : .NET 9.0.0 (9.0.24.52809), X64 RyuJIT AVX2
    //
    // Toolchain=InProcessEmitToolchain
    //
    // | Method                                                | Mean     | Error    | StdDev   | Gen0   | Allocated |
    // |------------------------------------------------------ |---------:|---------:|---------:|-------:|----------:|
    // | Linq                                                  | 46.39 us | 0.515 us | 0.430 us | 9.4604 |  58.31 KB |
    // | SplitToHeapStrings                                    | 30.28 us | 0.310 us | 0.275 us | 7.0801 |  43.55 KB |
    // | SplitToStackSpansWithoutEmptyCheckReversingListAsSpan | 20.47 us | 0.290 us | 0.257 us | 2.7161 |  16.73 KB |
    // | SplitToStackSpansWithoutEmptyCheck                    | 20.60 us | 0.315 us | 0.280 us | 2.7161 |  16.73 KB |
    // | SplitToStackSpansWithEmptyCheck                       | 20.57 us | 0.308 us | 0.288 us | 2.7161 |  16.73 KB |
}
0 commit comments