Skip to content

Commit d9a31d3

Browse files
authored
Avoid allocating strings for parsing comma separated int values (#18199)
1 parent 822c837 commit d9a31d3

File tree

4 files changed

+272
-7
lines changed

4 files changed

+272
-7
lines changed

src/Umbraco.Core/Extensions/StringExtensions.cs

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
using System.ComponentModel.DataAnnotations;
66
using System.Diagnostics.CodeAnalysis;
77
using System.Globalization;
8+
using System.Runtime.InteropServices;
89
using System.Security.Cryptography;
910
using System.Text;
1011
using System.Text.RegularExpressions;
@@ -57,17 +58,24 @@ static StringExtensions()
5758
/// <returns></returns>
5859
public static int[] GetIdsFromPathReversed(this string path)
{
    ReadOnlySpan<char> pathSpan = path.AsSpan();

    // A path with n separators has at most n + 1 segments, so a single buffer of that size is
    // always large enough. This replaces the growable list (no resize copies) and the separate
    // reversal pass into a second array.
    var buffer = new int[pathSpan.Count(Constants.CharArrays.Comma) + 1];

    // Fill the buffer from the back so the ids end up in reversed order.
    var writeIndex = buffer.Length;
    foreach (Range rangeOfPathSegment in pathSpan.Split(Constants.CharArrays.Comma))
    {
        // Slicing the span does not allocate a string for the segment.
        // Empty or non-numeric segments fail to parse and are skipped.
        if (int.TryParse(pathSpan[rangeOfPathSegment], NumberStyles.Integer, CultureInfo.InvariantCulture, out int pathSegment))
        {
            buffer[--writeIndex] = pathSegment;
        }
    }

    // When every segment parsed, the buffer is exactly the result.
    // Otherwise the unused slots are at the front and are trimmed off with a copy.
    return writeIndex == 0 ? buffer : buffer[writeIndex..];
}
7280

7381
/// <summary>

src/Umbraco.Core/Services/PublicAccessService.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
namespace Umbraco.Cms.Core.Services;
1616

17-
internal class PublicAccessService : RepositoryService, IPublicAccessService
17+
internal sealed class PublicAccessService : RepositoryService, IPublicAccessService
1818
{
1919
private readonly IPublicAccessRepository _publicAccessRepository;
2020
private readonly IEntityService _entityService;

src/Umbraco.Core/Services/UserService.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2617,7 +2617,7 @@ private EntityPermissionCollection GetPermissionsForPath(IReadOnlyUserGroup[] gr
26172617
{
26182618
if (pathIds.Length == 0)
26192619
{
2620-
return new EntityPermissionCollection(Enumerable.Empty<EntityPermission>());
2620+
return new EntityPermissionCollection([]);
26212621
}
26222622

26232623
// get permissions for all nodes in the path by group
Lines changed: 257 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,257 @@
1+
using System.Globalization;
2+
using System.Runtime.InteropServices;
3+
using BenchmarkDotNet.Attributes;
4+
using Umbraco.Cms.Core;
5+
6+
namespace Umbraco.Tests.Benchmarks;
7+
8+
/// <summary>
/// Compares different ways of parsing a string of comma separated integers
/// into an array holding the parsed integers in reversed order.
/// </summary>
[MemoryDiagnoser]
public class StringExtensionsBenchmarks
{
    private const int Size = 100;
    private static readonly string[] _stringsWithCommaSeparatedNumbers = new string[Size];

    static StringExtensionsBenchmarks()
    {
        // Fixed seed so every run parses the same input.
        // The generator is only needed while building the input, so it is kept local.
        var seededRandom = new Random(60);
        for (var i = 0; i < Size; i++)
        {
            int countOfNumbers = seededRandom.Next(2, 10); // guess on path lengths in normal use
            int[] randomIds = new int[countOfNumbers];
            for (var i1 = 0; i1 < countOfNumbers; i1++)
            {
                randomIds[i1] = seededRandom.Next(-1, int.MaxValue);
            }

            _stringsWithCommaSeparatedNumbers[i] = string.Join(',', randomIds);
        }
    }

    /// <summary>
    /// Ye olden way of doing it (before 20250201 https://github.com/umbraco/Umbraco-CMS/pull/18048)
    /// </summary>
    /// <returns>A number so the compiler/runtime doesn't optimize it away.</returns>
    [Benchmark]
    public int Linq()
    {
        var totalNumberOfIds = 0; // a number to operate on so it is not optimized away
        foreach (string stringWithCommaSeparatedNumbers in _stringsWithCommaSeparatedNumbers)
        {
            totalNumberOfIds += Linq(stringWithCommaSeparatedNumbers).Length;
        }

        return totalNumberOfIds;
    }

    private static int[] Linq(string path)
    {
        int[] nodeIds = path.Split(Constants.CharArrays.Comma, StringSplitOptions.RemoveEmptyEntries)
            .Select(x =>
                int.TryParse(x, NumberStyles.Integer, CultureInfo.InvariantCulture, out var output)
                    ? Attempt<int>.Succeed(output)
                    : Attempt<int>.Fail())
            .Where(x => x.Success)
            .Select(x => x.Result)
            .Reverse()
            .ToArray();
        return nodeIds;
    }

    /// <summary>
    /// Here we are allocating strings for the separated values,
    /// BUT we know the count of numbers, so we can allocate the exact size of list we need
    /// </summary>
    /// <returns>A number so the compiler/runtime doesn't optimize it away.</returns>
    [Benchmark]
    public int SplitToHeapStrings()
    {
        var totalNumberOfIds = 0; // a number to operate on so it is not optimized away
        foreach (string stringWithCommaSeparatedNumbers in _stringsWithCommaSeparatedNumbers)
        {
            totalNumberOfIds += SplitToHeapStrings(stringWithCommaSeparatedNumbers).Length;
        }

        return totalNumberOfIds;
    }

    private static int[] SplitToHeapStrings(string path)
    {
        string[] pathSegments = path.Split(Constants.CharArrays.Comma, StringSplitOptions.RemoveEmptyEntries);
        List<int> nodeIds = new(pathSegments.Length); // here we know how large the resulting list should at least be
        for (int i = pathSegments.Length - 1; i >= 0; i--)
        {
            if (int.TryParse(pathSegments[i], NumberStyles.Integer, CultureInfo.InvariantCulture, out int pathSegment))
            {
                nodeIds.Add(pathSegment);
            }
        }

        return nodeIds.ToArray(); // allocates a new array
    }

    /// <summary>
    /// Here we avoid allocating strings for the separated values,
    /// BUT we do not know the count of numbers, so the list we add the numbers to might be resized.
    /// The reversed copy reads the list through a span obtained from <see cref="CollectionsMarshal.AsSpan{T}"/>
    /// instead of through the list indexer.
    /// </summary>
    /// <returns>A number so the compiler/runtime doesn't optimize it away.</returns>
    [Benchmark]
    public int SplitToStackSpansWithoutEmptyCheckReversingListAsSpan()
    {
        var totalNumberOfIds = 0; // a number to operate on so it is not optimized away
        foreach (string stringWithCommaSeparatedNumbers in _stringsWithCommaSeparatedNumbers)
        {
            totalNumberOfIds += SplitToStackSpansWithoutEmptyCheckReversingListAsSpan(stringWithCommaSeparatedNumbers).Length;
        }

        return totalNumberOfIds;
    }

    private static int[] SplitToStackSpansWithoutEmptyCheckReversingListAsSpan(string path)
    {
        ReadOnlySpan<char> pathSpan = path.AsSpan();
        MemoryExtensions.SpanSplitEnumerator<char> pathSegments = pathSpan.Split(Constants.CharArrays.Comma);

        // Here we do NOT know how large the resulting list should at least be
        // Default empty List<> internal array capacity on add is currently 4
        // If the count of numbers is less than 4, we overallocate a little
        // If the count of numbers is more than 4, the list will be resized, resulting in a copy from the initial array to a new double size array
        // If the count of numbers is more than 8, another new array is allocated and copied to
        List<int> nodeIds = [];
        foreach (Range rangeOfPathSegment in pathSegments)
        {
            // this is only a span of the string, a string is not allocated on the heap
            ReadOnlySpan<char> pathSegmentSpan = pathSpan[rangeOfPathSegment];
            if (int.TryParse(pathSegmentSpan, NumberStyles.Integer, CultureInfo.InvariantCulture, out int pathSegment))
            {
                nodeIds.Add(pathSegment);
            }
        }

        // Copy in reversed order, reading the list through a span instead of the list indexer
        Span<int> nodeIdsSpan = CollectionsMarshal.AsSpan(nodeIds);
        var result = new int[nodeIdsSpan.Length];
        var resultIndex = 0;
        for (int i = nodeIdsSpan.Length - 1; i >= 0; i--)
        {
            result[resultIndex++] = nodeIdsSpan[i];
        }

        return result;
    }

    /// <summary>
    /// Here we avoid allocating strings for the separated values,
    /// BUT we do not know the count of numbers, so the list we add the numbers to might be resized
    /// </summary>
    /// <returns>A number so the compiler/runtime doesn't optimize it away.</returns>
    [Benchmark]
    public int SplitToStackSpansWithoutEmptyCheck()
    {
        var totalNumberOfIds = 0; // a number to operate on so it is not optimized away
        foreach (string stringWithCommaSeparatedNumbers in _stringsWithCommaSeparatedNumbers)
        {
            totalNumberOfIds += SplitToStackSpansWithoutEmptyCheck(stringWithCommaSeparatedNumbers).Length;
        }

        return totalNumberOfIds;
    }

    private static int[] SplitToStackSpansWithoutEmptyCheck(string path)
    {
        ReadOnlySpan<char> pathSpan = path.AsSpan();
        MemoryExtensions.SpanSplitEnumerator<char> pathSegments = pathSpan.Split(Constants.CharArrays.Comma);

        // Here we do NOT know how large the resulting list should at least be
        // Default empty List<> internal array capacity on add is currently 4
        // If the count of numbers is less than 4, we overallocate a little
        // If the count of numbers is more than 4, the list will be resized, resulting in a copy from the initial array to a new double size array
        // If the count of numbers is more than 8, another new array is allocated and copied to
        List<int> nodeIds = [];
        foreach (Range rangeOfPathSegment in pathSegments)
        {
            // this is only a span of the string, a string is not allocated on the heap
            ReadOnlySpan<char> pathSegmentSpan = pathSpan[rangeOfPathSegment];
            if (int.TryParse(pathSegmentSpan, NumberStyles.Integer, CultureInfo.InvariantCulture, out int pathSegment))
            {
                nodeIds.Add(pathSegment);
            }
        }

        // Copy in reversed order, reading through the list indexer
        var result = new int[nodeIds.Count];
        var resultIndex = 0;
        for (int i = nodeIds.Count - 1; i >= 0; i--)
        {
            result[resultIndex++] = nodeIds[i];
        }

        return result;
    }

    /// <summary>
    /// Here we avoid allocating strings for the separated values,
    /// BUT we do not know the count of numbers, so the list we add the numbers to might be resized
    /// </summary>
    /// <remarks>Here with an empty check, unlikely in umbraco use case.</remarks>
    /// <returns>A number so the compiler/runtime doesn't optimize it away.</returns>
    [Benchmark]
    public int SplitToStackSpansWithEmptyCheck()
    {
        var totalNumberOfIds = 0; // a number to operate on so it is not optimized away
        foreach (string stringWithCommaSeparatedNumbers in _stringsWithCommaSeparatedNumbers)
        {
            totalNumberOfIds += SplitToStackSpansWithEmptyCheck(stringWithCommaSeparatedNumbers).Length;
        }

        return totalNumberOfIds;
    }

    private static int[] SplitToStackSpansWithEmptyCheck(string path)
    {
        ReadOnlySpan<char> pathSpan = path.AsSpan();
        MemoryExtensions.SpanSplitEnumerator<char> pathSegments = pathSpan.Split(Constants.CharArrays.Comma);

        // Here we do NOT know how large the resulting list should at least be
        // Default empty List<> internal array capacity on add is currently 4
        // If the count of numbers is less than 4, we overallocate a little
        // If the count of numbers is more than 4, the list will be resized, resulting in a copy from the initial array to a new double size array
        // If the count of numbers is more than 8, another new array is allocated and copied to
        List<int> nodeIds = [];
        foreach (Range rangeOfPathSegment in pathSegments)
        {
            // this is only a span of the string, a string is not allocated on the heap
            ReadOnlySpan<char> pathSegmentSpan = pathSpan[rangeOfPathSegment];
            if (pathSegmentSpan.IsEmpty)
            {
                // skip the parse attempt entirely for empty segments
                continue;
            }

            if (int.TryParse(pathSegmentSpan, NumberStyles.Integer, CultureInfo.InvariantCulture, out int pathSegment))
            {
                nodeIds.Add(pathSegment);
            }
        }

        // Copy in reversed order, reading through the list indexer
        var result = new int[nodeIds.Count];
        var resultIndex = 0;
        for (int i = nodeIds.Count - 1; i >= 0; i--)
        {
            result[resultIndex++] = nodeIds[i];
        }

        return result;
    }

    // BenchmarkDotNet v0.14.0, Windows 11 (10.0.26100.2894)
    // Intel Core i7-10750H CPU 2.60GHz, 1 CPU, 12 logical and 6 physical cores
    // .NET Core SDK 3.1.426 [C:\Program Files\dotnet\sdk]
    //   [Host] : .NET 9.0.0 (9.0.24.52809), X64 RyuJIT AVX2
    //
    // Toolchain=InProcessEmitToolchain
    //
    // | Method                                                | Mean     | Error    | StdDev   | Gen0   | Allocated |
    // |------------------------------------------------------ |---------:|---------:|---------:|-------:|----------:|
    // | Linq                                                  | 46.39 us | 0.515 us | 0.430 us | 9.4604 |  58.31 KB |
    // | SplitToHeapStrings                                    | 30.28 us | 0.310 us | 0.275 us | 7.0801 |  43.55 KB |
    // | SplitToStackSpansWithoutEmptyCheckReversingListAsSpan | 20.47 us | 0.290 us | 0.257 us | 2.7161 |  16.73 KB |
    // | SplitToStackSpansWithoutEmptyCheck                    | 20.60 us | 0.315 us | 0.280 us | 2.7161 |  16.73 KB |
    // | SplitToStackSpansWithEmptyCheck                       | 20.57 us | 0.308 us | 0.288 us | 2.7161 |  16.73 KB |
}

0 commit comments

Comments
 (0)