Skip to content

Commit

Permalink
add StringBuilder.GetChunks (#177)
Browse files Browse the repository at this point in the history
  • Loading branch information
SimonCropp committed Apr 25, 2024
1 parent 311faaf commit 695002b
Show file tree
Hide file tree
Showing 6 changed files with 235 additions and 9 deletions.
2 changes: 1 addition & 1 deletion apiCount.include.md
Original file line number Diff line number Diff line change
@@ -1 +1 @@
**API count: 287**
**API count: 288**
1 change: 1 addition & 0 deletions api_list.include.md
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,7 @@
* `StringBuilder AppendLine(StringBuilder, IFormatProvider, StringBuilder/AppendInterpolatedStringHandler&)` [reference](https://learn.microsoft.com/en-us/dotnet/api/system.text.stringbuilder.appendline#system-text-stringbuilder-appendline(system-iformatprovider-system-text-stringbuilder-appendinterpolatedstringhandler@))
* `Void CopyTo(Int32, Span<Char>, Int32)` [reference](https://learn.microsoft.com/en-us/dotnet/api/system.text.stringbuilder.copyto#system-text-stringbuilder-copyto(system-int32-system-span((system-char))-system-int32))
* `Boolean Equals(ReadOnlySpan<Char>)` [reference](https://learn.microsoft.com/en-us/dotnet/api/system.text.stringbuilder.equals#system-text-stringbuilder-equals(system-readonlyspan((system-char))))
* `Polyfill/ChunkEnumerator GetChunks()` [reference](https://learn.microsoft.com/en-us/dotnet/api/system.text.stringbuilder.getchunks)
* `StringBuilder Replace(ReadOnlySpan<Char>, ReadOnlySpan<Char>)` [reference](https://learn.microsoft.com/en-us/dotnet/api/system.text.stringbuilder.replace#system-text-stringbuilder-replace(system-readonlyspan((system-char))-system-readonlyspan((system-char))))
* `StringBuilder Replace(ReadOnlySpan<Char>, ReadOnlySpan<Char>, Int32, Int32)` [reference](https://learn.microsoft.com/en-us/dotnet/api/system.text.stringbuilder.replace#system-text-stringbuilder-replace(system-readonlyspan((system-char))-system-readonlyspan((system-char))-system-int32-system-int32))

Expand Down
3 changes: 2 additions & 1 deletion readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ The package targets `netstandard2.0` and is designed to support the following ru
* `net5.0`, `net6.0`, `net7.0`, `net8.0`, `net9.0`


**API count: 287**<!-- singleLineInclude: apiCount. path: /apiCount.include.md -->
**API count: 288**<!-- singleLineInclude: apiCount. path: /apiCount.include.md -->


**See [Milestones](../../milestones?state=closed) for release notes.**
Expand Down Expand Up @@ -688,6 +688,7 @@ The class `Polyfill` includes the following extension methods:
* `StringBuilder AppendLine(StringBuilder, IFormatProvider, StringBuilder/AppendInterpolatedStringHandler&)` [reference](https://learn.microsoft.com/en-us/dotnet/api/system.text.stringbuilder.appendline#system-text-stringbuilder-appendline(system-iformatprovider-system-text-stringbuilder-appendinterpolatedstringhandler@))
* `Void CopyTo(Int32, Span<Char>, Int32)` [reference](https://learn.microsoft.com/en-us/dotnet/api/system.text.stringbuilder.copyto#system-text-stringbuilder-copyto(system-int32-system-span((system-char))-system-int32))
* `Boolean Equals(ReadOnlySpan<Char>)` [reference](https://learn.microsoft.com/en-us/dotnet/api/system.text.stringbuilder.equals#system-text-stringbuilder-equals(system-readonlyspan((system-char))))
* `Polyfill/ChunkEnumerator GetChunks()` [reference](https://learn.microsoft.com/en-us/dotnet/api/system.text.stringbuilder.getchunks)
* `StringBuilder Replace(ReadOnlySpan<Char>, ReadOnlySpan<Char>)` [reference](https://learn.microsoft.com/en-us/dotnet/api/system.text.stringbuilder.replace#system-text-stringbuilder-replace(system-readonlyspan((system-char))-system-readonlyspan((system-char))))
* `StringBuilder Replace(ReadOnlySpan<Char>, ReadOnlySpan<Char>, Int32, Int32)` [reference](https://learn.microsoft.com/en-us/dotnet/api/system.text.stringbuilder.replace#system-text-stringbuilder-replace(system-readonlyspan((system-char))-system-readonlyspan((system-char))-system-int32-system-int32))

Expand Down
2 changes: 1 addition & 1 deletion src/Directory.Build.props
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="utf-8"?>
<Project>
<PropertyGroup>
<Version>5.0.0</Version>
<Version>5.1.0</Version>
<AssemblyVersion>1.0.0</AssemblyVersion>
<PackageTags>Polyfill</PackageTags>
<DisableImplicitNamespaceImports>true</DisableImplicitNamespaceImports>
Expand Down
217 changes: 211 additions & 6 deletions src/Polyfill/Polyfill_StringBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
#pragma warning disable

using System;
using System.ComponentModel;
using System.IO;
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text;
Expand Down Expand Up @@ -112,7 +114,8 @@ public static bool Equals(this StringBuilder target, ReadOnlySpan<char> span)
[Link("https://learn.microsoft.com/en-us/dotnet/api/system.text.stringbuilder.append#system-text-stringbuilder-append(system-text-stringbuilder-appendinterpolatedstringhandler@)")]
public static StringBuilder Append(
StringBuilder target,
[InterpolatedStringHandlerArgument(nameof(target))] ref AppendInterpolatedStringHandler handler) => target;
[InterpolatedStringHandlerArgument(nameof(target))]
ref AppendInterpolatedStringHandler handler) => target;

/// <summary>Appends the specified interpolated string to this instance.</summary>
/// <param name="provider">An object that supplies culture-specific formatting information.</param>
Expand All @@ -122,15 +125,17 @@ public static bool Equals(this StringBuilder target, ReadOnlySpan<char> span)
public static StringBuilder Append(
StringBuilder target,
IFormatProvider? provider,
[InterpolatedStringHandlerArgument(nameof(target), nameof(provider))] ref AppendInterpolatedStringHandler handler) => target;
[InterpolatedStringHandlerArgument(nameof(target), nameof(provider))]
ref AppendInterpolatedStringHandler handler) => target;

/// <summary>Appends the specified interpolated string followed by the default line terminator to the end of the current StringBuilder object.</summary>
/// <param name="handler">The interpolated string to append.</param>
/// <returns>A reference to this instance after the append operation has completed.</returns>
[Link("https://learn.microsoft.com/en-us/dotnet/api/system.text.stringbuilder.appendline#system-text-stringbuilder-appendline(system-text-stringbuilder-appendinterpolatedstringhandler@)")]
public static StringBuilder AppendLine(
StringBuilder target,
[InterpolatedStringHandlerArgument(nameof(target))] ref AppendInterpolatedStringHandler handler) =>
[InterpolatedStringHandlerArgument(nameof(target))]
ref AppendInterpolatedStringHandler handler) =>
target.AppendLine();

/// <summary>Appends the specified interpolated string followed by the default line terminator to the end of the current StringBuilder object.</summary>
Expand All @@ -141,10 +146,11 @@ public static bool Equals(this StringBuilder target, ReadOnlySpan<char> span)
public static StringBuilder AppendLine(
StringBuilder target,
IFormatProvider? provider,
[InterpolatedStringHandlerArgument(nameof(target), nameof(provider))] ref AppendInterpolatedStringHandler handler) =>
[InterpolatedStringHandlerArgument(nameof(target), nameof(provider))]
ref AppendInterpolatedStringHandler handler) =>
target.AppendLine();

#elif NET6_0_OR_GREATER
#elif NET6_0_OR_GREATER

/// <summary>Appends the specified interpolated string to this instance.</summary>
/// <param name="handler">The interpolated string to append.</param>
Expand Down Expand Up @@ -187,7 +193,7 @@ public static bool Equals(this StringBuilder target, ReadOnlySpan<char> span)
target.AppendLine(provider, ref handler);
#endif

#if NETSTANDARD2_0|| NETFRAMEWORK
#if NETSTANDARD2_0|| NETFRAMEWORK

/// <summary>Concatenates the strings of the provided array, using the specified separator between each string, then appends the result to the current instance of the string builder.</summary>
/// <param name="separator">The string to use as a separator. separator is included in the joined strings only if values has more than one element.</param>
Expand Down Expand Up @@ -286,4 +292,203 @@ public static bool Equals(this StringBuilder target, ReadOnlySpan<char> span)
public static StringBuilder Replace(this StringBuilder target, ReadOnlySpan<char> oldValue, ReadOnlySpan<char> newValue, int startIndex, int count)
{
    // Delegates to the string-based Replace overload: both spans are copied
    // into strings first, then the range arguments are passed through as-is.
    var oldText = oldValue.ToString();
    var newText = newValue.ToString();
    return target.Replace(oldText, newText, startIndex, count);
}
#endif

#if !NET6_0_OR_GREATER && FeatureMemory

// Reflected handles to StringBuilder's non-public chunk fields. They are resolved
// once by the static initializers below and are never reassigned, hence readonly.
static readonly FieldInfo chunkCharsField = GetStringBuilderField("m_ChunkChars");
static readonly FieldInfo chunkPreviousField = GetStringBuilderField("m_ChunkPrevious");
static readonly FieldInfo chunkLengthField = GetStringBuilderField("m_ChunkLength");

/// <summary>
/// Looks up a non-public instance field of <see cref="StringBuilder"/> by name.
/// </summary>
/// <param name="name">The exact name of the field to find.</param>
/// <returns>The reflected <see cref="FieldInfo"/> for the field.</returns>
/// <exception cref="MissingFieldException">
/// <see cref="StringBuilder"/> has no non-public instance field with the given name.
/// </exception>
static FieldInfo GetStringBuilderField(string name) =>
    typeof(StringBuilder).GetField(name, BindingFlags.Instance | BindingFlags.NonPublic)
    // Throw a specific exception type rather than a bare System.Exception so the
    // failure is self-describing; MissingFieldException still derives from Exception.
    ?? throw new MissingFieldException($"Expected to find field '{name}' on StringBuilder");

/// <summary>
/// Reads the reflected <c>m_ChunkLength</c> field of the given builder and unboxes it to an <see cref="int"/>.
/// </summary>
static int GetChunkLength(StringBuilder stringBuilder)
{
    var boxedLength = chunkLengthField.GetValue(stringBuilder);
    return (int) boxedLength!;
}

/// <summary>
/// Reads the reflected <c>m_ChunkChars</c> field of the given builder and casts it to a character array.
/// </summary>
static char[] GetChunkChars(StringBuilder stringBuilder)
{
    var rawBuffer = chunkCharsField.GetValue(stringBuilder);
    return (char[]) rawBuffer!;
}

/// <summary>
/// Reads the reflected <c>m_ChunkPrevious</c> field of the given builder.
/// The result is <c>null</c> when the field holds no preceding chunk.
/// </summary>
static StringBuilder? GetChunkPrevious(StringBuilder stringBuilder)
{
    var previous = chunkPreviousField.GetValue(stringBuilder);
    return (StringBuilder?) previous;
}

/// <summary>
/// Returns a <see cref="ChunkEnumerator"/> over the chunks of the builder. The enumerator
/// follows the enumerable pattern, so it can be used in a C# <c>foreach</c> statement to
/// read the builder's content as chunks (<see cref="ReadOnlyMemory{T}"/> of characters).
/// </summary>
/// <remarks>
/// Example:
/// <code>
/// foreach (ReadOnlyMemory&lt;char&gt; chunk in sb.GetChunks())
///     foreach (char c in chunk.Span)
///     { /* operation on c */ }
/// </code>
/// What happens if the StringBuilder is modified while the chunk enumeration is incomplete
/// is undefined. StringBuilder is also not thread-safe, so operating on it from concurrent
/// threads is illegal. The returned chunks are NOT guaranteed to remain unchanged if the
/// StringBuilder is modified, so do not cache them for later use. The purpose of this API
/// is to efficiently extract the data of a CONSTANT StringBuilder.
///
/// Creating a ReadOnlySpan from a ReadOnlyMemory (the <c>.Span</c> property) is expensive
/// compared to fetching a character, so store the span in a local variable when it is used
/// inside a nested loop:
/// <code>
/// foreach (ReadOnlyMemory&lt;char&gt; chunk in sb.GetChunks())
/// {
///     var span = chunk.Span;
///     for (int i = 0; i &lt; span.Length; i++)
///     { /* operation on span[i] */ }
/// }
/// </code>
/// </remarks>
/// <param name="target">The builder whose chunks are enumerated.</param>
/// <returns>An enumerator over the chunks of <paramref name="target"/>.</returns>
[Link("https://learn.microsoft.com/en-us/dotnet/api/system.text.stringbuilder.getchunks")]
public static ChunkEnumerator GetChunks(this StringBuilder target)
{
    return new ChunkEnumerator(target);
}

/// <summary>
/// Enumerates the chunks of a <see cref="StringBuilder"/>; instances are created by <c>GetChunks</c>.
/// The type exposes <see cref="GetEnumerator"/> as well as <see cref="MoveNext"/> and
/// <see cref="Current"/>, so it can be consumed directly by a C# <c>foreach</c> statement.
/// It has to be public so the compiler can bind to it, but callers typically do not
/// use it explicitly (which is why it is a nested type).
/// </summary>
public struct ChunkEnumerator
{
    // Largest chunk count that is still walked with the allocation-free linear scan.
    const int LinearScanMaxChunks = 8;

    // The builder passed to the constructor. Chunks link to their PREDECESSOR, so this
    // is the chunk at the end of the logical string and the last one to be returned.
    readonly StringBuilder _firstChunk;

    // The chunk that Current exposes; null until MoveNext has succeeded once.
    StringBuilder? _currentChunk;

    // Forward-ordered chunk table; only allocated for builders with many chunks (see constructor).
    readonly ManyChunkInfo? _manyChunks;

    /// <summary>
    /// Returns this enumerator itself. Exists only so that <c>foreach</c> can be applied
    /// to the value returned by <c>GetChunks</c>.
    /// </summary>
    [EditorBrowsable(EditorBrowsableState.Never)]
    public ChunkEnumerator GetEnumerator()
    {
        return this;
    }

    /// <summary>
    /// Advances to the next chunk in logical (front-to-back) order.
    /// </summary>
    /// <returns>
    /// <c>true</c> when <see cref="Current"/> now refers to a new chunk;
    /// <c>false</c> when the last chunk has already been returned.
    /// </returns>
    public bool MoveNext()
    {
        // The builder itself is the final chunk; once it has been returned we are done.
        if (_currentChunk == _firstChunk)
        {
            return false;
        }

        if (_manyChunks != null)
        {
            return _manyChunks.MoveNext(ref _currentChunk);
        }

        // Few chunks: walk backwards from the end of the list until we reach the
        // chunk whose predecessor is the one returned last (null on the first call).
        var next = _firstChunk;
        while (true)
        {
            var chunkPrevious = GetChunkPrevious(next);
            if (chunkPrevious == _currentChunk)
            {
                break;
            }

            next = chunkPrevious;
        }

        _currentChunk = next;
        return true;
    }

    /// <summary>
    /// Gets the characters of the chunk the enumerator is positioned on.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The enumerator is not positioned on a chunk (<see cref="MoveNext"/> has not succeeded yet).
    /// </exception>
    public ReadOnlyMemory<char> Current
    {
        get
        {
            if (_currentChunk == null)
            {
                throw new InvalidOperationException("Enumeration has either not started or has already finished.");
            }

            // Only the first GetChunkLength characters of the chunk's buffer are exposed.
            return new ReadOnlyMemory<char>(GetChunkChars(_currentChunk), 0, GetChunkLength(_currentChunk));
        }
    }

    /// <summary>
    /// Creates an enumerator positioned before the first chunk of <paramref name="builder"/>.
    /// </summary>
    internal ChunkEnumerator(StringBuilder builder)
    {
        _firstChunk = builder;
        // Starting from null makes the first MoveNext locate the chunk without a predecessor.
        _currentChunk = null;
        _manyChunks = null;

        // There is a performance-vs-allocation tradeoff. Because the chunks are a linked
        // list with each chunk pointing to its PREDECESSOR, walking the list FORWARD is not
        // efficient. With few chunks (at most LinearScanMaxChunks) we simply rescan from the
        // end on every MoveNext and tolerate the N*N behavior. Above that size we allocate
        // an array holding a reference to every chunk so that large N stays efficient.
        var chunkCount = ChunkCount(builder);
        if (chunkCount > LinearScanMaxChunks)
        {
            _manyChunks = new ManyChunkInfo(builder, chunkCount);
        }
    }

    /// <summary>
    /// Counts the chunks reachable from <paramref name="builder"/> by following the
    /// predecessor links; returns 0 for <c>null</c>.
    /// </summary>
    static int ChunkCount(StringBuilder? builder)
    {
        var count = 0;
        while (builder != null)
        {
            count++;
            builder = GetChunkPrevious(builder);
        }

        return count;
    }

    /// <summary>
    /// Holds references to all chunks, in forward order, for builders with many chunks.
    /// </summary>
    sealed class ManyChunkInfo
    {
        // Chunks in normal order (first chunk of the logical string first).
        readonly StringBuilder[] _chunks;

        // Index of the chunk returned last; -1 before the first MoveNext.
        int _chunkPos;

        /// <summary>
        /// Advances to the next stored chunk and writes it to <paramref name="current"/>.
        /// </summary>
        /// <returns><c>false</c> (leaving <paramref name="current"/> untouched) when no chunk is left.</returns>
        public bool MoveNext(ref StringBuilder? current)
        {
            int pos = ++_chunkPos;
            if (_chunks.Length <= pos)
            {
                return false;
            }

            current = _chunks[pos];
            return true;
        }

        /// <summary>
        /// Builds the forward-ordered table by walking the predecessor links from
        /// <paramref name="builder"/> and filling the array from its last slot to its first.
        /// </summary>
        public ManyChunkInfo(StringBuilder? builder, int chunkCount)
        {
            _chunks = new StringBuilder[chunkCount];
            while (0 <= --chunkCount)
            {
                _chunks[chunkCount] = builder;
                builder = GetChunkPrevious(builder);
            }

            _chunkPos = -1;
        }
    }
}
#endif
}
19 changes: 19 additions & 0 deletions src/Tests/PolyfillTests_StringBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,25 @@ public void Replace()
Assert.AreEqual("b", builder.ToString());
}

#if FeatureMemory

[Test]
public void GetChunks()
{
    // Start with an initial capacity of 1 holding "a", then append "bb";
    // the assertions below expect the two pieces to come back as separate chunks.
    var builder = new StringBuilder("a", 1).Append("bb");

    // Materialize every chunk as a string, in enumeration order.
    var chunkTexts = new List<string>();
    foreach (var memory in builder.GetChunks())
    {
        var text = memory.ToString();
        chunkTexts.Add(text);
    }

    Assert.AreEqual("a", chunkTexts[0]);
    Assert.AreEqual("bb", chunkTexts[1]);
}

#endif

[Test]
public void Append()
{
Expand Down

0 comments on commit 695002b

Please sign in to comment.