Skip to content

Commit

Permalink
bytes, strings: optimize Cut for single-byte separators
Browse files Browse the repository at this point in the history
Optimize the Cut function in both the bytes and strings packages
by adding a fast path for separators that are exactly one byte long,
which bypasses the more general Index search logic. This noticeably
reduces execution time for that specific case, as the benchmarks
added to each package demonstrate.

The optimization checks if the length of the separator is one before
proceeding with the existing search strategy. If so, it uses IndexByte
for a faster lookup of the separator's position.

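Additionally, benchmarks have been added to both packages, covering
one-byte and two-byte separators at several input spacings. In the
strings results below, one-byte separators are roughly 14-20% faster,
while two-byte separators are about 3% slower.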

goos: darwin
goarch: arm64
pkg: strings
cpu: Apple M2 Max
                  │ old-cut.txt │             new-cut.txt             │
                  │   sec/op    │   sec/op     vs base                │
Cut/Cut-One/2-12    4.026n ± 2%   3.274n ± 2%  -18.68% (p=0.000 n=10)
Cut/Cut-Two/2-12    8.093n ± 0%   8.357n ± 0%   +3.27% (p=0.000 n=10)
Cut/Cut-One/4-12    4.048n ± 1%   3.324n ± 2%  -17.91% (p=0.000 n=10)
Cut/Cut-Two/4-12    8.105n ± 0%   8.377n ± 1%   +3.35% (p=0.000 n=10)
Cut/Cut-One/8-12    4.089n ± 1%   3.290n ± 1%  -19.53% (p=0.000 n=10)
Cut/Cut-Two/8-12    8.107n ± 1%   8.359n ± 1%   +3.10% (p=0.000 n=10)
Cut/Cut-One/16-12   4.127n ± 1%   3.328n ± 1%  -19.35% (p=0.000 n=10)
Cut/Cut-Two/16-12   8.119n ± 1%   8.374n ± 1%   +3.15% (p=0.000 n=10)
Cut/Cut-One/32-12   4.545n ± 2%   3.675n ± 1%  -19.14% (p=0.000 n=10)
Cut/Cut-Two/32-12   8.708n ± 1%   8.963n ± 1%   +2.92% (p=0.000 n=10)
Cut/Cut-One/64-12   4.825n ± 2%   4.146n ± 1%  -14.08% (p=0.000 n=10)
Cut/Cut-Two/64-12   9.286n ± 0%   9.315n ± 1%        ~ (p=0.105 n=10)
geomean             5.983n        5.486n        -8.32%

                  │ old-cut.txt  │             new-cut.txt             │
                  │     B/op     │    B/op     vs base                 │
Cut/Cut-One/2-12    0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Cut/Cut-Two/2-12    0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Cut/Cut-One/4-12    0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Cut/Cut-Two/4-12    0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Cut/Cut-One/8-12    0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Cut/Cut-Two/8-12    0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Cut/Cut-One/16-12   0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Cut/Cut-Two/16-12   0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Cut/Cut-One/32-12   0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Cut/Cut-Two/32-12   0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Cut/Cut-One/64-12   0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Cut/Cut-Two/64-12   0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
geomean                        ²               +0.00%                ²
¹ all samples are equal
² summaries must be >0 to compute geomean

                  │ old-cut.txt  │             new-cut.txt             │
                  │  allocs/op   │ allocs/op   vs base                 │
Cut/Cut-One/2-12    0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Cut/Cut-Two/2-12    0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Cut/Cut-One/4-12    0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Cut/Cut-Two/4-12    0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Cut/Cut-One/8-12    0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Cut/Cut-Two/8-12    0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Cut/Cut-One/16-12   0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Cut/Cut-Two/16-12   0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Cut/Cut-One/32-12   0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Cut/Cut-Two/32-12   0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Cut/Cut-One/64-12   0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Cut/Cut-Two/64-12   0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
geomean                        ²               +0.00%                ²
¹ all samples are equal
² summaries must be >0 to compute geomean

For golang#67101
  • Loading branch information
aimuz committed May 1, 2024
1 parent db5f2b4 commit c3b4c36
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 0 deletions.
6 changes: 6 additions & 0 deletions src/bytes/bytes.go
Original file line number Diff line number Diff line change
Expand Up @@ -1330,6 +1330,12 @@ func Index(s, sep []byte) int {
//
// Cut returns slices of the original slice s, not copies.
func Cut(s, sep []byte) (before, after []byte, found bool) {
if len(sep) == 1 {
if i := IndexByte(s, sep[0]); i >= 0 {
return s[:i], s[i+1:], true
}
return s, nil, false
}
if i := Index(s, sep); i >= 0 {
return s[:i], s[i+len(sep):], true
}
Expand Down
18 changes: 18 additions & 0 deletions src/bytes/bytes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2263,3 +2263,21 @@ func TestClone(t *testing.T) {
}
}
}

// BenchmarkCut measures Cut with a one-byte separator ("Cut-One") and a
// two-byte separator ("Cut-Two"). For each skip value the input starts with
// skip spaces followed by "aa", so the separator is found at offset skip.
func BenchmarkCut(b *testing.B) {
	for _, skip := range [...]int{2, 4, 8, 16, 32, 64} {
		// One unit is skip spaces, "aa", then skip more spaces; the input
		// is that unit repeated 1<<16/skip times.
		pad := Repeat([]byte(" "), skip)
		unit := make([]byte, 0, 2*skip+2)
		unit = append(unit, pad...)
		unit = append(unit, 'a', 'a')
		unit = append(unit, pad...)
		s := Repeat(unit, 1<<16/skip)

		b.Run(fmt.Sprintf("Cut-One/%d", skip), func(b *testing.B) {
			// ReportAllocs only affects the benchmark it is called on, so
			// it must be called here rather than on the parent.
			b.ReportAllocs()
			// Build the separator once so the loop times only Cut.
			sep := []byte{'a'}
			for i := 0; i < b.N; i++ {
				_, _, _ = Cut(s, sep)
			}
		})
		b.Run(fmt.Sprintf("Cut-Two/%d", skip), func(b *testing.B) {
			b.ReportAllocs()
			sep := []byte{'a', 'a'}
			for i := 0; i < b.N; i++ {
				_, _, _ = Cut(s, sep)
			}
		})
	}
}
7 changes: 7 additions & 0 deletions src/strings/strings.go
Original file line number Diff line number Diff line change
Expand Up @@ -1309,9 +1309,16 @@ func Index(s, substr string) int {
// The found result reports whether sep appears in s.
// If sep does not appear in s, cut returns s, "", false.
func Cut(s, sep string) (before, after string, found bool) {
if len(sep) == 1 {
if i := IndexByte(s, sep[0]); i >= 0 {
return s[:i], s[i+1:], true
}
return s, "", false
}
if i := Index(s, sep); i >= 0 {
return s[:i], s[i+len(sep):], true
}

return s, "", false
}

Expand Down
18 changes: 18 additions & 0 deletions src/strings/strings_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2071,3 +2071,21 @@ func BenchmarkReplaceAll(b *testing.B) {
stringSink = ReplaceAll("banana", "a", "<>")
}
}

// BenchmarkCut measures Cut with a one-byte separator ("Cut-One") and a
// two-byte separator ("Cut-Two"). For each skip value the input starts with
// skip spaces followed by "aa", so the separator is found at offset skip.
func BenchmarkCut(b *testing.B) {
	for _, skip := range [...]int{2, 4, 8, 16, 32, 64} {
		// One unit is skip spaces, "aa", then skip more spaces; the input
		// is that unit repeated 1<<16/skip times.
		pad := Repeat(" ", skip)
		s := Repeat(pad+"aa"+pad, 1<<16/skip)

		b.Run(fmt.Sprintf("Cut-One/%d", skip), func(b *testing.B) {
			// ReportAllocs only affects the benchmark it is called on, so
			// it must be called here rather than on the parent.
			b.ReportAllocs()
			for i := 0; i < b.N; i++ {
				_, _, _ = Cut(s, "a")
			}
		})
		b.Run(fmt.Sprintf("Cut-Two/%d", skip), func(b *testing.B) {
			b.ReportAllocs()
			for i := 0; i < b.N; i++ {
				_, _, _ = Cut(s, "aa")
			}
		})
	}
}

0 comments on commit c3b4c36

Please sign in to comment.