Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve string encoding by following json approach #1350

Merged
merged 9 commits into from Sep 9, 2023
5 changes: 5 additions & 0 deletions buffer/buffer.go
Expand Up @@ -42,6 +42,11 @@ func (b *Buffer) AppendByte(v byte) {
b.bs = append(b.bs, v)
}

// AppendByteV writes a single byte to the Buffer.
func (b *Buffer) AppendByteV(v ...byte) {
b.bs = append(b.bs, v...)
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should probably be named AppendBytes and take a []byte, not a vararg. (Also, the docstring is inaccurate.)

(FYI, there's also Buffer.Write which does the same, while satisfying io.Writer, but there's no problem with also having this method. However, if we have both, maybe Write should call AppendBytes.)


// AppendString writes a string to the Buffer.
func (b *Buffer) AppendString(s string) {
b.bs = append(b.bs, s...)
Expand Down
88 changes: 44 additions & 44 deletions zapcore/json_encoder.go
Expand Up @@ -23,8 +23,10 @@
import (
"encoding/base64"
"math"
"reflect"
"time"
"unicode/utf8"
"unsafe"

"go.uber.org/zap/buffer"
"go.uber.org/zap/internal/bufferpool"
Expand Down Expand Up @@ -486,67 +488,65 @@
// Unlike the standard library's encoder, it doesn't attempt to protect the
// user from browser vulnerabilities or JSONP-related problems.
func (enc *jsonEncoder) safeAddString(s string) {
for i := 0; i < len(s); {
if enc.tryAddRuneSelf(s[i]) {
i++
continue
}
r, size := utf8.DecodeRuneInString(s[i:])
if enc.tryAddRuneError(r, size) {
i++
continue
}
enc.buf.AppendString(s[i : i+size])
i += size
}
enc.safeAddByteString(*(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{

Check failure on line 491 in zapcore/json_encoder.go

View workflow job for this annotation

GitHub Actions / Lint

unsafeptr: possible misuse of reflect.SliceHeader (govet)
Data: (*reflect.StringHeader)(unsafe.Pointer(&s)).Data,
Len: len(s),
Cap: len(s),
})))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Putting aside that this is decidedly Not Safe (in a function called safeAddString), starting Go 1.20, the above isn't the best way for an unsafe string to byte slice conversion.

It's better to now do: unsafe.Slice(unsafe.StringData(s), len(s)).

https://go.dev/play/p/mGnV97K5tu_w

}

// safeAddByteString is no-alloc equivalent of safeAddString(string(s)) for s []byte.
func (enc *jsonEncoder) safeAddByteString(s []byte) {
start := 0
for i := 0; i < len(s); {
if enc.tryAddRuneSelf(s[i]) {
if s[i] < utf8.RuneSelf {
if s[i] >= 0x20 && s[i] != '\\' && s[i] != '"' {
i++
continue
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very nice! This right here is the performance win.
Scanning until the next position escaping is needed instead of appending one byte at a time.

I'm in favor of such a change, but I would prefer if we could do this without the unsafe.

}

enc.buf.AppendByteV(s[start:i]...)

switch s[i] {
case '\\', '"':
enc.buf.AppendByte('\\')
enc.buf.AppendByte(s[i])
case '\n':
enc.buf.AppendByte('\\')
enc.buf.AppendByte('n')
case '\r':
enc.buf.AppendByte('\\')
enc.buf.AppendByte('r')
case '\t':
enc.buf.AppendByte('\\')
enc.buf.AppendByte('t')
default:
// Encode bytes < 0x20, except for the escape sequences above.
enc.buf.AppendString(`\u00`)
enc.buf.AppendByte(_hex[s[i]>>4])
enc.buf.AppendByte(_hex[s[i]&0xF])
}

i++
start = i
continue
}

enc.buf.AppendByteV(s[start:i]...)

r, size := utf8.DecodeRune(s[i:])
if enc.tryAddRuneError(r, size) {
i++
start = i
continue
}
enc.buf.Write(s[i : i+size])
i += size
start = i
}
}

// tryAddRuneSelf appends b if it is valid UTF-8 character represented in a single byte.
func (enc *jsonEncoder) tryAddRuneSelf(b byte) bool {
if b >= utf8.RuneSelf {
return false
}
if b >= 0x20 && b != '\\' && b != '"' {
enc.buf.AppendByte(b)
return true
}
switch b {
case '\\', '"':
enc.buf.AppendByte('\\')
enc.buf.AppendByte(b)
case '\n':
enc.buf.AppendByte('\\')
enc.buf.AppendByte('n')
case '\r':
enc.buf.AppendByte('\\')
enc.buf.AppendByte('r')
case '\t':
enc.buf.AppendByte('\\')
enc.buf.AppendByte('t')
default:
// Encode bytes < 0x20, except for the escape sequences above.
enc.buf.AppendString(`\u00`)
enc.buf.AppendByte(_hex[b>>4])
enc.buf.AppendByte(_hex[b&0xF])
}
return true
// add remaining
enc.buf.AppendByteV(s[start:]...)
}

func (enc *jsonEncoder) tryAddRuneError(r rune, size int) bool {
Expand Down
33 changes: 29 additions & 4 deletions zapcore/json_encoder_bench_test.go
Expand Up @@ -22,6 +22,7 @@ package zapcore_test

import (
"encoding/json"
"fmt"
"testing"
"time"

Expand Down Expand Up @@ -51,7 +52,28 @@ func BenchmarkZapJSONFloat32AndComplex64(b *testing.B) {
})
}

const _sliceSize = 5000

type StringSlice []string

func (s StringSlice) MarshalLogArray(encoder ArrayEncoder) error {
for _, str := range s {
encoder.AppendString(str)
}
return nil
}

func generateStringSlice(n int) StringSlice {
output := make(StringSlice, 0, n)
for i := 0; i < n; i++ {
output = append(output, fmt.Sprint("00000000-0000-0000-0000-0000000000", i))
}
return output
}

func BenchmarkZapJSON(b *testing.B) {
additional := generateStringSlice(_sliceSize)
b.ResetTimer()
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
enc := NewJSONEncoder(testEncoderConfig())
Expand All @@ -64,6 +86,7 @@ func BenchmarkZapJSON(b *testing.B) {
enc.AddString("string3", "🤔")
enc.AddString("string4", "🙊")
enc.AddBool("bool", true)
_ = enc.AddArray("test", additional)
buf, _ := enc.EncodeEntry(Entry{
Message: "fake",
Level: DebugLevel,
Expand All @@ -75,10 +98,11 @@ func BenchmarkZapJSON(b *testing.B) {

func BenchmarkStandardJSON(b *testing.B) {
record := struct {
Level string `json:"level"`
Message string `json:"msg"`
Time time.Time `json:"ts"`
Fields map[string]interface{} `json:"fields"`
Level string `json:"level"`
Message string `json:"msg"`
Time time.Time `json:"ts"`
Fields map[string]interface{} `json:"fields"`
Additional StringSlice
}{
Level: "debug",
Message: "fake",
Expand All @@ -94,6 +118,7 @@ func BenchmarkStandardJSON(b *testing.B) {
"string4": "🙊",
"bool": true,
},
Additional: generateStringSlice(_sliceSize),
}
b.ResetTimer()
b.RunParallel(func(pb *testing.PB) {
Expand Down