diff --git a/buffer/buffer.go b/buffer/buffer.go index 9e929cd98..27fb5cd5d 100644 --- a/buffer/buffer.go +++ b/buffer/buffer.go @@ -42,6 +42,11 @@ func (b *Buffer) AppendByte(v byte) { b.bs = append(b.bs, v) } +// AppendBytes writes a single byte to the Buffer. +func (b *Buffer) AppendBytes(v []byte) { + b.bs = append(b.bs, v...) +} + // AppendString writes a string to the Buffer. func (b *Buffer) AppendString(s string) { b.bs = append(b.bs, s...) diff --git a/zapcore/json_encoder.go b/zapcore/json_encoder.go index ce6838de2..c8ab86979 100644 --- a/zapcore/json_encoder.go +++ b/zapcore/json_encoder.go @@ -486,73 +486,98 @@ func (enc *jsonEncoder) appendFloat(val float64, bitSize int) { // Unlike the standard library's encoder, it doesn't attempt to protect the // user from browser vulnerabilities or JSONP-related problems. func (enc *jsonEncoder) safeAddString(s string) { - for i := 0; i < len(s); { - if enc.tryAddRuneSelf(s[i]) { - i++ - continue - } - r, size := utf8.DecodeRuneInString(s[i:]) - if enc.tryAddRuneError(r, size) { - i++ - continue - } - enc.buf.AppendString(s[i : i+size]) - i += size - } + safeAppendStringLike( + (*buffer.Buffer).AppendString, + utf8.DecodeRuneInString, + enc.buf, + s, + ) } // safeAddByteString is no-alloc equivalent of safeAddString(string(s)) for s []byte. func (enc *jsonEncoder) safeAddByteString(s []byte) { + safeAppendStringLike( + (*buffer.Buffer).AppendBytes, + utf8.DecodeRune, + enc.buf, + s, + ) +} + +// safeAppendStringLike is a generic implementation of safeAddString and safeAddByteString. +// It appends a string or byte slice to the buffer, escaping all special characters. +func safeAppendStringLike[S []byte | string]( + // appendTo appends this string-like object to the buffer. + appendTo func(*buffer.Buffer, S), + // decodeRune decodes the next rune from the string-like object + // and returns its value and width in bytes. + decodeRune func(S) (rune, int), + buf *buffer.Buffer, + s S, +) { + // The encoding logic below works by skipping over characters + // that can be safely copied as-is, + // until a character is found that needs special handling. + // At that point, we copy everything we've seen so far, + // and then handle that special character. + // + // last is the index of the last byte that was copied to the buffer. + last := 0 for i := 0; i < len(s); { - if enc.tryAddRuneSelf(s[i]) { + if s[i] >= utf8.RuneSelf { + // Character >= RuneSelf may be part of a multi-byte rune. + // They need to be decoded before we can decide how to handle them. + r, size := decodeRune(s[i:]) + if r != utf8.RuneError || size != 1 { + // No special handling required. + // Skip over this rune and continue. + i += size + continue + } + + // Invalid UTF-8 sequence. + // Replace it with the Unicode replacement character. + appendTo(buf, s[last:i]) + buf.AppendString(`\ufffd`) + i++ - continue - } - r, size := utf8.DecodeRune(s[i:]) - if enc.tryAddRuneError(r, size) { + last = i + } else { + // Character < RuneSelf is a single-byte UTF-8 rune. + if s[i] >= 0x20 && s[i] != '\\' && s[i] != '"' { + // No escaping necessary. + // Skip over this character and continue. + i++ + continue + } + + // This character needs to be escaped. + appendTo(buf, s[last:i]) + switch s[i] { + case '\\', '"': + buf.AppendByte('\\') + buf.AppendByte(s[i]) + case '\n': + buf.AppendByte('\\') + buf.AppendByte('n') + case '\r': + buf.AppendByte('\\') + buf.AppendByte('r') + case '\t': + buf.AppendByte('\\') + buf.AppendByte('t') + default: + // Encode bytes < 0x20, except for the escape sequences above. + buf.AppendString(`\u00`) + buf.AppendByte(_hex[s[i]>>4]) + buf.AppendByte(_hex[s[i]&0xF]) + } + i++ - continue + last = i } - enc.buf.Write(s[i : i+size]) - i += size } -} -// tryAddRuneSelf appends b if it is valid UTF-8 character represented in a single byte. -func (enc *jsonEncoder) tryAddRuneSelf(b byte) bool { - if b >= utf8.RuneSelf { - return false - } - if b >= 0x20 && b != '\\' && b != '"' { - enc.buf.AppendByte(b) - return true - } - switch b { - case '\\', '"': - enc.buf.AppendByte('\\') - enc.buf.AppendByte(b) - case '\n': - enc.buf.AppendByte('\\') - enc.buf.AppendByte('n') - case '\r': - enc.buf.AppendByte('\\') - enc.buf.AppendByte('r') - case '\t': - enc.buf.AppendByte('\\') - enc.buf.AppendByte('t') - default: - // Encode bytes < 0x20, except for the escape sequences above. - enc.buf.AppendString(`\u00`) - enc.buf.AppendByte(_hex[b>>4]) - enc.buf.AppendByte(_hex[b&0xF]) - } - return true -} - -func (enc *jsonEncoder) tryAddRuneError(r rune, size int) bool { - if r == utf8.RuneError && size == 1 { - enc.buf.AppendString(`\ufffd`) - return true - } - return false + // add remaining + appendTo(buf, s[last:]) } diff --git a/zapcore/json_encoder_bench_test.go b/zapcore/json_encoder_bench_test.go index ef8001993..9182b3951 100644 --- a/zapcore/json_encoder_bench_test.go +++ b/zapcore/json_encoder_bench_test.go @@ -22,6 +22,7 @@ package zapcore_test import ( "encoding/json" + "fmt" "testing" "time" @@ -51,7 +52,28 @@ func BenchmarkZapJSONFloat32AndComplex64(b *testing.B) { }) } +const _sliceSize = 5000 + +type StringSlice []string + +func (s StringSlice) MarshalLogArray(encoder ArrayEncoder) error { + for _, str := range s { + encoder.AppendString(str) + } + return nil +} + +func generateStringSlice(n int) StringSlice { + output := make(StringSlice, 0, n) + for i := 0; i < n; i++ { + output = append(output, fmt.Sprint("00000000-0000-0000-0000-0000000000", i)) + } + return output +} + func BenchmarkZapJSON(b *testing.B) { + additional := generateStringSlice(_sliceSize) + b.ResetTimer() b.RunParallel(func(pb *testing.PB) { for pb.Next() { enc := NewJSONEncoder(testEncoderConfig()) @@ -64,6 +86,7 @@ func BenchmarkZapJSON(b *testing.B) { enc.AddString("string3", "🤔") enc.AddString("string4", "🙊") enc.AddBool("bool", true) + _ = enc.AddArray("test", additional) buf, _ := enc.EncodeEntry(Entry{ Message: "fake", Level: DebugLevel, @@ -75,10 +98,11 @@ func BenchmarkZapJSON(b *testing.B) { func BenchmarkStandardJSON(b *testing.B) { record := struct { - Level string `json:"level"` - Message string `json:"msg"` - Time time.Time `json:"ts"` - Fields map[string]interface{} `json:"fields"` + Level string `json:"level"` + Message string `json:"msg"` + Time time.Time `json:"ts"` + Fields map[string]interface{} `json:"fields"` + Additional StringSlice }{ Level: "debug", Message: "fake", @@ -94,6 +118,7 @@ func BenchmarkStandardJSON(b *testing.B) { "string4": "🙊", "bool": true, }, + Additional: generateStringSlice(_sliceSize), } b.ResetTimer() b.RunParallel(func(pb *testing.PB) { diff --git a/zapcore/json_encoder_impl_test.go b/zapcore/json_encoder_impl_test.go index 5fc7b4db2..5f8126275 100644 --- a/zapcore/json_encoder_impl_test.go +++ b/zapcore/json_encoder_impl_test.go @@ -29,10 +29,13 @@ import ( "testing" "testing/quick" "time" + "unicode/utf8" + "go.uber.org/zap/buffer" "go.uber.org/zap/internal/bufferpool" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "go.uber.org/multierr" ) @@ -662,3 +665,72 @@ func TestJSONQuick(t *testing.T) { check(asciiRoundTripsCorrectlyString) check(asciiRoundTripsCorrectlyByteString) } + +var _stringLikeCorpus = []string{ + "", + "foo", + "bar", + "a\nb", + "a\tb", + "a\\b", + `a"b`, +} + +func FuzzSafeAppendStringLike_bytes(f *testing.F) { + for _, s := range _stringLikeCorpus { + f.Add([]byte(s)) + } + f.Fuzz(func(t *testing.T, b []byte) { + if !utf8.Valid(b) { + t.Skip() + } + + fuzzSafeAppendStringLike(t, string(b), func(buf *buffer.Buffer) { + safeAppendStringLike( + (*buffer.Buffer).AppendBytes, + utf8.DecodeRune, + buf, + b, + ) + }) + }) +} + +func FuzzSafeAppendStringLike_string(f *testing.F) { + for _, s := range _stringLikeCorpus { + f.Add(s) + } + f.Fuzz(func(t *testing.T, s string) { + if !utf8.ValidString(s) { + t.Skip() + } + + fuzzSafeAppendStringLike(t, s, func(buf *buffer.Buffer) { + safeAppendStringLike( + (*buffer.Buffer).AppendString, + utf8.DecodeRuneInString, + buf, + s, + ) + }) + }) +} + +func fuzzSafeAppendStringLike( + t *testing.T, + want string, + writeString func(*buffer.Buffer), +) { + t.Helper() + + buf := bufferpool.Get() + defer buf.Free() + + buf.AppendByte('"') + writeString(buf) + buf.AppendByte('"') + + var got string + require.NoError(t, json.Unmarshal(buf.Bytes(), &got)) + assert.Equal(t, want, got) +}