Skip to content

Commit

Permalink
feat:(ast) export some API for third-party JSON libs (#608)
Browse files Browse the repository at this point in the history
  • Loading branch information
AsterDY committed Mar 13, 2024
1 parent 5d45952 commit 0704d0a
Show file tree
Hide file tree
Showing 42 changed files with 14,807 additions and 12,983 deletions.
19 changes: 15 additions & 4 deletions ast/api.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
//go:build (amd64 && go1.16 && !go1.23) || (arm64 && go1.20 && !go1.23)
// +build amd64,go1.16,!go1.23 arm64,go1.20,!go1.23

/*
Expand Down Expand Up @@ -27,6 +28,7 @@ import (
`github.com/bytedance/sonic/internal/native/types`
`github.com/bytedance/sonic/internal/rt`
uq `github.com/bytedance/sonic/unquote`
`github.com/bytedance/sonic/utf8`
)

var typeByte = rt.UnpackEface(byte(0)).Type
Expand Down Expand Up @@ -101,7 +103,7 @@ func (self *Parser) skip() (int, types.ParsingError) {

func (self *Node) encodeInterface(buf *[]byte) error {
//WARN: NOT compatible with json.Encoder
return encoder.EncodeInto(buf, self.packAny(), 0)
return encoder.EncodeInto(buf, self.packAny(), encoder.NoEncoderNewline)
}

func (self *Parser) skipFast() (int, types.ParsingError) {
Expand All @@ -112,13 +114,22 @@ func (self *Parser) skipFast() (int, types.ParsingError) {
return start, 0
}

func (self *Parser) getByPath(path ...interface{}) (int, types.ParsingError) {
fsm := types.NewStateMachine()
func (self *Parser) getByPath(validate bool, path ...interface{}) (int, types.ParsingError) {
var fsm *types.StateMachine
if validate {
fsm = types.NewStateMachine()
}
start := native.GetByPath(&self.s, &self.p, &path, fsm)
types.FreeStateMachine(fsm)
if validate {
types.FreeStateMachine(fsm)
}
runtime.KeepAlive(path)
if start < 0 {
return self.p, types.ParsingError(-start)
}
return start, 0
}

// validate_utf8 reports the result of running str through ValidateString
// from sonic's own utf8 package.
func validate_utf8(str string) bool {
	ok := utf8.ValidateString(str)
	return ok
}
91 changes: 50 additions & 41 deletions ast/api_compat.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,67 +19,69 @@
package ast

import (
`encoding/json`
`encoding/json`
`unicode/utf8`

`github.com/bytedance/sonic/internal/native/types`
`github.com/bytedance/sonic/internal/rt`
`github.com/bytedance/sonic/internal/native/types`
`github.com/bytedance/sonic/internal/rt`
)

func init() {
println("WARNING:(ast) sonic only supports Go1.16~1.22, but your environment is not suitable")
println("WARNING:(ast) sonic only supports Go1.16~1.22, but your environment is not suitable")
}

func quote(buf *[]byte, val string) {
quoteString(buf, val)
quoteString(buf, val)
}

// unquote unescapes an internal JSON string (it doesn't count the quotes at the beginning and end)
func unquote(src string) (string, types.ParsingError) {
sp := rt.IndexChar(src, -1)
out, ok := unquoteBytes(rt.BytesFrom(sp, len(src)+2, len(src)+2))
if !ok {
return "", types.ERR_INVALID_ESCAPE
}
return rt.Mem2Str(out), 0
sp := rt.IndexChar(src, -1)
out, ok := unquoteBytes(rt.BytesFrom(sp, len(src)+2, len(src)+2))
if !ok {
return "", types.ERR_INVALID_ESCAPE
}
return rt.Mem2Str(out), 0
}


func (self *Parser) decodeValue() (val types.JsonState) {
e, v := decodeValue(self.s, self.p, self.dbuf == nil)
if e < 0 {
return v
}
self.p = e
return v
e, v := decodeValue(self.s, self.p, self.dbuf == nil)
if e < 0 {
return v
}
self.p = e
return v
}

func (self *Parser) skip() (int, types.ParsingError) {
e, s := skipValue(self.s, self.p)
if e < 0 {
return self.p, types.ParsingError(-e)
}
self.p = e
return s, 0
e, s := skipValue(self.s, self.p)
if e < 0 {
return self.p, types.ParsingError(-e)
}
self.p = e
return s, 0
}

func (self *Parser) skipFast() (int, types.ParsingError) {
e, s := skipValueFast(self.s, self.p)
if e < 0 {
return self.p, types.ParsingError(-e)
}
self.p = e
return s, 0
e, s := skipValueFast(self.s, self.p)
if e < 0 {
return self.p, types.ParsingError(-e)
}
self.p = e
return s, 0
}

func (self *Node) encodeInterface(buf *[]byte) error {
out, err := json.Marshal(self.packAny())
if err != nil {
return err
}
*buf = append(*buf, out...)
return nil
out, err := json.Marshal(self.packAny())
if err != nil {
return err
}
*buf = append(*buf, out...)
return nil
}

func (self *Parser) getByPath(path ...interface{}) (int, types.ParsingError) {
func (self *Parser) getByPath(validate bool, path ...interface{}) (int, types.ParsingError) {
for _, p := range path {
if idx, ok := p.(int); ok && idx >= 0 {
if err := self.searchIndex(idx); err != 0 {
Expand All @@ -93,13 +95,20 @@ func (self *Parser) getByPath(path ...interface{}) (int, types.ParsingError) {
panic("path must be either int(>=0) or string")
}
}
start, e := self.skip()

var start int
var e types.ParsingError
if validate {
start, e = self.skip()
} else {
start, e = self.skipFast()
}
if e != 0 {
return self.p, e
}
// t := switchRawType(self.s[start])
// if t == _V_NUMBER {
// self.p = 1 + backward(self.s, self.p-1)
// }
return start, 0
}

// validate_utf8 reports whether str consists entirely of valid UTF-8
// sequences, as judged by the standard library's unicode/utf8 package.
func validate_utf8(str string) bool {
	valid := utf8.ValidString(str)
	return valid
}
73 changes: 69 additions & 4 deletions ast/api_native_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// +build amd64,go1.16,!go1.23
//go:build (amd64 && go1.16 && !go1.23) || (arm64 && go1.20 && !go1.23)
// +build amd64,go1.16,!go1.23 arm64,go1.20,!go1.23

/*
* Copyright 2022 ByteDance Inc.
Expand Down Expand Up @@ -31,6 +32,9 @@ import (
)

func TestSortNodeTwitter(t *testing.T) {
if encoder.EnableFallback {
return
}
root, err := NewSearcher(_TwitterJson).GetByPath()
if err != nil {
t.Fatal(err)
Expand All @@ -39,7 +43,7 @@ func TestSortNodeTwitter(t *testing.T) {
if err != nil {
t.Fatal(err)
}
exp, err := encoder.Encode(obj, encoder.SortMapKeys)
exp, err := encoder.Encode(obj, encoder.SortMapKeys|encoder.NoEncoderNewline)
if err != nil {
t.Fatal(err)
}
Expand Down Expand Up @@ -117,7 +121,7 @@ func TestTypeCast2(t *testing.T) {
if len(rets) != 2 {
t.Fatal(i, rets)
}
require.Equal(t, rets[0].Interface(), c.exp)
require.Equal(t, c.exp, rets[0].Interface())
v := rets[1].Interface();
if v != c.err {
t.Fatal(i, v)
Expand All @@ -140,4 +144,65 @@ func TestStackAny(t *testing.T) {
if string(buf) != "1" {
t.Fatal(string(buf))
}
}
}


// Test_Export drives _GetByPath, _SkipFast and _ValidSyntax with a table of
// small JSON arrays. Every case looks up index 0 of the array, then skips the
// value again starting from the expected offset, and finally checks the syntax
// of the slice src[wantStart:wantEnd].
func Test_Export(t *testing.T) {
	type args struct {
		src  string
		path []interface{}
	}
	type testCase struct {
		name      string
		args      args
		wantStart int
		wantEnd   int
		wantTyp   int
		wantErr   bool
		wantValid bool
	}

	cases := []testCase{
		{"bool", args{`[true ,2]`, []interface{}{0}}, 1, 5, V_TRUE, false, true},
		{"bool", args{`[t2ue ,2]`, []interface{}{0}}, 1, 5, V_TRUE, false, false},
		{"number", args{`[1 ,2]`, []interface{}{0}}, 1, 2, V_NUMBER, false, true},
		{"number", args{`[1w ,2]`, []interface{}{0}}, 1, 3, V_NUMBER, false, false},
		{"string", args{`[" " ,2]`, []interface{}{0}}, 1, 4, V_STRING, false, true},
		{"string", args{`[" "] ,2]`, []interface{}{0}}, 1, 4, V_STRING, false, true},
		{"object", args{`[{"":""} ,2]`, []interface{}{0}}, 1, 8, V_OBJECT, false, true},
		{"object", args{`[{x} ,2]`, []interface{}{0}}, 1, 4, V_OBJECT, false, false},
		{"arrauy", args{`[[{}] ,2]`, []interface{}{0}}, 1, 5, V_ARRAY, false, true},
		{"arrauy", args{`[[xx] ,2]`, []interface{}{0}}, 1, 5, V_ARRAY, false, false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			src := tc.args.src

			// Step 1: locate the value through the path lookup.
			start, end, typ, err := _GetByPath(src, tc.args.path...)
			if (err != nil) != tc.wantErr {
				t.Errorf("_GetByPath() error = %v, wantErr %v", err, tc.wantErr)
				return
			}
			if start != tc.wantStart {
				t.Errorf("_GetByPath() gotStart = %v, want %v", start, tc.wantStart)
			}
			if end != tc.wantEnd {
				t.Errorf("_GetByPath() gotEnd = %v, want %v", end, tc.wantEnd)
			}
			if typ != tc.wantTyp {
				t.Errorf("_GetByPath() gotTyp = %v, want %v", typ, tc.wantTyp)
			}

			// Step 2: skipping from the expected start must give the same span.
			start, end, err = _SkipFast(src, tc.wantStart)
			if (err != nil) != tc.wantErr {
				t.Errorf("_SkipFast() error = %v, wantErr %v", err, tc.wantErr)
				return
			}
			if start != tc.wantStart {
				t.Errorf("_SkipFast() gotStart = %v, want %v", start, tc.wantStart)
			}
			if end != tc.wantEnd {
				t.Errorf("_SkipFast() gotEnd = %v, want %v", end, tc.wantEnd)
			}

			// Step 3: syntax-check only the expected value span.
			span := src[tc.wantStart:tc.wantEnd]
			if valid := _ValidSyntax(span); valid != tc.wantValid {
				t.Errorf("_ValidSyntax() gotValid = %v, want %v", valid, tc.wantValid)
			}
		})
	}
}
2 changes: 1 addition & 1 deletion ast/b64_amd64.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
package ast

import (
`github.com/chenzhuoyu/base64x`
`github.com/chenzhuoyu/base64x`
)

func decodeBase64(src string) ([]byte, error) {
Expand Down
33 changes: 33 additions & 0 deletions ast/decode.go
Original file line number Diff line number Diff line change
Expand Up @@ -583,3 +583,36 @@ func skipArray(src string, pos int) (ret int, start int) {
pos++
}
}

// _DecodeString decodes the JSON value starting at pos in src, which must be a
// string, and returns it as a Go string.
//   - needEsc asks for escape sequences to be unescaped in the result
//   - validStr enables UTF-8 validation of the string content
//
// Results:
//   - v is the decoded string; it is empty on failure
//   - ret is the parser position after decoding on success, or a negated
//     error code on failure
//   - hasEsc is false when the decoder found no escape sequence in the string
//     and on the unsupported-type and invalid-UTF-8 failures; true otherwise
func _DecodeString(src string, pos int, needEsc bool, validStr bool) (v string, ret int, hasEsc bool) {
	parser := NewParserObj(src)
	parser.p = pos

	state := parser.decodeValue()
	if state.Vt != types.V_STRING {
		return "", -int(_ERR_UNSUPPORT_TYPE), false
	}

	/* content between the quotes: parser.p-1 is the closing quote */
	raw := parser.s[state.Iv : parser.p-1]
	if validStr && !validate_utf8(raw) {
		return "", -int(types.ERR_INVALID_UTF8), false
	}

	/* Ep == -1 means no escape sequence was found */
	escaped := state.Ep != -1
	if !escaped || !needEsc {
		return raw, parser.p, escaped
	}

	/* the caller wants the escapes resolved */
	unescaped, perr := unquote(raw)
	if perr != 0 {
		return "", -int(perr), true
	}
	return unescaped, parser.p, true
}
69 changes: 69 additions & 0 deletions ast/decode_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/*
* Copyright 2022 ByteDance Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package ast

import (
"testing"
"unicode/utf8"

"github.com/bytedance/sonic/internal/rt"
)

// Test_DecodeString feeds _DecodeString a table of JSON string inputs and
// compares the decoded value, the returned position (or negative error code)
// and the escape flag with the expected results.
func Test_DecodeString(t *testing.T) {
	type args struct {
		src      string
		pos      int
		needEsc  bool
		validStr bool
	}
	type testCase struct {
		name       string
		args       args
		wantV      string
		wantRet    int
		wantHasEsc bool
	}

	// A quoted string whose two payload bytes (193, 255) are not valid UTF-8.
	invalidstr := rt.Mem2Str([]byte{'"', 193, 255, '"'})
	println(utf8.ValidString(invalidstr))

	cases := []testCase{
		{"empty", args{`""`, 0, false, false}, "", 2, false},
		{"one", args{`"1"`, 0, false, false}, "1", 3, false},
		{"escape", args{`"\\"`, 0, false, false}, `\\`, 4, true},
		{"escape", args{`"\\"`, 0, true, true}, `\`, 4, true},
		{"uft8", args{`"\u263a"`, 0, false, false}, `\u263a`, 8, true},
		{"uft8", args{`"\u263a"`, 0, true, true}, `☺`, 8, true},
		{"invalid uft8", args{`"\xx"`, 0, false, false}, `\xx`, 5, true},
		{"invalid escape", args{`"\xx"`, 0, false, true}, `\xx`, 5, true},
		{"invalid escape", args{`"\xx"`, 0, true, true}, ``, -3, true},
		{"invalid string", args{invalidstr, 0, false, false}, invalidstr[1:3], 4, false},
		{"invalid string", args{invalidstr, 0, true, true}, "", -10, false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			in := tc.args
			v, ret, hasEsc := _DecodeString(in.src, in.pos, in.needEsc, in.validStr)
			if v != tc.wantV {
				t.Errorf("_DecodeString() gotV = %v, want %v", v, tc.wantV)
			}
			if ret != tc.wantRet {
				t.Errorf("_DecodeString() gotRet = %v, want %v", ret, tc.wantRet)
			}
			if hasEsc != tc.wantHasEsc {
				t.Errorf("_DecodeString() gotHasEsc = %v, want %v", hasEsc, tc.wantHasEsc)
			}
		})
	}
}

0 comments on commit 0704d0a

Please sign in to comment.