Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat:(ast) export some API for third-party JSON libs #608

Merged
merged 9 commits into from
Mar 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
19 changes: 15 additions & 4 deletions ast/api.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
//go:build (amd64 && go1.16 && !go1.23) || (arm64 && go1.20 && !go1.23)
// +build amd64,go1.16,!go1.23 arm64,go1.20,!go1.23

/*
Expand Down Expand Up @@ -27,6 +28,7 @@ import (
`github.com/bytedance/sonic/internal/native/types`
`github.com/bytedance/sonic/internal/rt`
uq `github.com/bytedance/sonic/unquote`
`github.com/bytedance/sonic/utf8`
)

var typeByte = rt.UnpackEface(byte(0)).Type
Expand Down Expand Up @@ -101,7 +103,7 @@ func (self *Parser) skip() (int, types.ParsingError) {

func (self *Node) encodeInterface(buf *[]byte) error {
//WARN: NOT compatible with json.Encoder
return encoder.EncodeInto(buf, self.packAny(), 0)
return encoder.EncodeInto(buf, self.packAny(), encoder.NoEncoderNewline)
}

func (self *Parser) skipFast() (int, types.ParsingError) {
Expand All @@ -112,13 +114,22 @@ func (self *Parser) skipFast() (int, types.ParsingError) {
return start, 0
}

func (self *Parser) getByPath(path ...interface{}) (int, types.ParsingError) {
fsm := types.NewStateMachine()
func (self *Parser) getByPath(validate bool, path ...interface{}) (int, types.ParsingError) {
var fsm *types.StateMachine
if validate {
fsm = types.NewStateMachine()
}
start := native.GetByPath(&self.s, &self.p, &path, fsm)
types.FreeStateMachine(fsm)
if validate {
types.FreeStateMachine(fsm)
}
runtime.KeepAlive(path)
if start < 0 {
return self.p, types.ParsingError(-start)
}
return start, 0
}

// validate_utf8 reports whether str passes the ValidateString check of the
// utf8 package imported by this file.
func validate_utf8(str string) bool {
	valid := utf8.ValidateString(str)
	return valid
}
91 changes: 50 additions & 41 deletions ast/api_compat.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,67 +19,69 @@
package ast

import (
`encoding/json`
`encoding/json`
`unicode/utf8`

`github.com/bytedance/sonic/internal/native/types`
`github.com/bytedance/sonic/internal/rt`
`github.com/bytedance/sonic/internal/native/types`
`github.com/bytedance/sonic/internal/rt`
)

func init() {
println("WARNING:(ast) sonic only supports Go1.16~1.22, but your environment is not suitable")
println("WARNING:(ast) sonic only supports Go1.16~1.22, but your environment is not suitable")
}

func quote(buf *[]byte, val string) {
quoteString(buf, val)
quoteString(buf, val)
}

// unquote unescapes an internal JSON string (it doesn't count the quotes at the beginning and end)
func unquote(src string) (string, types.ParsingError) {
sp := rt.IndexChar(src, -1)
out, ok := unquoteBytes(rt.BytesFrom(sp, len(src)+2, len(src)+2))
if !ok {
return "", types.ERR_INVALID_ESCAPE
}
return rt.Mem2Str(out), 0
sp := rt.IndexChar(src, -1)
out, ok := unquoteBytes(rt.BytesFrom(sp, len(src)+2, len(src)+2))
if !ok {
return "", types.ERR_INVALID_ESCAPE
}
return rt.Mem2Str(out), 0
}


func (self *Parser) decodeValue() (val types.JsonState) {
e, v := decodeValue(self.s, self.p, self.dbuf == nil)
if e < 0 {
return v
}
self.p = e
return v
e, v := decodeValue(self.s, self.p, self.dbuf == nil)
if e < 0 {
return v
}
self.p = e
return v
}

func (self *Parser) skip() (int, types.ParsingError) {
e, s := skipValue(self.s, self.p)
if e < 0 {
return self.p, types.ParsingError(-e)
}
self.p = e
return s, 0
e, s := skipValue(self.s, self.p)
if e < 0 {
return self.p, types.ParsingError(-e)
}
self.p = e
return s, 0
}

func (self *Parser) skipFast() (int, types.ParsingError) {
e, s := skipValueFast(self.s, self.p)
if e < 0 {
return self.p, types.ParsingError(-e)
}
self.p = e
return s, 0
e, s := skipValueFast(self.s, self.p)
if e < 0 {
return self.p, types.ParsingError(-e)
}
self.p = e
return s, 0
}

func (self *Node) encodeInterface(buf *[]byte) error {
out, err := json.Marshal(self.packAny())
if err != nil {
return err
}
*buf = append(*buf, out...)
return nil
out, err := json.Marshal(self.packAny())
if err != nil {
return err
}
*buf = append(*buf, out...)
return nil
}

func (self *Parser) getByPath(path ...interface{}) (int, types.ParsingError) {
func (self *Parser) getByPath(validate bool, path ...interface{}) (int, types.ParsingError) {
for _, p := range path {
if idx, ok := p.(int); ok && idx >= 0 {
if err := self.searchIndex(idx); err != 0 {
Expand All @@ -93,13 +95,20 @@ func (self *Parser) getByPath(path ...interface{}) (int, types.ParsingError) {
panic("path must be either int(>=0) or string")
}
}
start, e := self.skip()

var start int
var e types.ParsingError
if validate {
start, e = self.skip()
} else {
start, e = self.skipFast()
}
if e != 0 {
return self.p, e
}
// t := switchRawType(self.s[start])
// if t == _V_NUMBER {
// self.p = 1 + backward(self.s, self.p-1)
// }
return start, 0
}

// validate_utf8 reports whether str consists entirely of valid UTF-8,
// as decided by the standard library's utf8.ValidString.
func validate_utf8(str string) bool {
	valid := utf8.ValidString(str)
	return valid
}
73 changes: 69 additions & 4 deletions ast/api_native_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// +build amd64,go1.16,!go1.23
//go:build (amd64 && go1.16 && !go1.23) || (arm64 && go1.20 && !go1.23)
// +build amd64,go1.16,!go1.23 arm64,go1.20,!go1.23

/*
* Copyright 2022 ByteDance Inc.
Expand Down Expand Up @@ -31,6 +32,9 @@ import (
)

func TestSortNodeTwitter(t *testing.T) {
if encoder.EnableFallback {
return
}
root, err := NewSearcher(_TwitterJson).GetByPath()
if err != nil {
t.Fatal(err)
Expand All @@ -39,7 +43,7 @@ func TestSortNodeTwitter(t *testing.T) {
if err != nil {
t.Fatal(err)
}
exp, err := encoder.Encode(obj, encoder.SortMapKeys)
exp, err := encoder.Encode(obj, encoder.SortMapKeys|encoder.NoEncoderNewline)
if err != nil {
t.Fatal(err)
}
Expand Down Expand Up @@ -117,7 +121,7 @@ func TestTypeCast2(t *testing.T) {
if len(rets) != 2 {
t.Fatal(i, rets)
}
require.Equal(t, rets[0].Interface(), c.exp)
require.Equal(t, c.exp, rets[0].Interface())
v := rets[1].Interface();
if v != c.err {
t.Fatal(i, v)
Expand All @@ -140,4 +144,65 @@ func TestStackAny(t *testing.T) {
if string(buf) != "1" {
t.Fatal(string(buf))
}
}
}


// Test_Export is a table-driven test of the exported helpers _GetByPath,
// _SkipFast and _ValidSyntax.
//
// For every case it looks up args.path in args.src with _GetByPath and checks
// the returned start offset, end offset and value type; it then calls
// _SkipFast from wantStart and expects the same offsets; finally it runs
// _ValidSyntax over src[wantStart:wantEnd] and compares against wantValid.
//
// Each case carries a unique name so that a failing subtest can be identified
// and selected with `go test -run`.
func Test_Export(t *testing.T) {
	type args struct {
		src  string
		path []interface{}
	}
	tests := []struct {
		name      string
		args      args
		wantStart int
		wantEnd   int
		wantTyp   int
		wantErr   bool
		wantValid bool
	}{
		{"bool_valid", args{`[true ,2]`, []interface{}{0}}, 1, 5, V_TRUE, false, true},
		{"bool_invalid", args{`[t2ue ,2]`, []interface{}{0}}, 1, 5, V_TRUE, false, false},
		{"number_valid", args{`[1 ,2]`, []interface{}{0}}, 1, 2, V_NUMBER, false, true},
		{"number_invalid", args{`[1w ,2]`, []interface{}{0}}, 1, 3, V_NUMBER, false, false},
		{"string_valid", args{`[" " ,2]`, []interface{}{0}}, 1, 4, V_STRING, false, true},
		{"string_trailing_bracket", args{`[" "] ,2]`, []interface{}{0}}, 1, 4, V_STRING, false, true},
		{"object_valid", args{`[{"":""} ,2]`, []interface{}{0}}, 1, 8, V_OBJECT, false, true},
		{"object_invalid", args{`[{x} ,2]`, []interface{}{0}}, 1, 4, V_OBJECT, false, false},
		{"array_valid", args{`[[{}] ,2]`, []interface{}{0}}, 1, 5, V_ARRAY, false, true},
		{"array_invalid", args{`[[xx] ,2]`, []interface{}{0}}, 1, 5, V_ARRAY, false, false},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Lookup by path: offsets and type of the addressed value.
			gotStart, gotEnd, gotTyp, err := _GetByPath(tt.args.src, tt.args.path...)
			if (err != nil) != tt.wantErr {
				t.Errorf("_GetByPath() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			if gotStart != tt.wantStart {
				t.Errorf("_GetByPath() gotStart = %v, want %v", gotStart, tt.wantStart)
			}
			if gotEnd != tt.wantEnd {
				t.Errorf("_GetByPath() gotEnd = %v, want %v", gotEnd, tt.wantEnd)
			}
			if gotTyp != tt.wantTyp {
				t.Errorf("_GetByPath() gotTyp = %v, want %v", gotTyp, tt.wantTyp)
			}

			// Skipping from the known start must yield the same span.
			gotStart, gotEnd, err = _SkipFast(tt.args.src, tt.wantStart)
			if (err != nil) != tt.wantErr {
				t.Errorf("_SkipFast() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			if gotStart != tt.wantStart {
				t.Errorf("_SkipFast() gotStart = %v, want %v", gotStart, tt.wantStart)
			}
			if gotEnd != tt.wantEnd {
				t.Errorf("_SkipFast() gotEnd = %v, want %v", gotEnd, tt.wantEnd)
			}

			// Syntax validation of the extracted span.
			valid := _ValidSyntax(tt.args.src[tt.wantStart:tt.wantEnd])
			if valid != tt.wantValid {
				t.Errorf("_ValidSyntax() gotValid = %v, want %v", valid, tt.wantValid)
			}
		})
	}
}
2 changes: 1 addition & 1 deletion ast/b64_amd64.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
package ast

import (
`github.com/chenzhuoyu/base64x`
`github.com/chenzhuoyu/base64x`
)

func decodeBase64(src string) ([]byte, error) {
Expand Down
33 changes: 33 additions & 0 deletions ast/decode.go
Original file line number Diff line number Diff line change
Expand Up @@ -583,3 +583,36 @@ func skipArray(src string, pos int) (ret int, start int) {
pos++
}
}

// _DecodeString decodes the JSON string starting at pos in src and returns it
// as a Go string.
//   - needEsc tells whether escape sequences should be unescaped
//   - validStr enables UTF-8 validation of the string content
//
// It returns the resulting text v, the parser position ret reached after
// decoding (or a negated error code on failure), and hasEsc, which tells
// whether the JSON string contained escape sequences.
func _DecodeString(src string, pos int, needEsc bool, validStr bool) (v string, ret int, hasEsc bool) {
	parser := NewParserObj(src)
	parser.p = pos

	state := parser.decodeValue()
	if state.Vt != types.V_STRING {
		/* only string values are supported here */
		return "", -int(_ERR_UNSUPPORT_TYPE), false
	}

	/* string content; presumably the text between the surrounding quotes */
	raw := parser.s[state.Iv : parser.p-1]
	if validStr && !validate_utf8(raw) {
		return "", -int(types.ERR_INVALID_UTF8), false
	}

	/* Ep == -1 means the string holds no escape sequence */
	escaped := state.Ep != -1
	if !escaped || !needEsc {
		return raw, parser.p, escaped
	}

	/* unquote the string and check for errors */
	unquoted, perr := unquote(raw)
	if perr != 0 {
		return "", -int(perr), true
	}
	return unquoted, parser.p, true
}
69 changes: 69 additions & 0 deletions ast/decode_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/*
* Copyright 2022 ByteDance Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package ast

import (
"testing"
"unicode/utf8"

"github.com/bytedance/sonic/internal/rt"
)

// Test_DecodeString is a table-driven test of _DecodeString covering plain
// strings, escaped strings (returned raw or unescaped), invalid escape
// sequences and invalid UTF-8 content, with and without validation.
//
// Each case carries a unique name so that a failing subtest can be identified
// and selected with `go test -run`.
func Test_DecodeString(t *testing.T) {
	type args struct {
		src      string
		pos      int
		needEsc  bool
		validStr bool
	}

	// A quoted string whose content bytes (193, 255) are not valid UTF-8.
	// Assert that up front instead of merely printing it, so a broken fixture
	// fails loudly rather than silently weakening the cases below.
	invalidstr := rt.Mem2Str([]byte{'"', 193, 255, '"'})
	if utf8.ValidString(invalidstr) {
		t.Fatalf("fixture %q is expected to be invalid UTF-8", invalidstr)
	}

	tests := []struct {
		name       string
		args       args
		wantV      string
		wantRet    int
		wantHasEsc bool
	}{
		{"empty", args{`""`, 0, false, false}, "", 2, false},
		{"one", args{`"1"`, 0, false, false}, "1", 3, false},
		{"escape_raw", args{`"\\"`, 0, false, false}, `\\`, 4, true},
		{"escape_unescaped", args{`"\\"`, 0, true, true}, `\`, 4, true},
		{"unicode_escape_raw", args{`"\u263a"`, 0, false, false}, `\u263a`, 8, true},
		{"unicode_escape_unescaped", args{`"\u263a"`, 0, true, true}, `☺`, 8, true},
		{"invalid_escape_raw", args{`"\xx"`, 0, false, false}, `\xx`, 5, true},
		{"invalid_escape_raw_validated", args{`"\xx"`, 0, false, true}, `\xx`, 5, true},
		{"invalid_escape_unescaped", args{`"\xx"`, 0, true, true}, ``, -3, true},
		{"invalid_utf8_unvalidated", args{invalidstr, 0, false, false}, invalidstr[1:3], 4, false},
		{"invalid_utf8_validated", args{invalidstr, 0, true, true}, "", -10, false},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			gotV, gotRet, gotHasEsc := _DecodeString(tt.args.src, tt.args.pos, tt.args.needEsc, tt.args.validStr)
			if gotV != tt.wantV {
				t.Errorf("_DecodeString() gotV = %q, want %q", gotV, tt.wantV)
			}
			if gotRet != tt.wantRet {
				t.Errorf("_DecodeString() gotRet = %v, want %v", gotRet, tt.wantRet)
			}
			if gotHasEsc != tt.wantHasEsc {
				t.Errorf("_DecodeString() gotHasEsc = %v, want %v", gotHasEsc, tt.wantHasEsc)
			}
		})
	}
}