Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce allocations in flate decompressor and minor code improvements #869

Merged
merged 11 commits into from Oct 10, 2023
10 changes: 5 additions & 5 deletions flate/_gen/gen_inflate.go
Expand Up @@ -109,7 +109,7 @@ readLiteral:
dict.writeByte(byte(v))
if dict.availWrite() == 0 {
f.toRead = dict.readFlush()
f.step = (*decompressor).$FUNCNAME$
f.step = $FUNCNAME$
f.stepState = stateInit
f.b, f.nb = fb, fnb
return
Expand Down Expand Up @@ -275,7 +275,7 @@ copyHistory:
if dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = dict.readFlush()
f.step = (*decompressor).$FUNCNAME$ // We need to continue this work
f.step = $FUNCNAME$ // We need to continue this work
f.stepState = stateDict
f.b, f.nb = fb, fnb
return
Expand All @@ -291,13 +291,13 @@ copyHistory:
s = strings.Replace(s, "$TYPE$", t, -1)
f.WriteString(s)
}
f.WriteString("func (f *decompressor) huffmanBlockDecoder() func() {\n")
f.WriteString("func (f *decompressor) huffmanBlockDecoder() {\n")
f.WriteString("\tswitch f.r.(type) {\n")
for i, t := range types {
f.WriteString("\t\tcase " + t + ":\n")
f.WriteString("\t\t\treturn f.huffman" + names[i] + "\n")
f.WriteString("\t\t\tf.huffman" + names[i] + "()\n")
}
f.WriteString("\t\tdefault:\n")
f.WriteString("\t\t\treturn f.huffmanGenericReader\n")
f.WriteString("\t\t\tf.huffmanGenericReader()\n")
f.WriteString("\t}\n}\n")
}
76 changes: 58 additions & 18 deletions flate/inflate.go
Expand Up @@ -43,6 +43,9 @@ var bitMask32 = [32]uint32{
0x1ffFFFF, 0x3ffFFFF, 0x7ffFFFF, 0xfffFFFF, 0x1fffFFFF, 0x3fffFFFF, 0x7fffFFFF,
} // up to 32 bits

// zeroChunks is an all-zero slice with huffmanNumChunks entries.
// huffmanDecoder.init copies it over h.chunks to clear the table with a
// single copy call.
var zeroChunks = make([]uint16, huffmanNumChunks)

// Initialize the fixedHuffmanDecoder only once upon first use.
var fixedOnce sync.Once
var fixedHuffmanDecoder huffmanDecoder
Expand Down Expand Up @@ -120,8 +123,9 @@ func (h *huffmanDecoder) init(lengths []int) bool {
const sanity = false

if h.chunks == nil {
h.chunks = &[huffmanNumChunks]uint16{}
h.chunks = new([huffmanNumChunks]uint16)
}

if h.maxRead != 0 {
*h = huffmanDecoder{chunks: h.chunks, links: h.links}
}
Expand Down Expand Up @@ -175,10 +179,10 @@ func (h *huffmanDecoder) init(lengths []int) bool {
}

h.maxRead = min
chunks := h.chunks[:]
for i := range chunks {
chunks[i] = 0
}

// Instead of zeroing the array element by element, copy an already
// zero-filled slice into it.
copy(h.chunks[:], zeroChunks)
klauspost marked this conversation as resolved.
Show resolved Hide resolved

if max > huffmanChunkBits {
numLinks := 1 << (uint(max) - huffmanChunkBits)
Expand All @@ -202,8 +206,7 @@ func (h *huffmanDecoder) init(lengths []int) bool {
if cap(h.links[off]) < numLinks {
h.links[off] = make([]uint16, numLinks)
} else {
links := h.links[off][:0]
h.links[off] = links[:numLinks]
h.links[off] = h.links[off][:numLinks]
}
}
} else {
Expand Down Expand Up @@ -277,14 +280,26 @@ func (h *huffmanDecoder) init(lengths []int) bool {
return true
}

// The actual read interface needed by NewReader.
// Reader is the actual read interface needed by NewReader.
// If the passed in io.Reader does not also have ReadByte,
// the NewReader will introduce its own buffering.
type Reader interface {
io.Reader
io.ByteReader
}

// step identifies the decoding routine that doStep runs next.
// It is the type of the decompressor.step field.
type step uint8

// Each constant is named after the decompressor method that doStep calls for it.
// Values start at 1 (iota + 1), so the zero value of step matches none of
// them and ends up in the default case of doStep.
const (
copyData step = iota + 1
nextBlock
huffmanBytesBuffer
huffmanBytesReader
huffmanBufioReader
huffmanStringsReader
huffmanGenericReader
)

// Decompress state.
type decompressor struct {
// Input source.
Expand All @@ -303,7 +318,7 @@ type decompressor struct {

// Next step in the decompression,
// and decompression state.
step func(*decompressor)
step step
stepState int
err error
toRead []byte
Expand Down Expand Up @@ -342,7 +357,7 @@ func (f *decompressor) nextBlock() {
// compressed, fixed Huffman tables
f.hl = &fixedHuffmanDecoder
f.hd = nil
f.huffmanBlockDecoder()()
f.huffmanBlockDecoder()
if debugDecode {
fmt.Println("predefinied huffman block")
}
Expand All @@ -353,7 +368,7 @@ func (f *decompressor) nextBlock() {
}
f.hl = &f.h1
f.hd = &f.h2
f.huffmanBlockDecoder()()
f.huffmanBlockDecoder()
if debugDecode {
fmt.Println("dynamic huffman block")
}
Expand All @@ -379,7 +394,9 @@ func (f *decompressor) Read(b []byte) (int, error) {
if f.err != nil {
return 0, f.err
}
f.step(f)

f.doStep()

if f.err != nil && len(f.toRead) == 0 {
f.toRead = f.dict.readFlush() // Flush what's left in case of error
}
Expand Down Expand Up @@ -410,7 +427,7 @@ func (f *decompressor) WriteTo(w io.Writer) (int64, error) {
return total, f.err
}
if f.err == nil {
f.step(f)
f.doStep()
}
if len(f.toRead) == 0 && f.err != nil && !flushed {
f.toRead = f.dict.readFlush() // Flush what's left in case of error
Expand Down Expand Up @@ -631,7 +648,7 @@ func (f *decompressor) copyData() {

if f.dict.availWrite() == 0 || f.copyLen > 0 {
f.toRead = f.dict.readFlush()
f.step = (*decompressor).copyData
f.step = copyData
return
}
f.finishBlock()
Expand All @@ -644,7 +661,30 @@ func (f *decompressor) finishBlock() {
}
f.err = io.EOF
}
f.step = (*decompressor).nextBlock
f.step = nextBlock
}

// doStep runs the decoding routine selected by f.step.
//
// Each step constant maps to the decompressor method of the same name.
// A value that matches no constant (including the zero value, since the
// constants start at 1) is a programming error. It is recorded in f.err so
// that callers which check f.err after each step stop, instead of calling
// doStep again without making progress.
func (f *decompressor) doStep() {
	switch f.step {
	case copyData:
		f.copyData()
	case nextBlock:
		f.nextBlock()
	case huffmanBytesBuffer:
		f.huffmanBytesBuffer()
	case huffmanBytesReader:
		f.huffmanBytesReader()
	case huffmanBufioReader:
		f.huffmanBufioReader()
	case huffmanStringsReader:
		f.huffmanStringsReader()
	case huffmanGenericReader:
		f.huffmanGenericReader()
	default:
		if debugDecode {
			fmt.Println("BUG: unexpected step state")
		}
		// Surface the invalid state as an error rather than silently
		// doing nothing.
		f.err = fmt.Errorf("flate: internal error: unexpected step state %d", f.step)
	}
}

// noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF.
Expand Down Expand Up @@ -747,7 +787,7 @@ func (f *decompressor) Reset(r io.Reader, dict []byte) error {
h1: f.h1,
h2: f.h2,
dict: f.dict,
step: (*decompressor).nextBlock,
step: nextBlock,
}
f.dict.init(maxMatchOffset, dict)
return nil
Expand All @@ -768,7 +808,7 @@ func NewReader(r io.Reader) io.ReadCloser {
f.r = makeReader(r)
f.bits = new([maxNumLit + maxNumDist]int)
f.codebits = new([numCodes]int)
f.step = (*decompressor).nextBlock
f.step = nextBlock
f.dict.init(maxMatchOffset, nil)
return &f
}
Expand All @@ -787,7 +827,7 @@ func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
f.r = makeReader(r)
f.bits = new([maxNumLit + maxNumDist]int)
f.codebits = new([numCodes]int)
f.step = (*decompressor).nextBlock
f.step = nextBlock
f.dict.init(maxMatchOffset, dict)
return &f
}
34 changes: 17 additions & 17 deletions flate/inflate_gen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 7 additions & 4 deletions flate/reader_test.go
Expand Up @@ -64,13 +64,16 @@ func benchmarkDecode(b *testing.B, testfile, level, n int) {
w.Close()
buf1 := compressed.Bytes()
buf0, compressed, w = nil, nil, nil
runtime.GC()
b.StartTimer()
const ioCopyBuffSize = 32 * 1024 // taken from io.copyBuffer, in case passed buf==nil
ioCopyBuff := make([]byte, ioCopyBuffSize)
r := NewReader(bytes.NewReader(buf1))
res := r.(Resetter)
runtime.GC()
b.StartTimer()

for i := 0; i < b.N; i++ {
res.Reset(bytes.NewReader(buf1), nil)
io.Copy(io.Discard, r)
_ = res.Reset(bytes.NewReader(buf1), nil)
_, _ = io.CopyBuffer(io.Discard, r, ioCopyBuff)
}
}

Expand Down