klauspost · klauspost · Oct 10, 2023 · Oct 8, 2023 · Oct 8, 2023 · Oct 8, 2023
diff --git a/flate/_gen/gen_inflate.go b/flate/_gen/gen_inflate.go
@@ -109,7 +109,7 @@ readLiteral:
 			dict.writeByte(byte(v))
 			if dict.availWrite() == 0 {
 				f.toRead = dict.readFlush()
-				f.step = (*decompressor).$FUNCNAME$
+				f.step = $FUNCNAME$
 				f.stepState = stateInit
 				f.b, f.nb = fb, fnb
 				return
@@ -275,7 +275,7 @@ copyHistory:
 
 		if dict.availWrite() == 0 || f.copyLen > 0 {
 			f.toRead = dict.readFlush()
-			f.step = (*decompressor).$FUNCNAME$ // We need to continue this work
+			f.step = $FUNCNAME$ // We need to continue this work
 			f.stepState = stateDict
 			f.b, f.nb = fb, fnb
 			return
@@ -291,13 +291,13 @@ copyHistory:
 		s = strings.Replace(s, "$TYPE$", t, -1)
 		f.WriteString(s)
 	}
-	f.WriteString("func (f *decompressor) huffmanBlockDecoder() func() {\n")
+	f.WriteString("func (f *decompressor) huffmanBlockDecoder() {\n")
 	f.WriteString("\tswitch f.r.(type) {\n")
 	for i, t := range types {
 		f.WriteString("\t\tcase " + t + ":\n")
-		f.WriteString("\t\t\treturn f.huffman" + names[i] + "\n")
+		f.WriteString("\t\t\tf.huffman" + names[i] + "()\n")
 	}
 	f.WriteString("\t\tdefault:\n")
-	f.WriteString("\t\t\treturn f.huffmanGenericReader\n")
+	f.WriteString("\t\t\tf.huffmanGenericReader()\n")
 	f.WriteString("\t}\n}\n")
 }
diff --git a/flate/inflate.go b/flate/inflate.go
@@ -43,6 +43,9 @@ var bitMask32 = [32]uint32{
 	0x1ffFFFF, 0x3ffFFFF, 0x7ffFFFF, 0xfffFFFF, 0x1fffFFFF, 0x3fffFFFF, 0x7fffFFFF,
 } // up to 32 bits
 
+// zeroChunks is an all-zero template copied over huffmanDecoder.chunks to clear it.
+var zeroChunks = make([]uint16, huffmanNumChunks)
+
 // Initialize the fixedHuffmanDecoder only once upon first use.
 var fixedOnce sync.Once
 var fixedHuffmanDecoder huffmanDecoder
@@ -120,8 +123,9 @@ func (h *huffmanDecoder) init(lengths []int) bool {
 	const sanity = false
 
 	if h.chunks == nil {
-		h.chunks = &[huffmanNumChunks]uint16{}
+		h.chunks = new([huffmanNumChunks]uint16)
 	}
+
 	if h.maxRead != 0 {
 		*h = huffmanDecoder{chunks: h.chunks, links: h.links}
 	}
@@ -175,10 +179,10 @@ func (h *huffmanDecoder) init(lengths []int) bool {
 	}
 
 	h.maxRead = min
-	chunks := h.chunks[:]
-	for i := range chunks {
-		chunks[i] = 0
-	}
+
+	// Rather than zeroing the array element by element, overwrite it with
+	// the already-zeroed zeroChunks slice.
+	copy(h.chunks[:], zeroChunks)
 
 	if max > huffmanChunkBits {
 		numLinks := 1 << (uint(max) - huffmanChunkBits)
@@ -202,8 +206,7 @@ func (h *huffmanDecoder) init(lengths []int) bool {
 			if cap(h.links[off]) < numLinks {
 				h.links[off] = make([]uint16, numLinks)
 			} else {
-				links := h.links[off][:0]
-				h.links[off] = links[:numLinks]
+				h.links[off] = h.links[off][:numLinks]
 			}
 		}
 	} else {
@@ -277,14 +280,26 @@ func (h *huffmanDecoder) init(lengths []int) bool {
 	return true
 }
 
-// The actual read interface needed by NewReader.
+// Reader is the actual read interface needed by NewReader.
 // If the passed in io.Reader does not also have ReadByte,
 // the NewReader will introduce its own buffering.
 type Reader interface {
 	io.Reader
 	io.ByteReader
 }
 
+type step uint8 // selects which decompressor method doStep invokes next
+
+const (
+	copyData step = iota + 1 // iota + 1 leaves the zero value unassigned; doStep has no case for it
+	nextBlock
+	huffmanBytesBuffer
+	huffmanBytesReader
+	huffmanBufioReader
+	huffmanStringsReader
+	huffmanGenericReader
+)
+
 // Decompress state.
 type decompressor struct {
 	// Input source.
@@ -303,7 +318,7 @@ type decompressor struct {
 
 	// Next step in the decompression,
 	// and decompression state.
-	step      func(*decompressor)
+	step      step
 	stepState int
 	err       error
 	toRead    []byte
@@ -342,7 +357,7 @@ func (f *decompressor) nextBlock() {
 		// compressed, fixed Huffman tables
 		f.hl = &fixedHuffmanDecoder
 		f.hd = nil
-		f.huffmanBlockDecoder()()
+		f.huffmanBlockDecoder()
 		if debugDecode {
 			fmt.Println("predefinied huffman block")
 		}
@@ -353,7 +368,7 @@ func (f *decompressor) nextBlock() {
 		}
 		f.hl = &f.h1
 		f.hd = &f.h2
-		f.huffmanBlockDecoder()()
+		f.huffmanBlockDecoder()
 		if debugDecode {
 			fmt.Println("dynamic huffman block")
 		}
@@ -379,7 +394,9 @@ func (f *decompressor) Read(b []byte) (int, error) {
 		if f.err != nil {
 			return 0, f.err
 		}
-		f.step(f)
+
+		f.doStep()
+
 		if f.err != nil && len(f.toRead) == 0 {
 			f.toRead = f.dict.readFlush() // Flush what's left in case of error
 		}
@@ -410,7 +427,7 @@ func (f *decompressor) WriteTo(w io.Writer) (int64, error) {
 			return total, f.err
 		}
 		if f.err == nil {
-			f.step(f)
+			f.doStep()
 		}
 		if len(f.toRead) == 0 && f.err != nil && !flushed {
 			f.toRead = f.dict.readFlush() // Flush what's left in case of error
@@ -631,7 +648,7 @@ func (f *decompressor) copyData() {
 
 	if f.dict.availWrite() == 0 || f.copyLen > 0 {
 		f.toRead = f.dict.readFlush()
-		f.step = (*decompressor).copyData
+		f.step = copyData
 		return
 	}
 	f.finishBlock()
@@ -644,7 +661,30 @@ func (f *decompressor) finishBlock() {
 		}
 		f.err = io.EOF
 	}
-	f.step = (*decompressor).nextBlock
+	f.step = nextBlock
+}
+
+// doStep invokes the decompressor method selected by f.step.
+func (f *decompressor) doStep() {
+	switch f.step {
+	case copyData:
+		f.copyData()
+	case nextBlock:
+		f.nextBlock()
+	case huffmanBytesBuffer:
+		f.huffmanBytesBuffer()
+	case huffmanBytesReader:
+		f.huffmanBytesReader()
+	case huffmanBufioReader:
+		f.huffmanBufioReader()
+	case huffmanStringsReader:
+		f.huffmanStringsReader()
+	case huffmanGenericReader:
+		f.huffmanGenericReader()
+	default:
+		// Fail loudly, as calling the old nil func value did; a silent no-op hides the bug.
+		panic("BUG: unexpected step state")
+	}
 }
 
 // noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF.
@@ -747,7 +787,7 @@ func (f *decompressor) Reset(r io.Reader, dict []byte) error {
 		h1:       f.h1,
 		h2:       f.h2,
 		dict:     f.dict,
-		step:     (*decompressor).nextBlock,
+		step:     nextBlock,
 	}
 	f.dict.init(maxMatchOffset, dict)
 	return nil
@@ -768,7 +808,7 @@ func NewReader(r io.Reader) io.ReadCloser {
 	f.r = makeReader(r)
 	f.bits = new([maxNumLit + maxNumDist]int)
 	f.codebits = new([numCodes]int)
-	f.step = (*decompressor).nextBlock
+	f.step = nextBlock
 	f.dict.init(maxMatchOffset, nil)
 	return &f
 }
@@ -787,7 +827,7 @@ func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
 	f.r = makeReader(r)
 	f.bits = new([maxNumLit + maxNumDist]int)
 	f.codebits = new([numCodes]int)
-	f.step = (*decompressor).nextBlock
+	f.step = nextBlock
 	f.dict.init(maxMatchOffset, dict)
 	return &f
 }
diff --git a/flate/inflate_gen.go b/flate/inflate_gen.go
diff --git a/flate/reader_test.go b/flate/reader_test.go
@@ -64,13 +64,16 @@ func benchmarkDecode(b *testing.B, testfile, level, n int) {
 	w.Close()
 	buf1 := compressed.Bytes()
 	buf0, compressed, w = nil, nil, nil
-	runtime.GC()
-	b.StartTimer()
+	const ioCopyBuffSize = 32 * 1024 // taken from io.copyBuffer, in case passed buf==nil
+	ioCopyBuff := make([]byte, ioCopyBuffSize)
 	r := NewReader(bytes.NewReader(buf1))
 	res := r.(Resetter)
+	runtime.GC()
+	b.StartTimer()
+
 	for i := 0; i < b.N; i++ {
-		res.Reset(bytes.NewReader(buf1), nil)
-		io.Copy(io.Discard, r)
+		_ = res.Reset(bytes.NewReader(buf1), nil)
+		_, _ = io.CopyBuffer(io.Discard, r, ioCopyBuff)
 	}
 }