Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Timdw/pr to pierrec #100

Merged
merged 4 commits into from Oct 31, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmd/lz4c/compress.go
Expand Up @@ -10,8 +10,8 @@ import (
"code.cloudfoundry.org/bytefmt"
"github.com/schollz/progressbar"

"github.com/pierrec/cmdflag"
"github.com/pierrec/lz4"
"github.com/pierrec/cmdflag"
)

// Compress compresses a set of files or from stdin to stdout.
Expand Down
3 changes: 2 additions & 1 deletion fuzz/lz4.go
Expand Up @@ -2,8 +2,9 @@ package lz4

import (
"bytes"
"github.com/pierrec/lz4"
"io"

"github.com/pierrec/lz4"
)

// Fuzz function for the Reader and Writer.
Expand Down
2 changes: 1 addition & 1 deletion internal/xxh32/xxh32zero_test.go
Expand Up @@ -6,8 +6,8 @@ import (
"hash/fnv"
"testing"

qt "github.com/frankban/quicktest"
"github.com/pierrec/lz4/internal/xxh32"
qt "github.com/frankban/quicktest"
)

type test struct {
Expand Down
13 changes: 8 additions & 5 deletions lz4.go
Expand Up @@ -10,18 +10,20 @@
//
package lz4

import "math/bits"

import "sync"
import (
"math/bits"
"sync"
)

const (
// Extension is the LZ4 frame file name extension
Extension = ".lz4"
// Version is the LZ4 frame format version
Version = 1

frameMagic uint32 = 0x184D2204
frameSkipMagic uint32 = 0x184D2A50
frameMagic uint32 = 0x184D2204
frameSkipMagic uint32 = 0x184D2A50
frameMagicLegacy uint32 = 0x184C2102

// The following constants are used to setup the compression algorithm.
minMatch = 4 // the minimum size of the match sequence size (4 bytes)
Expand Down Expand Up @@ -108,6 +110,7 @@ type Header struct {
done bool // Header processed flag (Read or Write and checked).
}

// Reset clears the header's processed flag (done), so the header is
// treated as not yet read or written.
func (h *Header) Reset() {
h.done = false
}
207 changes: 207 additions & 0 deletions reader_legacy.go
@@ -0,0 +1,207 @@
package lz4

import (
"encoding/binary"
"fmt"
"io"
)

// ReaderLegacy implements the LZ4Demo (legacy) frame decoder.
// The embedded Header is marked as processed after the first successful
// call to Read(), once the legacy magic number has been validated.
type ReaderLegacy struct {
Header
// Handler called when a block has been successfully read.
// It is passed the number of uncompressed bytes produced by that block.
OnBlockDone func(size int)

// lastBlock is set when a block decompresses to less than the fixed
// legacy block size, which Read treats as the end of the stream.
lastBlock bool
buf [8]byte // Scrap buffer.
pos int64 // Current position in src.
src io.Reader // Source.
zdata []byte // Compressed data.
data []byte // Uncompressed data.
idx int // Index of unread bytes into data.
skip int64 // Bytes to skip before next read (accumulated by Seek).
dpos int64 // Position in dest
}

// NewReaderLegacy builds an LZ4Demo (legacy) frame decoder that will read
// its compressed input from src.
// Construction itself does not touch src; the first access happens in Read.
func NewReaderLegacy(src io.Reader) *ReaderLegacy {
	return &ReaderLegacy{src: src}
}

// readLegacyHeader reads the 4-byte magic number from the source, checks that
// it is the legacy frame magic, and prepares the compressed and uncompressed
// buffers for the fixed legacy block size. No other header fields are parsed.
//
// It returns io.EOF when the magic number read ends with
// io.ErrUnexpectedEOF, any other read error unchanged, and ErrInvalid when
// the magic number is not the legacy one.
func (z *ReaderLegacy) readLegacyHeader() error {
	z.lastBlock = false

	magic, err := z.readUint32()
	switch {
	case err == io.ErrUnexpectedEOF:
		z.pos += 4
		return io.EOF
	case err != nil:
		z.pos += 4
		return err
	case magic != frameMagicLegacy:
		return ErrInvalid
	}
	z.pos += 4

	// Legacy has fixed 8MB blocksizes
	// https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md#legacy-frame
	bSize := blockSize4M * 2

	// A single allocation backs both buffers: the first half holds the
	// compressed block, the second half the uncompressed one.
	// The compressed buffer cannot exceed the uncompressed one.
	if need := 2 * bSize; cap(z.zdata) < need {
		z.zdata = make([]byte, need)
	}
	if debugFlag {
		debug("header block max size size=%d", bSize)
	}
	z.zdata = z.zdata[:bSize]
	z.data = z.zdata[bSize:cap(z.zdata)]
	// Mark the uncompressed buffer as fully consumed so that the next Read
	// loads a block.
	z.idx = len(z.data)

	z.Header.done = true
	if debugFlag {
		debug("header read: %v", z.Header)
	}
	return nil
}

// Read decompresses data from the underlying source into the supplied buffer.
//
// On the first call the legacy frame magic number is read and validated.
// Whenever the buffered uncompressed data is exhausted, the next block (a
// little-endian uint32 compressed size followed by that many bytes) is read
// and decompressed. A block that decompresses to less than the fixed legacy
// block size is treated as the last one, and io.EOF is returned together
// with its final bytes.
//
// If a pending Seek skips more than the data currently buffered, Read
// discards the buffered data and returns 0, nil; call Read again to continue.
// A block size larger than the block buffer yields an error.
func (z *ReaderLegacy) Read(buf []byte) (int, error) {
	if debugFlag {
		debug("Read buf len=%d", len(buf))
	}
	if !z.Header.done {
		if err := z.readLegacyHeader(); err != nil {
			return 0, err
		}
		if debugFlag {
			debug("header read OK compressed buffer %d / %d uncompressed buffer %d : %d index=%d",
				len(z.zdata), cap(z.zdata), len(z.data), cap(z.data), z.idx)
		}
	}

	if len(buf) == 0 {
		return 0, nil
	}

	if z.idx == len(z.data) {
		// No data ready for reading, process the next block.
		if debugFlag {
			debug("  reading block from writer %d %d", z.idx, blockSize4M*2)
		}

		// Reset uncompressed buffer
		z.data = z.zdata[:cap(z.zdata)][len(z.zdata):]

		bLen, err := z.readUint32()
		if err != nil {
			return 0, err
		}
		if debugFlag {
			debug("  bLen %d (0x%x) offset = %d (0x%x)", bLen, bLen, z.pos, z.pos)
		}
		z.pos += 4

		// Legacy blocks are always compressed, even when detrimental
		if debugFlag {
			debug("  compressed block size %d", bLen)
		}

		// Compare as uint64: where int is 32 bits wide, int(bLen) wraps
		// negative for bLen >= 2^31, which would pass a signed comparison and
		// make the slice expression below panic instead of reporting an error.
		if uint64(bLen) > uint64(cap(z.data)) {
			return 0, fmt.Errorf("lz4: invalid block size: %d", bLen)
		}
		zdata := z.zdata[:bLen]
		if _, err := io.ReadFull(z.src, zdata); err != nil {
			return 0, err
		}
		z.pos += int64(bLen)

		n, err := UncompressBlock(zdata, z.data)
		if err != nil {
			return 0, err
		}

		z.data = z.data[:n]
		if z.OnBlockDone != nil {
			z.OnBlockDone(n)
		}

		z.idx = 0

		// Legacy blocks are fixed to 8MB, if we read a decompressed block smaller than this
		// it means we've reached the end...
		if n < blockSize4M*2 {
			z.lastBlock = true
		}
	}

	// The pending skip covers more than what is buffered: drop everything
	// buffered and let the next call load another block.
	if z.skip > int64(len(z.data[z.idx:])) {
		z.skip -= int64(len(z.data[z.idx:]))
		z.dpos += int64(len(z.data[z.idx:]))
		z.idx = len(z.data)
		return 0, nil
	}

	// Apply whatever remains of the pending skip within the current block.
	z.idx += int(z.skip)
	z.dpos += z.skip
	z.skip = 0

	n := copy(buf, z.data[z.idx:])
	z.idx += n
	z.dpos += int64(n)
	if debugFlag {
		debug("%v] copied %d bytes to input (%d:%d)", z.lastBlock, n, z.idx, len(z.data))
	}
	if z.lastBlock && len(z.data) == z.idx {
		return n, io.EOF
	}
	return n, nil
}

// Seek implements io.Seeker in a restricted form: only a non-negative offset
// relative to the current position (io.SeekCurrent) is accepted. Every other
// request fails with ErrUnsupportedSeek and leaves the reader untouched.
//
// A successful Seek does not read anything; it only records how many output
// bytes the following Read calls must drop, which in some scenarios is faster
// than reading and discarding them. As a consequence, a later Read() may
// return 0 bytes when everything it would have produced has been skipped.
//
// In both cases the returned position is the current output position plus
// the bytes still waiting to be skipped.
func (z *ReaderLegacy) Seek(offset int64, whence int) (int64, error) {
	if whence == io.SeekCurrent && offset >= 0 {
		z.skip += offset
		return z.dpos + z.skip, nil
	}
	return z.dpos + z.skip, ErrUnsupportedSeek
}

// Reset discards the reader's decoding state and makes it read from r, as if
// it had just been created by NewReaderLegacy(r).
// This permits reusing a ReaderLegacy rather than allocating a new one: the
// already allocated buffers are kept, as is the OnBlockDone handler.
func (z *ReaderLegacy) Reset(r io.Reader) {
	z.Header = Header{}
	z.lastBlock = false
	z.pos = 0
	z.src = r
	z.zdata = z.zdata[:0]
	z.data = z.data[:0]
	z.idx = 0
	// Drop any pending Seek and restart output position tracking; otherwise
	// a skip requested on the previous stream would swallow bytes of the new
	// one and Seek would keep reporting positions relative to the old stream.
	z.skip = 0
	z.dpos = 0
}

// readUint32 reads exactly four bytes from the source and decodes them as a
// little-endian uint32.
// The reader's own scratch buffer is used so that no allocation is needed.
// The error from io.ReadFull is returned as is; the value is only meaningful
// when that error is nil.
func (z *ReaderLegacy) readUint32() (uint32, error) {
	scratch := z.buf[:4]
	_, err := io.ReadFull(z.src, scratch)
	return binary.LittleEndian.Uint32(scratch), err
}
117 changes: 117 additions & 0 deletions reader_legacy_test.go
@@ -0,0 +1,117 @@
package lz4_test

import (
"bytes"
"io"
"io/ioutil"
"os"
"reflect"
"strings"
"testing"

"github.com/pierrec/lz4"
)

// TestReaderLegacy decodes each golden legacy-frame file and compares the
// result with the uncompressed file of the same name without the ".lz4"
// suffix. For inputs of at least 20 bytes it then checks a partial read,
// that unsupported seeks are rejected without moving the position, and that
// a forward seek followed by a read yields the last 10 bytes.
func TestReaderLegacy(t *testing.T) {
	for _, fname := range []string{
		"testdata/vmlinux_LZ4_19377.lz4",
		"testdata/bzImage_lz4_isolated.lz4",
	} {
		fname := fname
		t.Run(fname, func(t *testing.T) {
			t.Parallel()

			var out bytes.Buffer
			want, err := ioutil.ReadFile(strings.TrimSuffix(fname, ".lz4"))
			if err != nil {
				t.Fatal(err)
			}

			// Full decode.
			src, err := os.Open(fname)
			if err != nil {
				t.Fatal(err)
			}
			defer src.Close()

			zr := lz4.NewReaderLegacy(src)
			copied, err := io.Copy(&out, zr)
			if err != nil {
				t.Fatal(err, copied)
			}
			if int(copied) != len(want) {
				t.Errorf("invalid sizes: got %d; want %d", int(copied), len(want))
			}
			if !reflect.DeepEqual(out.Bytes(), want) {
				t.Fatal("uncompressed data does not match original")
			}

			// The seek checks below need some room to move around in.
			if len(want) < 20 {
				return
			}

			// Partial read of the first 10 bytes from a fresh reader.
			src2, err := os.Open(fname)
			if err != nil {
				t.Fatal(err)
			}
			defer src2.Close()

			out.Reset()
			zr = lz4.NewReaderLegacy(src2)
			if _, err = io.CopyN(&out, zr, 10); err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(out.Bytes(), want[:10]) {
				t.Fatal("partial read does not match original")
			}
			t.Log("partial read is ok")

			// Unsupported seeks must fail and report the unchanged position.
			for _, bad := range []struct {
				offset int64
				whence int
			}{
				{-1, io.SeekCurrent},
				{1, io.SeekStart},
				{-1, io.SeekEnd},
			} {
				pos, err := zr.Seek(bad.offset, bad.whence)
				if err == nil {
					t.Fatal("expected error from invalid seek")
				}
				if pos != 10 {
					t.Fatalf("unexpected position %d", pos)
				}
			}

			// Skip forward so that exactly the last 10 bytes remain.
			pos, err := zr.Seek(int64(len(want)-20), io.SeekCurrent)
			if err != nil {
				t.Fatal(err)
			}
			if pos != int64(len(want)-10) {
				t.Fatalf("unexpected position %d", pos)
			}

			out.Reset()
			if _, err = io.CopyN(&out, zr, 10); err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(out.Bytes(), want[len(want)-10:]) {
				t.Fatal("after seek, partial read does not match original")
			}
		})
	}
}
Binary file added testdata/bzImage_lz4_isolated
Binary file not shown.
Binary file added testdata/bzImage_lz4_isolated.lz4
Binary file not shown.
Binary file added testdata/vmlinux_LZ4_19377
Binary file not shown.
Binary file added testdata/vmlinux_LZ4_19377.lz4
Binary file not shown.