Skip to content

Commit

Permalink
Merge pull request #100 from Caesurus/timdw/PR_to_pierrec
Browse files Browse the repository at this point in the history
Timdw/pr to pierrec
  • Loading branch information
pierrec committed Oct 31, 2020
2 parents e876bbd + 2751f8d commit 0e583d3
Show file tree
Hide file tree
Showing 10 changed files with 336 additions and 8 deletions.
2 changes: 1 addition & 1 deletion cmd/lz4c/compress.go
Expand Up @@ -10,8 +10,8 @@ import (
"code.cloudfoundry.org/bytefmt"
"github.com/schollz/progressbar"

"github.com/pierrec/cmdflag"
"github.com/pierrec/lz4"
"github.com/pierrec/cmdflag"
)

// Compress compresses a set of files or from stdin to stdout.
Expand Down
3 changes: 2 additions & 1 deletion fuzz/lz4.go
Expand Up @@ -2,8 +2,9 @@ package lz4

import (
"bytes"
"github.com/pierrec/lz4"
"io"

"github.com/pierrec/lz4"
)

// Fuzz function for the Reader and Writer.
Expand Down
2 changes: 1 addition & 1 deletion internal/xxh32/xxh32zero_test.go
Expand Up @@ -6,8 +6,8 @@ import (
"hash/fnv"
"testing"

qt "github.com/frankban/quicktest"
"github.com/pierrec/lz4/internal/xxh32"
qt "github.com/frankban/quicktest"
)

type test struct {
Expand Down
13 changes: 8 additions & 5 deletions lz4.go
Expand Up @@ -10,18 +10,20 @@
//
package lz4

import "math/bits"

import "sync"
import (
"math/bits"
"sync"
)

const (
// Extension is the LZ4 frame file name extension
Extension = ".lz4"
// Version is the LZ4 frame format version
Version = 1

frameMagic uint32 = 0x184D2204
frameSkipMagic uint32 = 0x184D2A50
frameMagic uint32 = 0x184D2204
frameSkipMagic uint32 = 0x184D2A50
frameMagicLegacy uint32 = 0x184C2102

// The following constants are used to setup the compression algorithm.
minMatch = 4 // the minimum size of the match sequence size (4 bytes)
Expand Down Expand Up @@ -108,6 +110,7 @@ type Header struct {
done bool // Header processed flag (Read or Write and checked).
}

// Reset clears the done flag so the Header is re-processed
// on the next Read or Write.
func (h *Header) Reset() {
	h.done = false
}
207 changes: 207 additions & 0 deletions reader_legacy.go
@@ -0,0 +1,207 @@
package lz4

import (
"encoding/binary"
"fmt"
"io"
)

// ReaderLegacy implements the legacy (frameMagicLegacy) LZ4 frame decoder.
// The Header is set after the first call to Read().
type ReaderLegacy struct {
	Header
	// Handler called when a block has been successfully read.
	// It provides the number of bytes read.
	OnBlockDone func(size int)

	lastBlock bool      // Set once a decoded block is shorter than 8MB (end of frame).
	buf       [8]byte   // Scrap buffer.
	pos       int64     // Current position in src.
	src       io.Reader // Source.
	zdata     []byte    // Compressed data (shares its backing array with data).
	data      []byte    // Uncompressed data.
	idx       int       // Index of unread bytes into data.
	skip      int64     // Bytes to skip before next read (set by Seek).
	dpos      int64     // Position in dest
}

// NewReaderLegacy returns a new legacy LZ4 frame decoder reading from src.
// No access to the underlying io.Reader is performed.
func NewReaderLegacy(src io.Reader) *ReaderLegacy {
	return &ReaderLegacy{src: src}
}

// readLegacyHeader checks the magic number of a legacy LZ4 frame.
// A legacy frame has no frame descriptor: only the 4-byte magic number
// precedes the blocks. It returns io.EOF on a clean end of stream and
// ErrInvalid when the magic number does not match frameMagicLegacy.
func (z *ReaderLegacy) readLegacyHeader() error {
	z.lastBlock = false
	magic, err := z.readUint32()
	if err != nil {
		// NOTE(review): pos is advanced by 4 even on a failed/partial read,
		// mirroring the success path — confirm this is intended.
		z.pos += 4
		if err == io.ErrUnexpectedEOF {
			return io.EOF
		}
		return err
	}
	if magic != frameMagicLegacy {
		return ErrInvalid
	}
	z.pos += 4

	// Legacy has fixed 8MB blocksizes
	// https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md#legacy-frame
	bSize := blockSize4M * 2

	// Allocate one backing array shared by the compressed (zdata, first half)
	// and uncompressed (data, second half) buffers.
	// The compressed buffer cannot exceed the uncompressed one.
	if n := 2 * bSize; cap(z.zdata) < n {
		z.zdata = make([]byte, n)
	}
	if debugFlag {
		debug("header block max size size=%d", bSize)
	}
	z.zdata = z.zdata[:bSize]
	z.data = z.zdata[:cap(z.zdata)][bSize:]
	z.idx = len(z.data) // no uncompressed bytes available yet

	z.Header.done = true
	if debugFlag {
		debug("header read: %v", z.Header)
	}

	return nil
}

// Read decompresses data from the underlying source into the supplied buffer.
//
// The legacy frame magic number is read lazily on the first call. Each block
// is decompressed in full into an internal buffer and then served to callers
// across subsequent Read calls; io.EOF is returned once the final (short)
// block has been fully consumed.
func (z *ReaderLegacy) Read(buf []byte) (int, error) {
	if debugFlag {
		debug("Read buf len=%d", len(buf))
	}
	// Lazily validate the frame magic on the first call.
	if !z.Header.done {
		if err := z.readLegacyHeader(); err != nil {
			return 0, err
		}
		if debugFlag {
			debug("header read OK compressed buffer %d / %d uncompressed buffer %d : %d index=%d",
				len(z.zdata), cap(z.zdata), len(z.data), cap(z.data), z.idx)
		}
	}

	if len(buf) == 0 {
		return 0, nil
	}

	if z.idx == len(z.data) {
		// No data ready for reading, process the next block.
		if debugFlag {
			debug(" reading block from writer %d %d", z.idx, blockSize4M*2)
		}

		// Reset uncompressed buffer
		z.data = z.zdata[:cap(z.zdata)][len(z.zdata):]

		// Block layout: 4-byte little-endian compressed length, then payload.
		bLen, err := z.readUint32()
		if err != nil {
			return 0, err
		}
		if debugFlag {
			debug(" bLen %d (0x%x) offset = %d (0x%x)", bLen, bLen, z.pos, z.pos)
		}
		z.pos += 4

		// Legacy blocks are always compressed, even when detrimental
		if debugFlag {
			debug(" compressed block size %d", bLen)
		}

		// Guard against a corrupt length that would overflow the shared buffer.
		if int(bLen) > cap(z.data) {
			return 0, fmt.Errorf("lz4: invalid block size: %d", bLen)
		}
		zdata := z.zdata[:bLen]
		if _, err := io.ReadFull(z.src, zdata); err != nil {
			return 0, err
		}
		z.pos += int64(bLen)

		n, err := UncompressBlock(zdata, z.data)
		if err != nil {
			return 0, err
		}

		z.data = z.data[:n]
		if z.OnBlockDone != nil {
			z.OnBlockDone(n)
		}

		z.idx = 0

		// Legacy blocks are fixed to 8MB, if we read a decompressed block smaller than this
		// it means we've reached the end...
		if n < blockSize4M*2 {
			z.lastBlock = true
		}
	}

	// Honor any pending Seek: if the skip spans past this block, consume the
	// block entirely and report 0 bytes read (Read may be called again).
	if z.skip > int64(len(z.data[z.idx:])) {
		z.skip -= int64(len(z.data[z.idx:]))
		z.dpos += int64(len(z.data[z.idx:]))
		z.idx = len(z.data)
		return 0, nil
	}

	z.idx += int(z.skip)
	z.dpos += z.skip
	z.skip = 0

	n := copy(buf, z.data[z.idx:])
	z.idx += n
	z.dpos += int64(n)
	if debugFlag {
		debug("%v] copied %d bytes to input (%d:%d)", z.lastBlock, n, z.idx, len(z.data))
	}
	if z.lastBlock && len(z.data) == z.idx {
		return n, io.EOF
	}
	return n, nil
}

// Seek implements io.Seeker, but supports seeking forward from the current
// position only. Any other seek will return an error. Allows skipping output
// bytes which aren't needed, which in some scenarios is faster than reading
// and discarding them.
// Note this may cause future calls to Read() to read 0 bytes if all of the
// data they would have returned is skipped.
func (z *ReaderLegacy) Seek(offset int64, whence int) (int64, error) {
	pos := z.dpos + z.skip
	if whence != io.SeekCurrent || offset < 0 {
		return pos, ErrUnsupportedSeek
	}
	// Defer the actual skipping to the next Read.
	z.skip += offset
	return pos + offset, nil
}

// Reset discards the Reader's state and makes it equivalent to the
// result of its original state from NewReader, but reading from r instead.
// This permits reusing a Reader rather than allocating a new one.
func (z *ReaderLegacy) Reset(r io.Reader) {
	z.Header = Header{}
	z.pos = 0
	z.src = r
	// Keep the allocated buffers for reuse, but drop their contents.
	z.zdata = z.zdata[:0]
	z.data = z.data[:0]
	z.idx = 0
	// Also clear the read-side bookkeeping. Leaving these stale would let a
	// prior Seek's pending skip and the reported output position (dpos)
	// leak into the next stream.
	z.skip = 0
	z.dpos = 0
	z.lastBlock = false
}

// readUint32 reads a little-endian uint32 from the source into the reader's
// scratch buffer, avoiding a per-call allocation. On error the returned value
// is whatever the scratch buffer holds and must be ignored by the caller.
func (z *ReaderLegacy) readUint32() (uint32, error) {
	scratch := z.buf[:4]
	_, err := io.ReadFull(z.src, scratch)
	return binary.LittleEndian.Uint32(scratch), err
}
117 changes: 117 additions & 0 deletions reader_legacy_test.go
@@ -0,0 +1,117 @@
package lz4_test

import (
"bytes"
"io"
"io/ioutil"
"os"
"reflect"
"strings"
"testing"

"github.com/pierrec/lz4"
)

// TestReaderLegacy decompresses golden legacy-frame files and checks the
// output against the stored raw originals, then exercises ReaderLegacy.Seek:
// invalid seeks (negative offset, SeekStart, SeekEnd) must fail without
// moving the position, and a forward SeekCurrent must land so the tail of
// the stream matches the original.
func TestReaderLegacy(t *testing.T) {
	goldenFiles := []string{
		"testdata/vmlinux_LZ4_19377.lz4",
		"testdata/bzImage_lz4_isolated.lz4",
	}

	for _, fname := range goldenFiles {
		t.Run(fname, func(t *testing.T) {
			// Capture the loop variable before t.Parallel() suspends this
			// subtest and the loop advances.
			fname := fname
			t.Parallel()

			var out bytes.Buffer
			// The expected output lives next to the .lz4 file, without the suffix.
			rawfile := strings.TrimSuffix(fname, ".lz4")
			raw, err := ioutil.ReadFile(rawfile)
			if err != nil {
				t.Fatal(err)
			}

			f, err := os.Open(fname)
			if err != nil {
				t.Fatal(err)
			}
			defer f.Close()

			// Full decompression must reproduce the original byte-for-byte.
			zr := lz4.NewReaderLegacy(f)
			n, err := io.Copy(&out, zr)
			if err != nil {
				t.Fatal(err, n)
			}

			if got, want := int(n), len(raw); got != want {
				t.Errorf("invalid sizes: got %d; want %d", got, want)
			}

			if got, want := out.Bytes(), raw; !reflect.DeepEqual(got, want) {
				t.Fatal("uncompressed data does not match original")
			}

			// The seek scenario below needs at least 20 bytes of output.
			if len(raw) < 20 {
				return
			}

			// Re-open to test partial reads and seeking on a fresh reader.
			f2, err := os.Open(fname)
			if err != nil {
				t.Fatal(err)
			}
			defer f2.Close()

			out.Reset()
			zr = lz4.NewReaderLegacy(f2)
			_, err = io.CopyN(&out, zr, 10)
			if err != nil {
				t.Fatal(err)
			}

			if !reflect.DeepEqual(out.Bytes(), raw[:10]) {
				t.Fatal("partial read does not match original")
			} else {
				t.Log("partial read is ok")
			}

			// Each unsupported seek must error and leave the position at 10.
			pos, err := zr.Seek(-1, io.SeekCurrent)
			if err == nil {
				t.Fatal("expected error from invalid seek")
			}
			if pos != 10 {
				t.Fatalf("unexpected position %d", pos)
			}
			pos, err = zr.Seek(1, io.SeekStart)
			if err == nil {
				t.Fatal("expected error from invalid seek")
			}
			if pos != 10 {
				t.Fatalf("unexpected position %d", pos)
			}
			pos, err = zr.Seek(-1, io.SeekEnd)
			if err == nil {
				t.Fatal("expected error from invalid seek")
			}
			if pos != 10 {
				t.Fatalf("unexpected position %d", pos)
			}

			// Skip forward to 10 bytes before the end of the stream.
			pos, err = zr.Seek(int64(len(raw)-20), io.SeekCurrent)
			if err != nil {
				t.Fatal(err)
			}
			if pos != int64(len(raw)-10) {
				t.Fatalf("unexpected position %d", pos)
			}

			out.Reset()
			_, err = io.CopyN(&out, zr, 10)
			if err != nil {
				t.Fatal(err)
			}

			// The final 10 bytes after the seek must match the original tail.
			if !reflect.DeepEqual(out.Bytes(), raw[len(raw)-10:]) {
				t.Fatal("after seek, partial read does not match original")
			}
		})
	}
}
Binary file added testdata/bzImage_lz4_isolated
Binary file not shown.
Binary file added testdata/bzImage_lz4_isolated.lz4
Binary file not shown.
Binary file added testdata/vmlinux_LZ4_19377
Binary file not shown.
Binary file added testdata/vmlinux_LZ4_19377.lz4
Binary file not shown.

0 comments on commit 0e583d3

Please sign in to comment.