Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement legacy format writer #118

Merged
merged 1 commit into from
Mar 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
182 changes: 182 additions & 0 deletions writer_legacy.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
package lz4

import (
"encoding/binary"
"io"
)

// WriterLegacy implements the LZ4Demo frame decoder.
type WriterLegacy struct {
Header
// Handler called when a block has been successfully read.
// It provides the number of bytes read.
OnBlockDone func(size int)

dst io.Writer // Destination.
data []byte // Data to be compressed + buffer for compressed data.
idx int // Index into data.
hashtable [winSize]int // Hash table used in CompressBlock().
}

// NewWriterLegacy returns a new LZ4 encoder for the legacy frame format.
// No access to the underlying io.Writer is performed.
// The supplied Header is checked at the first Write.
// It is ok to change it before the first Write but then not until a Reset() is performed.
func NewWriterLegacy(dst io.Writer) *WriterLegacy {
z := new(WriterLegacy)
z.Reset(dst)
return z
}

// Write compresses data from the supplied buffer into the underlying io.Writer.
// Write does not return until the data has been written.
func (z *WriterLegacy) Write(buf []byte) (int, error) {
if !z.Header.done {
if err := z.writeHeader(); err != nil {
return 0, err
}
}
if debugFlag {
debug("input buffer len=%d index=%d", len(buf), z.idx)
}

zn := len(z.data)
var n int
for len(buf) > 0 {
if z.idx == 0 && len(buf) >= zn {
// Avoid a copy as there is enough data for a block.
if err := z.compressBlock(buf[:zn]); err != nil {
return n, err
}
n += zn
buf = buf[zn:]
continue
}
// Accumulate the data to be compressed.
m := copy(z.data[z.idx:], buf)
n += m
z.idx += m
buf = buf[m:]
if debugFlag {
debug("%d bytes copied to buf, current index %d", n, z.idx)
}

if z.idx < len(z.data) {
// Buffer not filled.
if debugFlag {
debug("need more data for compression")
}
return n, nil
}

// Buffer full.
if err := z.compressBlock(z.data); err != nil {
return n, err
}
z.idx = 0
}

return n, nil
}

// writeHeader builds and writes the header to the underlying io.Writer.
func (z *WriterLegacy) writeHeader() error {
// Legacy has fixed 8MB blocksizes
// https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md#legacy-frame
bSize := 2 * blockSize4M

buf := make([]byte, 2*bSize, 2*bSize)
z.data = buf[:bSize] // Uncompressed buffer is the first half.

z.idx = 0

// Header consists of one mageic number, write it out.
if err := binary.Write(z.dst, binary.LittleEndian, frameMagicLegacy); err != nil {
return err
}
z.Header.done = true
if debugFlag {
debug("wrote header %v", z.Header)
}

return nil
}

// compressBlock compresses a block.
func (z *WriterLegacy) compressBlock(data []byte) error {
bSize := 2 * blockSize4M
zdata := z.data[bSize:cap(z.data)]
// The compressed block size cannot exceed the input's.
var zn int

if level := z.Header.CompressionLevel; level != 0 {
zn, _ = CompressBlockHC(data, zdata, level)
} else {
zn, _ = CompressBlock(data, zdata, z.hashtable[:])
}

if debugFlag {
debug("block compression %d => %d", len(data), zn)
}
zdata = zdata[:zn]

// Write the block.
if err := binary.Write(z.dst, binary.LittleEndian, uint32(zn)); err != nil {
return err
}
written, err := z.dst.Write(zdata)
if err != nil {
return err
}
if h := z.OnBlockDone; h != nil {
h(written)
}
return nil
}

// Flush flushes any pending compressed data to the underlying writer.
// Flush does not return until the data has been written.
// If the underlying writer returns an error, Flush returns that error.
func (z *WriterLegacy) Flush() error {
if debugFlag {
debug("flush with index %d", z.idx)
}
if z.idx == 0 {
return nil
}

data := z.data[:z.idx]
z.idx = 0
return z.compressBlock(data)
}

// Close closes the WriterLegacy, flushing any unwritten data to the underlying io.Writer, but does not close the underlying io.Writer.
func (z *WriterLegacy) Close() error {
if !z.Header.done {
if err := z.writeHeader(); err != nil {
return err
}
}
if err := z.Flush(); err != nil {
return err
}

if debugFlag {
debug("writing last empty block")
}

return nil
}

// Reset clears the state of the WriterLegacy z such that it is equivalent to its
// initial state from NewWriterLegacy, but instead writing to w.
// No access to the underlying io.Writer is performed.
func (z *WriterLegacy) Reset(w io.Writer) {
z.Header.Reset()
z.dst = w
z.idx = 0
// reset hashtable to ensure deterministic output.
for i := range z.hashtable {
z.hashtable[i] = 0
}
}
75 changes: 75 additions & 0 deletions writer_legacy_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
package lz4_test

import (
"bytes"
"fmt"
"io"
"io/ioutil"
"reflect"
"testing"

"github.com/pierrec/lz4"
)

func TestWriterLegacy(t *testing.T) {
goldenFiles := []string{
"testdata/e.txt",
"testdata/gettysburg.txt",
"testdata/Mark.Twain-Tom.Sawyer.txt",
"testdata/Mark.Twain-Tom.Sawyer_long.txt",
"testdata/pg1661.txt",
"testdata/pi.txt",
"testdata/random.data",
"testdata/repeat.txt",
}

for _, fname := range goldenFiles {
for _, header := range []lz4.Header{
{}, // Default header.
{CompressionLevel: 10},
} {
label := fmt.Sprintf("%s/%s", fname, header)
t.Run(label, func(t *testing.T) {
fname := fname
header := header
t.Parallel()

raw, err := ioutil.ReadFile(fname)
if err != nil {
t.Fatal(err)
}
r := bytes.NewReader(raw)

// Compress.
var zout bytes.Buffer
zw := lz4.NewWriterLegacy(&zout)
zw.Header = header
_, err = io.Copy(zw, r)
if err != nil {
t.Fatal(err)
}
err = zw.Close()
if err != nil {
t.Fatal(err)
}

// Uncompress.
var out bytes.Buffer
zr := lz4.NewReaderLegacy(&zout)
n, err := io.Copy(&out, zr)
if err != nil {
t.Fatal(err)
}

// The uncompressed data must be the same as the initial input.
if got, want := int(n), len(raw); got != want {
t.Errorf("invalid sizes: got %d; want %d", got, want)
}

if got, want := out.Bytes(), raw; !reflect.DeepEqual(got, want) {
t.Fatal("uncompressed data does not match original")
}
})
}
}
}