Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

zstd: Improve better/best compression #877

Merged
merged 2 commits into the base branch on Nov 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
44 changes: 27 additions & 17 deletions zstd/enc_best.go
Expand Up @@ -43,7 +43,7 @@ func (m *match) estBits(bitsPerByte int32) {
if m.rep < 0 {
ofc = ofCode(uint32(m.s-m.offset) + 3)
} else {
ofc = ofCode(uint32(m.rep))
ofc = ofCode(uint32(m.rep) & 3)
}
// Cost, excluding
ofTT, mlTT := fsePredefEnc[tableOffsets].ct.symbolTT[ofc], fsePredefEnc[tableMatchLengths].ct.symbolTT[mlc]
Expand Down Expand Up @@ -227,7 +227,7 @@ encodeLoop:
}
}
l := 4 + e.matchlen(s+4, offset+4, src)
if rep < 0 {
if true {
// Extend candidate match backwards as far as possible.
tMin := s - e.maxMatchOff
if tMin < 0 {
Expand Down Expand Up @@ -282,6 +282,7 @@ encodeLoop:
// Load next and check...
e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: candidateL.offset}
e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: candidateS.offset}
index0 := s + 1

// Look far ahead, unless we have a really long match already...
if best.length < goodEnough {
Expand Down Expand Up @@ -357,19 +358,16 @@ encodeLoop:
blk.sequences = append(blk.sequences, seq)

// Index old s + 1 -> s - 1
index0 := s + 1
s = best.s + best.length

nextEmit = s
if s >= sLimit {
if debugEncoder {
println("repeat ended", s, best.length)
}
break encodeLoop
}

// Index skipped...
end := s
if s > sLimit+4 {
end = sLimit + 4
}
off := index0 + e.cur
for index0 < s {
for index0 < end {
cv0 := load6432(src, index0)
h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
Expand All @@ -378,6 +376,7 @@ encodeLoop:
off++
index0++
}

switch best.rep {
case 2, 4 | 1:
offset1, offset2 = offset2, offset1
Expand All @@ -386,12 +385,17 @@ encodeLoop:
case 4 | 3:
offset1, offset2, offset3 = offset1-1, offset1, offset2
}
if s >= sLimit {
if debugEncoder {
println("repeat ended", s, best.length)
}
break encodeLoop
}
continue
}

// A 4-byte match has been found. Update recent offsets.
// We'll later see if more than 4 bytes.
index0 := s + 1
s = best.s
t := best.offset
offset1, offset2, offset3 = s-t, offset1, offset2
Expand Down Expand Up @@ -419,19 +423,25 @@ encodeLoop:
}
blk.sequences = append(blk.sequences, seq)
nextEmit = s
if s >= sLimit {
break encodeLoop

// Index old s + 1 -> s - 1 or sLimit
end := s
if s > sLimit-4 {
end = sLimit - 4
}

// Index old s + 1 -> s - 1
for index0 < s {
off := index0 + e.cur
for index0 < end {
cv0 := load6432(src, index0)
h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
index0++
off++
}
if s >= sLimit {
break encodeLoop
}
}

Expand Down
17 changes: 8 additions & 9 deletions zstd/enc_better.go
Expand Up @@ -145,7 +145,7 @@ encodeLoop:
var t int32
// We allow the encoder to optionally turn off repeat offsets across blocks
canRepeat := len(blk.sequences) > 2
var matched int32
var matched, index0 int32

for {
if debugAsserts && canRepeat && offset1 == 0 {
Expand All @@ -162,6 +162,7 @@ encodeLoop:
off := s + e.cur
e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
index0 = s + 1

if canRepeat {
if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
Expand Down Expand Up @@ -258,7 +259,6 @@ encodeLoop:
}
blk.sequences = append(blk.sequences, seq)

index0 := s + repOff2
s += lenght + repOff2
nextEmit = s
if s >= sLimit {
Expand Down Expand Up @@ -498,15 +498,15 @@ encodeLoop:
}

// Index match start+1 (long) -> s - 1
index0 := s - l + 1
off := index0 + e.cur
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
index0 += 2
off += 2
}

cv = load6432(src, s)
Expand Down Expand Up @@ -672,7 +672,7 @@ encodeLoop:
var t int32
// We allow the encoder to optionally turn off repeat offsets across blocks
canRepeat := len(blk.sequences) > 2
var matched int32
var matched, index0 int32

for {
if debugAsserts && canRepeat && offset1 == 0 {
Expand All @@ -691,6 +691,7 @@ encodeLoop:
e.markLongShardDirty(nextHashL)
e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
e.markShortShardDirty(nextHashS)
index0 = s + 1

if canRepeat {
if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
Expand Down Expand Up @@ -726,7 +727,6 @@ encodeLoop:
blk.sequences = append(blk.sequences, seq)

// Index match start+1 (long) -> s - 1
index0 := s + repOff
s += lenght + repOff

nextEmit = s
Expand Down Expand Up @@ -790,7 +790,6 @@ encodeLoop:
}
blk.sequences = append(blk.sequences, seq)

index0 := s + repOff2
s += lenght + repOff2
nextEmit = s
if s >= sLimit {
Expand Down Expand Up @@ -1024,18 +1023,18 @@ encodeLoop:
}

// Index match start+1 (long) -> s - 1
index0 := s - l + 1
off := index0 + e.cur
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.markLongShardDirty(h0)
h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
e.markShortShardDirty(h1)
index0 += 2
off += 2
}

cv = load6432(src, s)
Expand Down