Merge pull request #4741 from thaJeztah/bump_compress

vendor: github.com/klauspost/compress v1.17.4
This commit is contained in:
Sebastiaan van Stijn 2023-12-27 17:05:06 +01:00 committed by GitHub
commit 38d141b900
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 55 additions and 81 deletions

View File

@ -62,7 +62,7 @@ require (
github.com/golang/protobuf v1.5.3 // indirect github.com/golang/protobuf v1.5.3 // indirect
github.com/gorilla/mux v1.8.1 // indirect github.com/gorilla/mux v1.8.1 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/klauspost/compress v1.17.2 // indirect github.com/klauspost/compress v1.17.4 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
github.com/miekg/pkcs11 v1.1.1 // indirect github.com/miekg/pkcs11 v1.1.1 // indirect
github.com/moby/sys/symlink v0.2.0 // indirect github.com/moby/sys/symlink v0.2.0 // indirect

View File

@ -139,8 +139,8 @@ github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7V
github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.17.2 h1:RlWWUY/Dr4fL8qk9YG7DTZ7PDgME2V4csBXA8L/ixi4= github.com/klauspost/compress v1.17.4 h1:Ej5ixsIri7BrIjBkRZLTo6ghwrEtHFk7ijlczPW4fZ4=
github.com/klauspost/compress v1.17.2/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= github.com/klauspost/compress v1.17.4/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=

View File

@ -16,6 +16,14 @@ This package provides various compression algorithms.
# changelog # changelog
* Oct 22nd, 2023 - [v1.17.2](https://github.com/klauspost/compress/releases/tag/v1.17.2)
* zstd: Fix rare *CORRUPTION* output in "best" mode. See https://github.com/klauspost/compress/pull/876
* Oct 14th, 2023 - [v1.17.1](https://github.com/klauspost/compress/releases/tag/v1.17.1)
* s2: Fix S2 "best" dictionary wrong encoding by @klauspost in https://github.com/klauspost/compress/pull/871
* flate: Reduce allocations in decompressor and minor code improvements by @fakefloordiv in https://github.com/klauspost/compress/pull/869
* s2: Fix EstimateBlockSize on 6&7 length input by @klauspost in https://github.com/klauspost/compress/pull/867
* Sept 19th, 2023 - [v1.17.0](https://github.com/klauspost/compress/releases/tag/v1.17.0) * Sept 19th, 2023 - [v1.17.0](https://github.com/klauspost/compress/releases/tag/v1.17.0)
* Add experimental dictionary builder https://github.com/klauspost/compress/pull/853 * Add experimental dictionary builder https://github.com/klauspost/compress/pull/853
* Add xerial snappy read/writer https://github.com/klauspost/compress/pull/838 * Add xerial snappy read/writer https://github.com/klauspost/compress/pull/838

View File

@ -212,7 +212,7 @@ func (s *Scratch) writeCount() error {
previous0 bool previous0 bool
charnum uint16 charnum uint16
maxHeaderSize = ((int(s.symbolLen) * int(tableLog)) >> 3) + 3 maxHeaderSize = ((int(s.symbolLen)*int(tableLog) + 4 + 2) >> 3) + 3
// Write Table Size // Write Table Size
bitStream = uint32(tableLog - minTablelog) bitStream = uint32(tableLog - minTablelog)

View File

@ -1,44 +0,0 @@
// Copyright 2018 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
package huff0
// byteReader is a minimal cursor over a byte slice for decoding
// little endian 32-bit values.
// The read methods never move the cursor; the caller advances it by
// adjusting off directly.
// No explicit bounds checks are made: reading with fewer than four bytes
// available at off results in a run-time index panic.
type byteReader struct {
	b   []byte // input being read
	off int    // index into b where the next value starts
}

// init points the reader at in and rewinds the offset to the start.
func (r *byteReader) init(in []byte) {
	*r = byteReader{b: in}
}

// Int32 decodes the four bytes at the current offset as a little endian
// int32. The offset is left unchanged.
func (r byteReader) Int32() int32 {
	// Same bit pattern as the unsigned read, reinterpreted as signed.
	return int32(r.Uint32())
}

// Uint32 decodes the four bytes at the current offset as a little endian
// uint32. The offset is left unchanged.
func (r byteReader) Uint32() uint32 {
	i := r.off
	// The highest index is read first, so for a non-negative offset a
	// too-short buffer panics on the very first access.
	v := uint32(r.b[i+3]) << 24
	v |= uint32(r.b[i+2]) << 16
	v |= uint32(r.b[i+1]) << 8
	v |= uint32(r.b[i])
	return v
}

// remain reports how many bytes lie between the current offset and the
// end of the input.
func (r byteReader) remain() int {
	return len(r.b) - r.off
}

View File

@ -350,6 +350,7 @@ func (s *Scratch) compress4Xp(src []byte) ([]byte, error) {
// Does not update s.clearCount. // Does not update s.clearCount.
func (s *Scratch) countSimple(in []byte) (max int, reuse bool) { func (s *Scratch) countSimple(in []byte) (max int, reuse bool) {
reuse = true reuse = true
_ = s.count // Assert that s != nil to speed up the following loop.
for _, v := range in { for _, v := range in {
s.count[v]++ s.count[v]++
} }
@ -415,7 +416,7 @@ func (s *Scratch) validateTable(c cTable) bool {
// minTableLog provides the minimum logSize to safely represent a distribution. // minTableLog provides the minimum logSize to safely represent a distribution.
func (s *Scratch) minTableLog() uint8 { func (s *Scratch) minTableLog() uint8 {
minBitsSrc := highBit32(uint32(s.br.remain())) + 1 minBitsSrc := highBit32(uint32(s.srcLen)) + 1
minBitsSymbols := highBit32(uint32(s.symbolLen-1)) + 2 minBitsSymbols := highBit32(uint32(s.symbolLen-1)) + 2
if minBitsSrc < minBitsSymbols { if minBitsSrc < minBitsSymbols {
return uint8(minBitsSrc) return uint8(minBitsSrc)
@ -427,7 +428,7 @@ func (s *Scratch) minTableLog() uint8 {
func (s *Scratch) optimalTableLog() { func (s *Scratch) optimalTableLog() {
tableLog := s.TableLog tableLog := s.TableLog
minBits := s.minTableLog() minBits := s.minTableLog()
maxBitsSrc := uint8(highBit32(uint32(s.br.remain()-1))) - 1 maxBitsSrc := uint8(highBit32(uint32(s.srcLen-1))) - 1
if maxBitsSrc < tableLog { if maxBitsSrc < tableLog {
// Accuracy can be reduced // Accuracy can be reduced
tableLog = maxBitsSrc tableLog = maxBitsSrc

View File

@ -88,7 +88,7 @@ type Scratch struct {
// Decoders will return ErrMaxDecodedSizeExceeded is this limit is exceeded. // Decoders will return ErrMaxDecodedSizeExceeded is this limit is exceeded.
MaxDecodedSize int MaxDecodedSize int
br byteReader srcLen int
// MaxSymbolValue will override the maximum symbol value of the next block. // MaxSymbolValue will override the maximum symbol value of the next block.
MaxSymbolValue uint8 MaxSymbolValue uint8
@ -170,7 +170,7 @@ func (s *Scratch) prepare(in []byte) (*Scratch, error) {
if s.fse == nil { if s.fse == nil {
s.fse = &fse.Scratch{} s.fse = &fse.Scratch{}
} }
s.br.init(in) s.srcLen = len(in)
return s, nil return s, nil
} }

View File

@ -259,7 +259,7 @@ nyc-taxi-data-10M.csv gzkp 1 3325605752 922273214 13929 227.68
## Decompressor ## Decompressor
Staus: STABLE - there may still be subtle bugs, but a wide variety of content has been tested. Status: STABLE - there may still be subtle bugs, but a wide variety of content has been tested.
This library is being continuously [fuzz-tested](https://github.com/klauspost/compress-fuzz), This library is being continuously [fuzz-tested](https://github.com/klauspost/compress-fuzz),
kindly supplied by [fuzzit.dev](https://fuzzit.dev/). kindly supplied by [fuzzit.dev](https://fuzzit.dev/).

View File

@ -43,7 +43,7 @@ func (m *match) estBits(bitsPerByte int32) {
if m.rep < 0 { if m.rep < 0 {
ofc = ofCode(uint32(m.s-m.offset) + 3) ofc = ofCode(uint32(m.s-m.offset) + 3)
} else { } else {
ofc = ofCode(uint32(m.rep)) ofc = ofCode(uint32(m.rep) & 3)
} }
// Cost, excluding // Cost, excluding
ofTT, mlTT := fsePredefEnc[tableOffsets].ct.symbolTT[ofc], fsePredefEnc[tableMatchLengths].ct.symbolTT[mlc] ofTT, mlTT := fsePredefEnc[tableOffsets].ct.symbolTT[ofc], fsePredefEnc[tableMatchLengths].ct.symbolTT[mlc]
@ -227,7 +227,7 @@ encodeLoop:
} }
} }
l := 4 + e.matchlen(s+4, offset+4, src) l := 4 + e.matchlen(s+4, offset+4, src)
if rep < 0 { if true {
// Extend candidate match backwards as far as possible. // Extend candidate match backwards as far as possible.
tMin := s - e.maxMatchOff tMin := s - e.maxMatchOff
if tMin < 0 { if tMin < 0 {
@ -282,6 +282,7 @@ encodeLoop:
// Load next and check... // Load next and check...
e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: candidateL.offset} e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: candidateL.offset}
e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: candidateS.offset} e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: candidateS.offset}
index0 := s + 1
// Look far ahead, unless we have a really long match already... // Look far ahead, unless we have a really long match already...
if best.length < goodEnough { if best.length < goodEnough {
@ -357,19 +358,16 @@ encodeLoop:
blk.sequences = append(blk.sequences, seq) blk.sequences = append(blk.sequences, seq)
// Index old s + 1 -> s - 1 // Index old s + 1 -> s - 1
index0 := s + 1
s = best.s + best.length s = best.s + best.length
nextEmit = s nextEmit = s
if s >= sLimit {
if debugEncoder {
println("repeat ended", s, best.length)
}
break encodeLoop
}
// Index skipped... // Index skipped...
end := s
if s > sLimit+4 {
end = sLimit + 4
}
off := index0 + e.cur off := index0 + e.cur
for index0 < s { for index0 < end {
cv0 := load6432(src, index0) cv0 := load6432(src, index0)
h0 := hashLen(cv0, bestLongTableBits, bestLongLen) h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
h1 := hashLen(cv0, bestShortTableBits, bestShortLen) h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
@ -378,6 +376,7 @@ encodeLoop:
off++ off++
index0++ index0++
} }
switch best.rep { switch best.rep {
case 2, 4 | 1: case 2, 4 | 1:
offset1, offset2 = offset2, offset1 offset1, offset2 = offset2, offset1
@ -386,12 +385,17 @@ encodeLoop:
case 4 | 3: case 4 | 3:
offset1, offset2, offset3 = offset1-1, offset1, offset2 offset1, offset2, offset3 = offset1-1, offset1, offset2
} }
if s >= sLimit {
if debugEncoder {
println("repeat ended", s, best.length)
}
break encodeLoop
}
continue continue
} }
// A 4-byte match has been found. Update recent offsets. // A 4-byte match has been found. Update recent offsets.
// We'll later see if more than 4 bytes. // We'll later see if more than 4 bytes.
index0 := s + 1
s = best.s s = best.s
t := best.offset t := best.offset
offset1, offset2, offset3 = s-t, offset1, offset2 offset1, offset2, offset3 = s-t, offset1, offset2
@ -419,19 +423,25 @@ encodeLoop:
} }
blk.sequences = append(blk.sequences, seq) blk.sequences = append(blk.sequences, seq)
nextEmit = s nextEmit = s
if s >= sLimit {
break encodeLoop // Index old s + 1 -> s - 1 or sLimit
end := s
if s > sLimit-4 {
end = sLimit - 4
} }
// Index old s + 1 -> s - 1 off := index0 + e.cur
for index0 < s { for index0 < end {
cv0 := load6432(src, index0) cv0 := load6432(src, index0)
h0 := hashLen(cv0, bestLongTableBits, bestLongLen) h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
h1 := hashLen(cv0, bestShortTableBits, bestShortLen) h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset} e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
index0++ index0++
off++
}
if s >= sLimit {
break encodeLoop
} }
} }

View File

@ -145,7 +145,7 @@ encodeLoop:
var t int32 var t int32
// We allow the encoder to optionally turn off repeat offsets across blocks // We allow the encoder to optionally turn off repeat offsets across blocks
canRepeat := len(blk.sequences) > 2 canRepeat := len(blk.sequences) > 2
var matched int32 var matched, index0 int32
for { for {
if debugAsserts && canRepeat && offset1 == 0 { if debugAsserts && canRepeat && offset1 == 0 {
@ -162,6 +162,7 @@ encodeLoop:
off := s + e.cur off := s + e.cur
e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset} e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)} e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
index0 = s + 1
if canRepeat { if canRepeat {
if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
@ -258,7 +259,6 @@ encodeLoop:
} }
blk.sequences = append(blk.sequences, seq) blk.sequences = append(blk.sequences, seq)
index0 := s + repOff2
s += lenght + repOff2 s += lenght + repOff2
nextEmit = s nextEmit = s
if s >= sLimit { if s >= sLimit {
@ -498,15 +498,15 @@ encodeLoop:
} }
// Index match start+1 (long) -> s - 1 // Index match start+1 (long) -> s - 1
index0 := s - l + 1 off := index0 + e.cur
for index0 < s-1 { for index0 < s-1 {
cv0 := load6432(src, index0) cv0 := load6432(src, index0)
cv1 := cv0 >> 8 cv1 := cv0 >> 8
h0 := hashLen(cv0, betterLongTableBits, betterLongLen) h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)} e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
index0 += 2 index0 += 2
off += 2
} }
cv = load6432(src, s) cv = load6432(src, s)
@ -672,7 +672,7 @@ encodeLoop:
var t int32 var t int32
// We allow the encoder to optionally turn off repeat offsets across blocks // We allow the encoder to optionally turn off repeat offsets across blocks
canRepeat := len(blk.sequences) > 2 canRepeat := len(blk.sequences) > 2
var matched int32 var matched, index0 int32
for { for {
if debugAsserts && canRepeat && offset1 == 0 { if debugAsserts && canRepeat && offset1 == 0 {
@ -691,6 +691,7 @@ encodeLoop:
e.markLongShardDirty(nextHashL) e.markLongShardDirty(nextHashL)
e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)} e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
e.markShortShardDirty(nextHashS) e.markShortShardDirty(nextHashS)
index0 = s + 1
if canRepeat { if canRepeat {
if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
@ -726,7 +727,6 @@ encodeLoop:
blk.sequences = append(blk.sequences, seq) blk.sequences = append(blk.sequences, seq)
// Index match start+1 (long) -> s - 1 // Index match start+1 (long) -> s - 1
index0 := s + repOff
s += lenght + repOff s += lenght + repOff
nextEmit = s nextEmit = s
@ -790,7 +790,6 @@ encodeLoop:
} }
blk.sequences = append(blk.sequences, seq) blk.sequences = append(blk.sequences, seq)
index0 := s + repOff2
s += lenght + repOff2 s += lenght + repOff2
nextEmit = s nextEmit = s
if s >= sLimit { if s >= sLimit {
@ -1024,18 +1023,18 @@ encodeLoop:
} }
// Index match start+1 (long) -> s - 1 // Index match start+1 (long) -> s - 1
index0 := s - l + 1 off := index0 + e.cur
for index0 < s-1 { for index0 < s-1 {
cv0 := load6432(src, index0) cv0 := load6432(src, index0)
cv1 := cv0 >> 8 cv1 := cv0 >> 8
h0 := hashLen(cv0, betterLongTableBits, betterLongLen) h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.markLongShardDirty(h0) e.markLongShardDirty(h0)
h1 := hashLen(cv1, betterShortTableBits, betterShortLen) h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)} e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
e.markShortShardDirty(h1) e.markShortShardDirty(h1)
index0 += 2 index0 += 2
off += 2
} }
cv = load6432(src, s) cv = load6432(src, s)

4
vendor/modules.txt vendored
View File

@ -159,8 +159,8 @@ github.com/gorilla/mux
# github.com/inconshreveable/mousetrap v1.1.0 # github.com/inconshreveable/mousetrap v1.1.0
## explicit; go 1.18 ## explicit; go 1.18
github.com/inconshreveable/mousetrap github.com/inconshreveable/mousetrap
# github.com/klauspost/compress v1.17.2 # github.com/klauspost/compress v1.17.4
## explicit; go 1.18 ## explicit; go 1.19
github.com/klauspost/compress github.com/klauspost/compress
github.com/klauspost/compress/fse github.com/klauspost/compress/fse
github.com/klauspost/compress/huff0 github.com/klauspost/compress/huff0