Merge pull request #4741 from thaJeztah/bump_compress

vendor: github.com/klauspost/compress v1.17.4
2023-12-27 17:05:06 +01:00 · 2023-12-27 17:05:06 +01:00 · 38d141b900
parent ae41fd913e 1e38fc3b9d
commit 38d141b900
11 changed files with 55 additions and 81 deletions
--- a/vendor.mod
+++ b/vendor.mod
@ -62,7 +62,7 @@ require (
 	github.com/golang/protobuf v1.5.3 // indirect
 	github.com/gorilla/mux v1.8.1 // indirect
 	github.com/inconshreveable/mousetrap v1.1.0 // indirect
-	github.com/klauspost/compress v1.17.2 // indirect
+	github.com/klauspost/compress v1.17.4 // indirect
 	github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
 	github.com/miekg/pkcs11 v1.1.1 // indirect
 	github.com/moby/sys/symlink v0.2.0 // indirect
--- a/vendor.sum
+++ b/vendor.sum
@ -139,8 +139,8 @@ github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7V
 github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00=
 github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
 github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
-github.com/klauspost/compress v1.17.2 h1:RlWWUY/Dr4fL8qk9YG7DTZ7PDgME2V4csBXA8L/ixi4=
-github.com/klauspost/compress v1.17.2/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
+github.com/klauspost/compress v1.17.4 h1:Ej5ixsIri7BrIjBkRZLTo6ghwrEtHFk7ijlczPW4fZ4=
+github.com/klauspost/compress v1.17.4/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM=
 github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
 github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
 github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
--- a/vendor/github.com/klauspost/compress/README.md
+++ b/vendor/github.com/klauspost/compress/README.md
@ -16,6 +16,14 @@ This package provides various compression algorithms.

 # changelog

+* Oct 22nd, 2023 - [v1.17.2](https://github.com/klauspost/compress/releases/tag/v1.17.2)
+	* zstd: Fix rare *CORRUPTION* output in "best" mode. See https://github.com/klauspost/compress/pull/876
+
+* Oct 14th, 2023 - [v1.17.1](https://github.com/klauspost/compress/releases/tag/v1.17.1)
+	* s2: Fix S2 "best" dictionary wrong encoding by @klauspost in https://github.com/klauspost/compress/pull/871
+	* flate: Reduce allocations in decompressor and minor code improvements by @fakefloordiv in https://github.com/klauspost/compress/pull/869
+	* s2: Fix EstimateBlockSize on 6&7 length input by @klauspost in https://github.com/klauspost/compress/pull/867
+
 * Sept 19th, 2023 - [v1.17.0](https://github.com/klauspost/compress/releases/tag/v1.17.0)
 	* Add experimental dictionary builder  https://github.com/klauspost/compress/pull/853
 	* Add xerial snappy read/writer https://github.com/klauspost/compress/pull/838
--- a/vendor/github.com/klauspost/compress/fse/compress.go
+++ b/vendor/github.com/klauspost/compress/fse/compress.go
@ -212,7 +212,7 @@ func (s *Scratch) writeCount() error {
 		previous0 bool
 		charnum   uint16

-		maxHeaderSize = ((int(s.symbolLen) * int(tableLog)) >> 3) + 3
+		maxHeaderSize = ((int(s.symbolLen)*int(tableLog) + 4 + 2) >> 3) + 3

 		// Write Table Size
 		bitStream = uint32(tableLog - minTablelog)
--- a/vendor/github.com/klauspost/compress/huff0/bytereader.go
+++ b/vendor/github.com/klauspost/compress/huff0/bytereader.go
@ -1,44 +0,0 @@
-// Copyright 2018 Klaus Post. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
-
-package huff0
-
-// byteReader provides a byte reader that reads
-// little endian values from a byte stream.
-// The input stream is manually advanced.
-// The reader performs no bounds checks.
-type byteReader struct {
-	b   []byte
-	off int
-}
-
-// init will initialize the reader and set the input.
-func (b *byteReader) init(in []byte) {
-	b.b = in
-	b.off = 0
-}
-
-// Int32 returns a little endian int32 starting at current offset.
-func (b byteReader) Int32() int32 {
-	v3 := int32(b.b[b.off+3])
-	v2 := int32(b.b[b.off+2])
-	v1 := int32(b.b[b.off+1])
-	v0 := int32(b.b[b.off])
-	return (v3 << 24) | (v2 << 16) | (v1 << 8) | v0
-}
-
-// Uint32 returns a little endian uint32 starting at current offset.
-func (b byteReader) Uint32() uint32 {
-	v3 := uint32(b.b[b.off+3])
-	v2 := uint32(b.b[b.off+2])
-	v1 := uint32(b.b[b.off+1])
-	v0 := uint32(b.b[b.off])
-	return (v3 << 24) | (v2 << 16) | (v1 << 8) | v0
-}
-
-// remain will return the number of bytes remaining.
-func (b byteReader) remain() int {
-	return len(b.b) - b.off
-}
--- a/vendor/github.com/klauspost/compress/huff0/compress.go
+++ b/vendor/github.com/klauspost/compress/huff0/compress.go
@ -350,6 +350,7 @@ func (s *Scratch) compress4Xp(src []byte) ([]byte, error) {
 // Does not update s.clearCount.
 func (s *Scratch) countSimple(in []byte) (max int, reuse bool) {
 	reuse = true
+	_ = s.count // Assert that s != nil to speed up the following loop.
 	for _, v := range in {
 		s.count[v]++
 	}
@ -415,7 +416,7 @@ func (s *Scratch) validateTable(c cTable) bool {

 // minTableLog provides the minimum logSize to safely represent a distribution.
 func (s *Scratch) minTableLog() uint8 {
-	minBitsSrc := highBit32(uint32(s.br.remain())) + 1
+	minBitsSrc := highBit32(uint32(s.srcLen)) + 1
 	minBitsSymbols := highBit32(uint32(s.symbolLen-1)) + 2
 	if minBitsSrc < minBitsSymbols {
 		return uint8(minBitsSrc)
@ -427,7 +428,7 @@ func (s *Scratch) minTableLog() uint8 {
 func (s *Scratch) optimalTableLog() {
 	tableLog := s.TableLog
 	minBits := s.minTableLog()
-	maxBitsSrc := uint8(highBit32(uint32(s.br.remain()-1))) - 1
+	maxBitsSrc := uint8(highBit32(uint32(s.srcLen-1))) - 1
 	if maxBitsSrc < tableLog {
 		// Accuracy can be reduced
 		tableLog = maxBitsSrc
--- a/vendor/github.com/klauspost/compress/huff0/huff0.go
+++ b/vendor/github.com/klauspost/compress/huff0/huff0.go
@ -88,7 +88,7 @@ type Scratch struct {
 	// Decoders will return ErrMaxDecodedSizeExceeded is this limit is exceeded.
 	MaxDecodedSize int

-	br byteReader
+	srcLen int

 	// MaxSymbolValue will override the maximum symbol value of the next block.
 	MaxSymbolValue uint8
@ -170,7 +170,7 @@ func (s *Scratch) prepare(in []byte) (*Scratch, error) {
 	if s.fse == nil {
 		s.fse = &fse.Scratch{}
 	}
-	s.br.init(in)
+	s.srcLen = len(in)

 	return s, nil
 }
--- a/vendor/github.com/klauspost/compress/zstd/README.md
+++ b/vendor/github.com/klauspost/compress/zstd/README.md
@ -259,7 +259,7 @@ nyc-taxi-data-10M.csv   gzkp    1   3325605752  922273214   13929   227.68

 ## Decompressor

-Staus: STABLE - there may still be subtle bugs, but a wide variety of content has been tested.
+Status: STABLE - there may still be subtle bugs, but a wide variety of content has been tested.

 This library is being continuously [fuzz-tested](https://github.com/klauspost/compress-fuzz),
 kindly supplied by [fuzzit.dev](https://fuzzit.dev/). 
--- a/vendor/github.com/klauspost/compress/zstd/enc_best.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_best.go
@ -43,7 +43,7 @@ func (m *match) estBits(bitsPerByte int32) {
 	if m.rep < 0 {
 		ofc = ofCode(uint32(m.s-m.offset) + 3)
 	} else {
-		ofc = ofCode(uint32(m.rep))
+		ofc = ofCode(uint32(m.rep) & 3)
 	}
 	// Cost, excluding
 	ofTT, mlTT := fsePredefEnc[tableOffsets].ct.symbolTT[ofc], fsePredefEnc[tableMatchLengths].ct.symbolTT[mlc]
@ -227,7 +227,7 @@ encodeLoop:
 				}
 			}
 			l := 4 + e.matchlen(s+4, offset+4, src)
-			if rep < 0 {
+			if true {
 				// Extend candidate match backwards as far as possible.
 				tMin := s - e.maxMatchOff
 				if tMin < 0 {
@ -282,6 +282,7 @@ encodeLoop:
 		// Load next and check...
 		e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: candidateL.offset}
 		e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: candidateS.offset}
+		index0 := s + 1

 		// Look far ahead, unless we have a really long match already...
 		if best.length < goodEnough {
@ -357,19 +358,16 @@ encodeLoop:
 			blk.sequences = append(blk.sequences, seq)

 			// Index old s + 1 -> s - 1
-			index0 := s + 1
 			s = best.s + best.length
-
 			nextEmit = s
-			if s >= sLimit {
-				if debugEncoder {
-					println("repeat ended", s, best.length)
-				}
-				break encodeLoop
-			}
+
 			// Index skipped...
+			end := s
+			if s > sLimit+4 {
+				end = sLimit + 4
+			}
 			off := index0 + e.cur
-			for index0 < s {
+			for index0 < end {
 				cv0 := load6432(src, index0)
 				h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
 				h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
@ -378,6 +376,7 @@ encodeLoop:
 				off++
 				index0++
 			}
+
 			switch best.rep {
 			case 2, 4 | 1:
 				offset1, offset2 = offset2, offset1
@ -386,12 +385,17 @@ encodeLoop:
 			case 4 | 3:
 				offset1, offset2, offset3 = offset1-1, offset1, offset2
 			}
+			if s >= sLimit {
+				if debugEncoder {
+					println("repeat ended", s, best.length)
+				}
+				break encodeLoop
+			}
 			continue
 		}

 		// A 4-byte match has been found. Update recent offsets.
 		// We'll later see if more than 4 bytes.
-		index0 := s + 1
 		s = best.s
 		t := best.offset
 		offset1, offset2, offset3 = s-t, offset1, offset2
@ -419,19 +423,25 @@ encodeLoop:
 		}
 		blk.sequences = append(blk.sequences, seq)
 		nextEmit = s
-		if s >= sLimit {
-			break encodeLoop
+
+		// Index old s + 1 -> s - 1 or sLimit
+		end := s
+		if s > sLimit-4 {
+			end = sLimit - 4
 		}

-		// Index old s + 1 -> s - 1
-		for index0 < s {
+		off := index0 + e.cur
+		for index0 < end {
 			cv0 := load6432(src, index0)
 			h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
 			h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
-			off := index0 + e.cur
 			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
 			e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
 			index0++
+			off++
+		}
+		if s >= sLimit {
+			break encodeLoop
 		}
 	}

--- a/vendor/github.com/klauspost/compress/zstd/enc_better.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_better.go
@ -145,7 +145,7 @@ encodeLoop:
 		var t int32
 		// We allow the encoder to optionally turn off repeat offsets across blocks
 		canRepeat := len(blk.sequences) > 2
-		var matched int32
+		var matched, index0 int32

 		for {
 			if debugAsserts && canRepeat && offset1 == 0 {
@ -162,6 +162,7 @@ encodeLoop:
 			off := s + e.cur
 			e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
 			e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
+			index0 = s + 1

 			if canRepeat {
 				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
@ -258,7 +259,6 @@ encodeLoop:
 					}
 					blk.sequences = append(blk.sequences, seq)

-					index0 := s + repOff2
 					s += lenght + repOff2
 					nextEmit = s
 					if s >= sLimit {
@ -498,15 +498,15 @@ encodeLoop:
 		}

 		// Index match start+1 (long) -> s - 1
-		index0 := s - l + 1
+		off := index0 + e.cur
 		for index0 < s-1 {
 			cv0 := load6432(src, index0)
 			cv1 := cv0 >> 8
 			h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
-			off := index0 + e.cur
 			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
 			e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
 			index0 += 2
+			off += 2
 		}

 		cv = load6432(src, s)
@ -672,7 +672,7 @@ encodeLoop:
 		var t int32
 		// We allow the encoder to optionally turn off repeat offsets across blocks
 		canRepeat := len(blk.sequences) > 2
-		var matched int32
+		var matched, index0 int32

 		for {
 			if debugAsserts && canRepeat && offset1 == 0 {
@ -691,6 +691,7 @@ encodeLoop:
 			e.markLongShardDirty(nextHashL)
 			e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
 			e.markShortShardDirty(nextHashS)
+			index0 = s + 1

 			if canRepeat {
 				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
@ -726,7 +727,6 @@ encodeLoop:
 					blk.sequences = append(blk.sequences, seq)

 					// Index match start+1 (long) -> s - 1
-					index0 := s + repOff
 					s += lenght + repOff

 					nextEmit = s
@ -790,7 +790,6 @@ encodeLoop:
 					}
 					blk.sequences = append(blk.sequences, seq)

-					index0 := s + repOff2
 					s += lenght + repOff2
 					nextEmit = s
 					if s >= sLimit {
@ -1024,18 +1023,18 @@ encodeLoop:
 		}

 		// Index match start+1 (long) -> s - 1
-		index0 := s - l + 1
+		off := index0 + e.cur
 		for index0 < s-1 {
 			cv0 := load6432(src, index0)
 			cv1 := cv0 >> 8
 			h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
-			off := index0 + e.cur
 			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
 			e.markLongShardDirty(h0)
 			h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
 			e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
 			e.markShortShardDirty(h1)
 			index0 += 2
+			off += 2
 		}

 		cv = load6432(src, s)
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@ -159,8 +159,8 @@ github.com/gorilla/mux
 # github.com/inconshreveable/mousetrap v1.1.0
 ## explicit; go 1.18
 github.com/inconshreveable/mousetrap
-# github.com/klauspost/compress v1.17.2
-## explicit; go 1.18
+# github.com/klauspost/compress v1.17.4
+## explicit; go 1.19
 github.com/klauspost/compress
 github.com/klauspost/compress/fse
 github.com/klauspost/compress/huff0