vendor: github.com/klauspost/compress v1.15.1

full diff: https://github.com/klauspost/compress/compare/v1.15.0...v1.15.1 Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
2022-04-29 20:45:18 +02:00 · 2022-04-29 20:45:18 +02:00 · b3f3beb739
parent acf6aee911
commit b3f3beb739
22 changed files with 1889 additions and 258 deletions
--- a/vendor.mod
+++ b/vendor.mod
@ -53,7 +53,7 @@ require (
 	github.com/golang/protobuf v1.5.2 // indirect
 	github.com/gorilla/mux v1.8.0 // indirect; updated to v1.8.0 to get rid of old compatibility for "context"
 	github.com/inconshreveable/mousetrap v1.0.0 // indirect
-	github.com/klauspost/compress v1.15.0 // indirect
+	github.com/klauspost/compress v1.15.1 // indirect
 	github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect
 	github.com/miekg/pkcs11 v1.1.1 // indirect
 	github.com/moby/sys/symlink v0.2.0 // indirect
--- a/vendor.sum
+++ b/vendor.sum
@ -249,8 +249,8 @@ github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8
 github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00=
 github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
 github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
-github.com/klauspost/compress v1.15.0 h1:xqfchp4whNFxn5A4XFyyYtitiWI8Hy5EW59jEwcyL6U=
+github.com/klauspost/compress v1.15.1 h1:y9FcTHGyrebwfP0ZZqFiaxTaiDnUrGkJkI+f583BL1A=
-github.com/klauspost/compress v1.15.0/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
+github.com/klauspost/compress v1.15.1/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
 github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
 github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
 github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
--- a/vendor/github.com/klauspost/compress/README.md
+++ b/vendor/github.com/klauspost/compress/README.md
@ -17,6 +17,23 @@ This package provides various compression algorithms.
 # changelog
 * Mar 3, 2022 (v1.15.0)
 	* zstd: Refactor decoder by @klauspost in [#498](https://github.com/klauspost/compress/pull/498)
 	* zstd: Add stream encoding without goroutines by @klauspost in [#505](https://github.com/klauspost/compress/pull/505)
 	* huff0: Prevent single blocks exceeding 16 bits by @klauspost in [#507](https://github.com/klauspost/compress/pull/507)
 	* flate: Inline literal emission by @klauspost in [#509](https://github.com/klauspost/compress/pull/509)
 	* gzhttp: Add zstd to transport by @klauspost in [#400](https://github.com/klauspost/compress/pull/400)
 	* gzhttp: Make content-type optional by @klauspost in [#510](https://github.com/klauspost/compress/pull/510)
 <details>
 	<summary>See  Details</summary>
 Both compression and decompression now support "synchronous" stream operations. This means that whenever "concurrency" is set to 1, they will operate without spawning goroutines.
 Asynchronous stream decompression is now faster, since the goroutine allocation splits the workload much more effectively. On typical streams this will typically use 2 cores fully for decompression. When a stream has finished decoding no goroutines will be left over, so decoders can now safely be pooled and still be garbage collected.
 While the release has been extensively tested, it is recommended to test when upgrading.
 </details>
 * Feb 22, 2022 (v1.14.4)
 	* flate: Fix rare huffman only (-2) corruption. [#503](https://github.com/klauspost/compress/pull/503)
 	* zip: Update deprecated CreateHeaderRaw to correctly call CreateRaw by @saracen in [#502](https://github.com/klauspost/compress/pull/502)
--- a/vendor/github.com/klauspost/compress/huff0/autogen.go
+++ b/vendor/github.com/klauspost/compress/huff0/autogen.go
@ -0,0 +1,5 @@
 package huff0
 //go:generate go run generate.go
 //go:generate asmfmt -w decompress_amd64.s
 //go:generate asmfmt -w decompress_8b_amd64.s
--- a/vendor/github.com/klauspost/compress/huff0/bitreader.go
+++ b/vendor/github.com/klauspost/compress/huff0/bitreader.go
@ -165,6 +165,11 @@ func (b *bitReaderShifted) peekBitsFast(n uint8) uint16 {
 	return uint16(b.value >> ((64 - n) & 63))
 }
 // peekTopBits shifts the buffered value right by n and returns the low
 // 16 bits of the result. For n < 64 this is equivalent to
 // peekBitsFast(64 - n).
 func (b *bitReaderShifted) peekTopBits(n uint8) uint16 {
 	shifted := b.value >> n
 	return uint16(shifted)
 }
 func (b *bitReaderShifted) advance(n uint8) {
 	b.bitsRead += n
 	b.value <<= n & 63
--- a/vendor/github.com/klauspost/compress/huff0/decompress.go
+++ b/vendor/github.com/klauspost/compress/huff0/decompress.go
@ -725,189 +725,6 @@ func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) {
 	return dst, br.close()
 }
 // Decompress4X will decompress a 4X encoded stream.
 // The length of the supplied input must match the end of a block exactly.
 // The *capacity* of the dst slice must match the destination size of
 // the uncompressed data exactly.
 //
 // src begins with a 6-byte jump table: three little-endian 16-bit lengths
 // for the first three streams. The fourth stream is whatever follows them.
 // Stream i is written to dst starting at offset i*dstEvery, where
 // dstEvery = (cap(dst)+3)/4.
 //
 // An error is returned when no table is loaded, when the input is too
 // small or truncated, or when the streams do not decode to exactly
 // cap(dst) bytes.
 func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
 	if len(d.dt.single) == 0 {
 		return nil, errors.New("no table loaded")
 	}
 	// 6 bytes of jump table plus at least one byte per stream.
 	if len(src) < 6+(4*1) {
 		return nil, errors.New("input too small")
 	}
 	if use8BitTables && d.actualTableLog <= 8 {
 		return d.decompress4X8bit(dst, src)
 	}
 	var br [4]bitReaderShifted
 	// Decode "jump table"
 	start := 6
 	for i := 0; i < 3; i++ {
 		// Little-endian uint16 length of stream i.
 		length := int(src[i*2]) | (int(src[i*2+1]) << 8)
 		if start+length >= len(src) {
 			return nil, errors.New("truncated input (or invalid offset)")
 		}
 		err := br[i].init(src[start : start+length])
 		if err != nil {
 			return nil, err
 		}
 		start += length
 	}
 	// The last stream has no explicit length; it takes the rest of src.
 	err := br[3].init(src[start:])
 	if err != nil {
 		return nil, err
 	}
 	// destination, offset to match first output
 	dstSize := cap(dst)
 	dst = dst[:dstSize]
 	out := dst
 	// Number of output bytes reserved for each stream (rounded up).
 	dstEvery := (dstSize + 3) / 4
 	const tlSize = 1 << tableLogMax
 	const tlMask = tlSize - 1
 	single := d.dt.single[:tlSize]
 	// Use temp table to avoid bound checks/append penalty.
 	buf := d.buffer()
 	// off indexes into the per-stream temp buffers. It is a uint8, so it
 	// wraps to 0 once bufoff (256) bytes have been written per stream.
 	var off uint8
 	var decoded int
 	// Decode 2 values from each decoder/loop.
 	const bufoff = 256
 	for {
 		// fillFast reads 4 bytes at a time, so stop the fast loop as soon
 		// as any stream has fewer than 4 input bytes left.
 		if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
 			break
 		}
 		{
 			const stream = 0
 			const stream2 = 1
 			br[stream].fillFast()
 			br[stream2].fillFast()
 			val := br[stream].peekBitsFast(d.actualTableLog)
 			val2 := br[stream2].peekBitsFast(d.actualTableLog)
 			v := single[val&tlMask]
 			v2 := single[val2&tlMask]
 			// Low byte of entry is the bit count, high byte is the symbol.
 			br[stream].advance(uint8(v.entry))
 			br[stream2].advance(uint8(v2.entry))
 			buf[stream][off] = uint8(v.entry >> 8)
 			buf[stream2][off] = uint8(v2.entry >> 8)
 			val = br[stream].peekBitsFast(d.actualTableLog)
 			val2 = br[stream2].peekBitsFast(d.actualTableLog)
 			v = single[val&tlMask]
 			v2 = single[val2&tlMask]
 			br[stream].advance(uint8(v.entry))
 			br[stream2].advance(uint8(v2.entry))
 			buf[stream][off+1] = uint8(v.entry >> 8)
 			buf[stream2][off+1] = uint8(v2.entry >> 8)
 		}
 		{
 			const stream = 2
 			const stream2 = 3
 			br[stream].fillFast()
 			br[stream2].fillFast()
 			val := br[stream].peekBitsFast(d.actualTableLog)
 			val2 := br[stream2].peekBitsFast(d.actualTableLog)
 			v := single[val&tlMask]
 			v2 := single[val2&tlMask]
 			br[stream].advance(uint8(v.entry))
 			br[stream2].advance(uint8(v2.entry))
 			buf[stream][off] = uint8(v.entry >> 8)
 			buf[stream2][off] = uint8(v2.entry >> 8)
 			val = br[stream].peekBitsFast(d.actualTableLog)
 			val2 = br[stream2].peekBitsFast(d.actualTableLog)
 			v = single[val&tlMask]
 			v2 = single[val2&tlMask]
 			br[stream].advance(uint8(v.entry))
 			br[stream2].advance(uint8(v2.entry))
 			buf[stream][off+1] = uint8(v.entry >> 8)
 			buf[stream2][off+1] = uint8(v2.entry >> 8)
 		}
 		off += 2
 		// off wrapped around: all four temp buffers are full, flush them.
 		if off == 0 {
 			if bufoff > dstEvery {
 				d.bufs.Put(buf)
 				return nil, errors.New("corruption detected: stream overrun 1")
 			}
 			copy(out, buf[0][:])
 			copy(out[dstEvery:], buf[1][:])
 			copy(out[dstEvery*2:], buf[2][:])
 			copy(out[dstEvery*3:], buf[3][:])
 			out = out[bufoff:]
 			decoded += bufoff * 4
 			// There must at least be 3 buffers left.
 			if len(out) < dstEvery*3 {
 				d.bufs.Put(buf)
 				return nil, errors.New("corruption detected: stream overrun 2")
 			}
 		}
 	}
 	// Flush the partially filled temp buffers left over from the fast loop.
 	if off > 0 {
 		ioff := int(off)
 		if len(out) < dstEvery*3+ioff {
 			d.bufs.Put(buf)
 			return nil, errors.New("corruption detected: stream overrun 3")
 		}
 		copy(out, buf[0][:off])
 		copy(out[dstEvery:], buf[1][:off])
 		copy(out[dstEvery*2:], buf[2][:off])
 		copy(out[dstEvery*3:], buf[3][:off])
 		decoded += int(off) * 4
 		out = out[off:]
 	}
 	// Decode remaining.
 	// Each stream is finished one symbol at a time, writing directly to out.
 	remainBytes := dstEvery - (decoded / 4)
 	for i := range br {
 		offset := dstEvery * i
 		endsAt := offset + remainBytes
 		// Clamp so the final stream cannot run past the end of the output.
 		if endsAt > len(out) {
 			endsAt = len(out)
 		}
 		br := &br[i]
 		bitsLeft := br.remaining()
 		for bitsLeft > 0 {
 			br.fill()
 			if offset >= endsAt {
 				d.bufs.Put(buf)
 				return nil, errors.New("corruption detected: stream overrun 4")
 			}
 			// Read value and increment offset.
 			val := br.peekBitsFast(d.actualTableLog)
 			v := single[val&tlMask].entry
 			nBits := uint8(v)
 			br.advance(nBits)
 			bitsLeft -= uint(nBits)
 			out[offset] = uint8(v >> 8)
 			offset++
 		}
 		// The stream must end exactly where its share of the output ends.
 		if offset != endsAt {
 			d.bufs.Put(buf)
 			return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt)
 		}
 		decoded += offset - dstEvery*i
 		err = br.close()
 		if err != nil {
 			return nil, err
 		}
 	}
 	d.bufs.Put(buf)
 	if dstSize != decoded {
 		return nil, errors.New("corruption detected: short output block")
 	}
 	return dst, nil
 }
 // Decompress4X will decompress a 4X encoded stream.
 // The length of the supplied input must match the end of a block exactly.
 // The *capacity* of the dst slice must match the destination size of
--- a/vendor/github.com/klauspost/compress/huff0/decompress_8b_amd64.s
+++ b/vendor/github.com/klauspost/compress/huff0/decompress_8b_amd64.s
@ -0,0 +1,488 @@
 // +build !appengine
 // +build gc
 // +build !noasm
 #include "textflag.h"
 #include "funcdata.h"
 #include "go_asm.h"
 #define bufoff      256 // see decompress.go, we're using [4][256]byte table
 // func decompress4x_8b_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
 //	peekBits uint8, buf *byte, tbl *dEntrySingle) uint8
 //
 // Decodes 4 values from each of the four bit readers per iteration into
 // buf (treated as four consecutive 256-byte buffers) until either bufoff
 // bytes per stream have been written or a reader has fewer than 4 input
 // bytes left after a refill. Returns the low byte of the per-stream count.
 TEXT ·decompress4x_8b_loop_x86(SB), NOSPLIT, $8
 #define off             R8
 #define buffer          DI
 #define table           SI
 #define br_bits_read    R9
 #define br_value        R10
 #define br_offset       R11
 #define peek_bits       R12
 #define exhausted       DX
 #define br0             R13
 #define br1             R14
 #define br2             R15
 #define br3             BP
 	// BP doubles as br3, so save it in the frame and restore it at end.
 	MOVQ BP, 0(SP)
 	XORQ exhausted, exhausted // exhausted = false
 	XORQ off, off             // off = 0
 	MOVBQZX peekBits+32(FP), peek_bits
 	MOVQ    buf+40(FP), buffer
 	MOVQ    tbl+48(FP), table
 	MOVQ pbr0+0(FP), br0
 	MOVQ pbr1+8(FP), br1
 	MOVQ pbr2+16(FP), br2
 	MOVQ pbr3+24(FP), br3
 main_loop:
 	// const stream = 0
 	// br0.fillFast()
 	MOVBQZX bitReaderShifted_bitsRead(br0), br_bits_read
 	MOVQ    bitReaderShifted_value(br0), br_value
 	MOVQ    bitReaderShifted_off(br0), br_offset
 	// if b.bitsRead >= 32 {
 	CMPQ br_bits_read, $32
 	JB   skip_fill0
 	SUBQ $32, br_bits_read // b.bitsRead -= 32
 	SUBQ $4, br_offset     // b.off -= 4
 	// v := b.in[b.off-4 : b.off]
 	// v = v[:4]
 	// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
 	MOVQ bitReaderShifted_in(br0), AX
 	MOVL 0(br_offset)(AX*1), AX       // AX = uint32(b.in[b.off:b.off+4])
 	// b.value |= uint64(low) << (b.bitsRead & 63)
 	MOVQ br_bits_read, CX
 	SHLQ CL, AX
 	ORQ  AX, br_value
 	// exhausted = exhausted || (br0.off < 4)
 	// DL receives the comparison result, DH accumulates it across streams.
 	CMPQ  br_offset, $4
 	SETLT DL
 	ORB   DL, DH
 	// }
 skip_fill0:
 	// val0 := br0.peekTopBits(peekBits)
 	MOVQ br_value, AX
 	MOVQ peek_bits, CX
 	SHRQ CL, AX        // AX = value >> peek_bits
 	// v0 := table[val0&mask]
 	MOVW 0(table)(AX*2), AX // AX - v0
 	// br0.advance(uint8(v0.entry))
 	MOVB    AH, BL           // BL = uint8(v0.entry >> 8)
 	MOVBQZX AL, CX
 	SHLQ    CL, br_value     // value <<= n
 	ADDQ    CX, br_bits_read // bits_read += n
 	// val1 := br0.peekTopBits(peekBits)
 	MOVQ peek_bits, CX
 	MOVQ br_value, AX
 	SHRQ CL, AX        // AX = value >> peek_bits
 	// v1 := table[val1&mask]
 	MOVW 0(table)(AX*2), AX // AX - v1
 	// br0.advance(uint8(v1.entry))
 	MOVB    AH, BH           // BH = uint8(v1.entry >> 8)
 	MOVBQZX AL, CX
 	SHLQ    CX, br_value     // value <<= n
 	ADDQ    CX, br_bits_read // bits_read += n
 	// these two writes get coalesced
 	// buf[stream][off] = uint8(v0.entry >> 8)
 	// buf[stream][off+1] = uint8(v1.entry >> 8)
 	MOVW BX, 0(buffer)(off*1)
 	// SECOND PART:
 	// val2 := br0.peekTopBits(peekBits)
 	MOVQ br_value, AX
 	MOVQ peek_bits, CX
 	SHRQ CL, AX        // AX = value >> peek_bits
 	// v2 := table[val2&mask]
 	MOVW 0(table)(AX*2), AX // AX - v2
 	// br0.advance(uint8(v2.entry))
 	MOVB    AH, BL           // BL = uint8(v2.entry >> 8)
 	MOVBQZX AL, CX
 	SHLQ    CL, br_value     // value <<= n
 	ADDQ    CX, br_bits_read // bits_read += n
 	// val3 := br0.peekTopBits(peekBits)
 	MOVQ peek_bits, CX
 	MOVQ br_value, AX
 	SHRQ CL, AX        // AX = value >> peek_bits
 	// v3 := table[val3&mask]
 	MOVW 0(table)(AX*2), AX // AX - v3
 	// br0.advance(uint8(v3.entry))
 	MOVB    AH, BH           // BH = uint8(v3.entry >> 8)
 	MOVBQZX AL, CX
 	SHLQ    CX, br_value     // value <<= n
 	ADDQ    CX, br_bits_read // bits_read += n
 	// these two writes get coalesced
 	// buf[stream][off+2] = uint8(v2.entry >> 8)
 	// buf[stream][off+3] = uint8(v3.entry >> 8)
 	MOVW BX, 0+2(buffer)(off*1)
 	// update the bit reader structure
 	MOVB br_bits_read, bitReaderShifted_bitsRead(br0)
 	MOVQ br_value, bitReaderShifted_value(br0)
 	MOVQ br_offset, bitReaderShifted_off(br0)
 	// const stream = 1
 	// br1.fillFast()
 	MOVBQZX bitReaderShifted_bitsRead(br1), br_bits_read
 	MOVQ    bitReaderShifted_value(br1), br_value
 	MOVQ    bitReaderShifted_off(br1), br_offset
 	// if b.bitsRead >= 32 {
 	CMPQ br_bits_read, $32
 	JB   skip_fill1
 	SUBQ $32, br_bits_read // b.bitsRead -= 32
 	SUBQ $4, br_offset     // b.off -= 4
 	// v := b.in[b.off-4 : b.off]
 	// v = v[:4]
 	// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
 	MOVQ bitReaderShifted_in(br1), AX
 	MOVL 0(br_offset)(AX*1), AX       // AX = uint32(b.in[b.off:b.off+4])
 	// b.value |= uint64(low) << (b.bitsRead & 63)
 	MOVQ br_bits_read, CX
 	SHLQ CL, AX
 	ORQ  AX, br_value
 	// exhausted = exhausted || (br1.off < 4)
 	CMPQ  br_offset, $4
 	SETLT DL
 	ORB   DL, DH
 	// }
 skip_fill1:
 	// val0 := br1.peekTopBits(peekBits)
 	MOVQ br_value, AX
 	MOVQ peek_bits, CX
 	SHRQ CL, AX        // AX = value >> peek_bits
 	// v0 := table[val0&mask]
 	MOVW 0(table)(AX*2), AX // AX - v0
 	// br1.advance(uint8(v0.entry))
 	MOVB    AH, BL           // BL = uint8(v0.entry >> 8)
 	MOVBQZX AL, CX
 	SHLQ    CL, br_value     // value <<= n
 	ADDQ    CX, br_bits_read // bits_read += n
 	// val1 := br1.peekTopBits(peekBits)
 	MOVQ peek_bits, CX
 	MOVQ br_value, AX
 	SHRQ CL, AX        // AX = value >> peek_bits
 	// v1 := table[val1&mask]
 	MOVW 0(table)(AX*2), AX // AX - v1
 	// br1.advance(uint8(v1.entry))
 	MOVB    AH, BH           // BH = uint8(v1.entry >> 8)
 	MOVBQZX AL, CX
 	SHLQ    CX, br_value     // value <<= n
 	ADDQ    CX, br_bits_read // bits_read += n
 	// these two writes get coalesced
 	// buf[stream][off] = uint8(v0.entry >> 8)
 	// buf[stream][off+1] = uint8(v1.entry >> 8)
 	MOVW BX, 256(buffer)(off*1)
 	// SECOND PART:
 	// val2 := br1.peekTopBits(peekBits)
 	MOVQ br_value, AX
 	MOVQ peek_bits, CX
 	SHRQ CL, AX        // AX = value >> peek_bits
 	// v2 := table[val2&mask]
 	MOVW 0(table)(AX*2), AX // AX - v2
 	// br1.advance(uint8(v2.entry))
 	MOVB    AH, BL           // BL = uint8(v2.entry >> 8)
 	MOVBQZX AL, CX
 	SHLQ    CL, br_value     // value <<= n
 	ADDQ    CX, br_bits_read // bits_read += n
 	// val3 := br1.peekTopBits(peekBits)
 	MOVQ peek_bits, CX
 	MOVQ br_value, AX
 	SHRQ CL, AX        // AX = value >> peek_bits
 	// v3 := table[val3&mask]
 	MOVW 0(table)(AX*2), AX // AX - v3
 	// br1.advance(uint8(v3.entry))
 	MOVB    AH, BH           // BH = uint8(v3.entry >> 8)
 	MOVBQZX AL, CX
 	SHLQ    CX, br_value     // value <<= n
 	ADDQ    CX, br_bits_read // bits_read += n
 	// these two writes get coalesced
 	// buf[stream][off+2] = uint8(v2.entry >> 8)
 	// buf[stream][off+3] = uint8(v3.entry >> 8)
 	MOVW BX, 256+2(buffer)(off*1)
 	// update the bit reader structure
 	MOVB br_bits_read, bitReaderShifted_bitsRead(br1)
 	MOVQ br_value, bitReaderShifted_value(br1)
 	MOVQ br_offset, bitReaderShifted_off(br1)
 	// const stream = 2
 	// br2.fillFast()
 	MOVBQZX bitReaderShifted_bitsRead(br2), br_bits_read
 	MOVQ    bitReaderShifted_value(br2), br_value
 	MOVQ    bitReaderShifted_off(br2), br_offset
 	// if b.bitsRead >= 32 {
 	CMPQ br_bits_read, $32
 	JB   skip_fill2
 	SUBQ $32, br_bits_read // b.bitsRead -= 32
 	SUBQ $4, br_offset     // b.off -= 4
 	// v := b.in[b.off-4 : b.off]
 	// v = v[:4]
 	// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
 	MOVQ bitReaderShifted_in(br2), AX
 	MOVL 0(br_offset)(AX*1), AX       // AX = uint32(b.in[b.off:b.off+4])
 	// b.value |= uint64(low) << (b.bitsRead & 63)
 	MOVQ br_bits_read, CX
 	SHLQ CL, AX
 	ORQ  AX, br_value
 	// exhausted = exhausted || (br2.off < 4)
 	CMPQ  br_offset, $4
 	SETLT DL
 	ORB   DL, DH
 	// }
 skip_fill2:
 	// val0 := br2.peekTopBits(peekBits)
 	MOVQ br_value, AX
 	MOVQ peek_bits, CX
 	SHRQ CL, AX        // AX = value >> peek_bits
 	// v0 := table[val0&mask]
 	MOVW 0(table)(AX*2), AX // AX - v0
 	// br2.advance(uint8(v0.entry))
 	MOVB    AH, BL           // BL = uint8(v0.entry >> 8)
 	MOVBQZX AL, CX
 	SHLQ    CL, br_value     // value <<= n
 	ADDQ    CX, br_bits_read // bits_read += n
 	// val1 := br2.peekTopBits(peekBits)
 	MOVQ peek_bits, CX
 	MOVQ br_value, AX
 	SHRQ CL, AX        // AX = value >> peek_bits
 	// v1 := table[val1&mask]
 	MOVW 0(table)(AX*2), AX // AX - v1
 	// br2.advance(uint8(v1.entry))
 	MOVB    AH, BH           // BH = uint8(v1.entry >> 8)
 	MOVBQZX AL, CX
 	SHLQ    CX, br_value     // value <<= n
 	ADDQ    CX, br_bits_read // bits_read += n
 	// these two writes get coalesced
 	// buf[stream][off] = uint8(v0.entry >> 8)
 	// buf[stream][off+1] = uint8(v1.entry >> 8)
 	MOVW BX, 512(buffer)(off*1)
 	// SECOND PART:
 	// val2 := br2.peekTopBits(peekBits)
 	MOVQ br_value, AX
 	MOVQ peek_bits, CX
 	SHRQ CL, AX        // AX = value >> peek_bits
 	// v2 := table[val2&mask]
 	MOVW 0(table)(AX*2), AX // AX - v2
 	// br2.advance(uint8(v2.entry))
 	MOVB    AH, BL           // BL = uint8(v2.entry >> 8)
 	MOVBQZX AL, CX
 	SHLQ    CL, br_value     // value <<= n
 	ADDQ    CX, br_bits_read // bits_read += n
 	// val3 := br2.peekTopBits(peekBits)
 	MOVQ peek_bits, CX
 	MOVQ br_value, AX
 	SHRQ CL, AX        // AX = value >> peek_bits
 	// v3 := table[val3&mask]
 	MOVW 0(table)(AX*2), AX // AX - v3
 	// br2.advance(uint8(v3.entry))
 	MOVB    AH, BH           // BH = uint8(v3.entry >> 8)
 	MOVBQZX AL, CX
 	SHLQ    CX, br_value     // value <<= n
 	ADDQ    CX, br_bits_read // bits_read += n
 	// these two writes get coalesced
 	// buf[stream][off+2] = uint8(v2.entry >> 8)
 	// buf[stream][off+3] = uint8(v3.entry >> 8)
 	MOVW BX, 512+2(buffer)(off*1)
 	// update the bit reader structure
 	MOVB br_bits_read, bitReaderShifted_bitsRead(br2)
 	MOVQ br_value, bitReaderShifted_value(br2)
 	MOVQ br_offset, bitReaderShifted_off(br2)
 	// const stream = 3
 	// br3.fillFast()
 	MOVBQZX bitReaderShifted_bitsRead(br3), br_bits_read
 	MOVQ    bitReaderShifted_value(br3), br_value
 	MOVQ    bitReaderShifted_off(br3), br_offset
 	// if b.bitsRead >= 32 {
 	CMPQ br_bits_read, $32
 	JB   skip_fill3
 	SUBQ $32, br_bits_read // b.bitsRead -= 32
 	SUBQ $4, br_offset     // b.off -= 4
 	// v := b.in[b.off-4 : b.off]
 	// v = v[:4]
 	// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
 	MOVQ bitReaderShifted_in(br3), AX
 	MOVL 0(br_offset)(AX*1), AX       // AX = uint32(b.in[b.off:b.off+4])
 	// b.value |= uint64(low) << (b.bitsRead & 63)
 	MOVQ br_bits_read, CX
 	SHLQ CL, AX
 	ORQ  AX, br_value
 	// exhausted = exhausted || (br3.off < 4)
 	CMPQ  br_offset, $4
 	SETLT DL
 	ORB   DL, DH
 	// }
 skip_fill3:
 	// val0 := br3.peekTopBits(peekBits)
 	MOVQ br_value, AX
 	MOVQ peek_bits, CX
 	SHRQ CL, AX        // AX = value >> peek_bits
 	// v0 := table[val0&mask]
 	MOVW 0(table)(AX*2), AX // AX - v0
 	// br3.advance(uint8(v0.entry))
 	MOVB    AH, BL           // BL = uint8(v0.entry >> 8)
 	MOVBQZX AL, CX
 	SHLQ    CL, br_value     // value <<= n
 	ADDQ    CX, br_bits_read // bits_read += n
 	// val1 := br3.peekTopBits(peekBits)
 	MOVQ peek_bits, CX
 	MOVQ br_value, AX
 	SHRQ CL, AX        // AX = value >> peek_bits
 	// v1 := table[val1&mask]
 	MOVW 0(table)(AX*2), AX // AX - v1
 	// br3.advance(uint8(v1.entry))
 	MOVB    AH, BH           // BH = uint8(v1.entry >> 8)
 	MOVBQZX AL, CX
 	SHLQ    CX, br_value     // value <<= n
 	ADDQ    CX, br_bits_read // bits_read += n
 	// these two writes get coalesced
 	// buf[stream][off] = uint8(v0.entry >> 8)
 	// buf[stream][off+1] = uint8(v1.entry >> 8)
 	MOVW BX, 768(buffer)(off*1)
 	// SECOND PART:
 	// val2 := br3.peekTopBits(peekBits)
 	MOVQ br_value, AX
 	MOVQ peek_bits, CX
 	SHRQ CL, AX        // AX = value >> peek_bits
 	// v2 := table[val2&mask]
 	MOVW 0(table)(AX*2), AX // AX - v2
 	// br3.advance(uint8(v2.entry))
 	MOVB    AH, BL           // BL = uint8(v2.entry >> 8)
 	MOVBQZX AL, CX
 	SHLQ    CL, br_value     // value <<= n
 	ADDQ    CX, br_bits_read // bits_read += n
 	// val3 := br3.peekTopBits(peekBits)
 	MOVQ peek_bits, CX
 	MOVQ br_value, AX
 	SHRQ CL, AX        // AX = value >> peek_bits
 	// v3 := table[val3&mask]
 	MOVW 0(table)(AX*2), AX // AX - v3
 	// br3.advance(uint8(v3.entry))
 	MOVB    AH, BH           // BH = uint8(v3.entry >> 8)
 	MOVBQZX AL, CX
 	SHLQ    CX, br_value     // value <<= n
 	ADDQ    CX, br_bits_read // bits_read += n
 	// these two writes get coalesced
 	// buf[stream][off+2] = uint8(v2.entry >> 8)
 	// buf[stream][off+3] = uint8(v3.entry >> 8)
 	MOVW BX, 768+2(buffer)(off*1)
 	// update the bit reader structure
 	MOVB br_bits_read, bitReaderShifted_bitsRead(br3)
 	MOVQ br_value, bitReaderShifted_value(br3)
 	MOVQ br_offset, bitReaderShifted_off(br3)
 	ADDQ $4, off // off += 4
 	TESTB DH, DH // any br[i].off < 4?
 	JNZ   end
 	CMPQ off, $bufoff
 	JL   main_loop
 end:
 	MOVQ 0(SP), BP
 	// Only the low byte of off is returned, so a completely filled
 	// buffer (off == bufoff == 256) is reported as 0.
 	MOVB off, ret+56(FP)
 	RET
 #undef off
 #undef buffer
 #undef table
 #undef br_bits_read
 #undef br_value
 #undef br_offset
 #undef peek_bits
 #undef exhausted
 #undef br0
 #undef br1
 #undef br2
 #undef br3
--- a/vendor/github.com/klauspost/compress/huff0/decompress_8b_amd64.s.in
+++ b/vendor/github.com/klauspost/compress/huff0/decompress_8b_amd64.s.in
@ -0,0 +1,197 @@
 // +build !appengine
 // +build gc
 // +build !noasm
 #include "textflag.h"
 #include "funcdata.h"
 #include "go_asm.h"
 #define bufoff      256     // see decompress.go, we're using [4][256]byte table
 //func decompress4x_8b_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
 //	peekBits uint8, buf *byte, tbl *dEntrySingle) uint8
 //
 // Template for the 8-bit-table decode loop. "decode_2_values_x86" is
 // expanded once per stream below; each expansion decodes 4 values from
 // one bit reader into that stream's 256-byte slice of buf.
 TEXT ·decompress4x_8b_loop_x86(SB), NOSPLIT, $8
 #define off             R8
 #define buffer          DI
 #define table           SI
 #define br_bits_read    R9
 #define br_value        R10
 #define br_offset       R11
 #define peek_bits       R12
 #define exhausted       DX
 #define br0             R13
 #define br1             R14
 #define br2             R15
 #define br3             BP
    // BP doubles as br3, so save it in the frame and restore it at end.
    MOVQ    BP, 0(SP)
    XORQ    exhausted, exhausted    // exhausted = false
    XORQ    off, off                // off = 0
    MOVBQZX peekBits+32(FP), peek_bits
    MOVQ    buf+40(FP), buffer
    MOVQ    tbl+48(FP), table
    MOVQ    pbr0+0(FP), br0
    MOVQ    pbr1+8(FP), br1
    MOVQ    pbr2+16(FP), br2
    MOVQ    pbr3+24(FP), br3
 main_loop:
 {{ define "decode_2_values_x86" }}
    // const stream = {{ var "id" }}
    // br{{ var "id"}}.fillFast()
    MOVBQZX bitReaderShifted_bitsRead(br{{ var "id" }}), br_bits_read
    MOVQ    bitReaderShifted_value(br{{ var "id" }}), br_value
    MOVQ    bitReaderShifted_off(br{{ var "id" }}), br_offset
 	// if b.bitsRead >= 32 {
    CMPQ    br_bits_read, $32
    JB      skip_fill{{ var "id" }}
    SUBQ    $32, br_bits_read       // b.bitsRead -= 32
    SUBQ    $4, br_offset           // b.off -= 4
 	// v := b.in[b.off-4 : b.off]
 	// v = v[:4]
 	// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
    MOVQ    bitReaderShifted_in(br{{ var "id" }}), AX
    MOVL    0(br_offset)(AX*1), AX  // AX = uint32(b.in[b.off:b.off+4])
 	// b.value |= uint64(low) << (b.bitsRead & 63)
    MOVQ    br_bits_read, CX
    SHLQ    CL, AX
    ORQ     AX, br_value
    // exhausted = exhausted || (br{{ var "id"}}.off < 4)
    // DL receives the comparison result, DH accumulates it across streams.
    CMPQ    br_offset, $4
    SETLT   DL
    ORB     DL, DH
    // }
 skip_fill{{ var "id" }}:
    // val0 := br{{ var "id"}}.peekTopBits(peekBits)
    MOVQ    br_value, AX
    MOVQ    peek_bits, CX
    SHRQ    CL, AX                  // AX = value >> peek_bits
    // v0 := table[val0&mask]
    MOVW    0(table)(AX*2), AX      // AX - v0
    // br{{ var "id"}}.advance(uint8(v0.entry))
    MOVB    AH, BL                  // BL = uint8(v0.entry >> 8)
    MOVBQZX AL, CX
    SHLQ    CL, br_value            // value <<= n
    ADDQ    CX, br_bits_read        // bits_read += n
    // val1 := br{{ var "id"}}.peekTopBits(peekBits)
    MOVQ    peek_bits, CX
    MOVQ    br_value, AX
    SHRQ    CL, AX                  // AX = value >> peek_bits
    // v1 := table[val1&mask]
    MOVW    0(table)(AX*2), AX      // AX - v1
    // br{{ var "id"}}.advance(uint8(v1.entry))
    MOVB    AH, BH                  // BH = uint8(v1.entry >> 8)
    MOVBQZX AL, CX
    SHLQ    CX, br_value            // value <<= n
    ADDQ    CX, br_bits_read        // bits_read += n
    // these two writes get coalesced
    // buf[stream][off] = uint8(v0.entry >> 8)
    // buf[stream][off+1] = uint8(v1.entry >> 8)
    MOVW    BX, {{ var "bufofs" }}(buffer)(off*1)
    // SECOND PART:
    // val2 := br{{ var "id"}}.peekTopBits(peekBits)
    MOVQ    br_value, AX
    MOVQ    peek_bits, CX
    SHRQ    CL, AX                  // AX = value >> peek_bits
    // v2 := table[val2&mask]
    MOVW    0(table)(AX*2), AX      // AX - v2
    // br{{ var "id"}}.advance(uint8(v2.entry))
    MOVB    AH, BL                  // BL = uint8(v2.entry >> 8)
    MOVBQZX AL, CX
    SHLQ    CL, br_value            // value <<= n
    ADDQ    CX, br_bits_read        // bits_read += n
    // val3 := br{{ var "id"}}.peekTopBits(peekBits)
    MOVQ    peek_bits, CX
    MOVQ    br_value, AX
    SHRQ    CL, AX                  // AX = value >> peek_bits
    // v3 := table[val3&mask]
    MOVW    0(table)(AX*2), AX      // AX - v3
    // br{{ var "id"}}.advance(uint8(v3.entry))
    MOVB    AH, BH                  // BH = uint8(v3.entry >> 8)
    MOVBQZX AL, CX
    SHLQ    CX, br_value            // value <<= n
    ADDQ    CX, br_bits_read        // bits_read += n
    // these two writes get coalesced
    // buf[stream][off+2] = uint8(v2.entry >> 8)
    // buf[stream][off+3] = uint8(v3.entry >> 8)
    MOVW    BX, {{ var "bufofs" }}+2(buffer)(off*1)
    // update the bit reader structure
    MOVB    br_bits_read, bitReaderShifted_bitsRead(br{{ var "id" }})
    MOVQ    br_value, bitReaderShifted_value(br{{ var "id" }})
    MOVQ    br_offset, bitReaderShifted_off(br{{ var "id" }})
 {{ end }}
    {{ set "id" "0" }}
    {{ set "ofs" "0" }}
    {{ set "bufofs" "0" }} {{/* id * bufoff */}}
    {{ template "decode_2_values_x86" . }}
    {{ set "id" "1" }}
    {{ set "ofs" "8" }}
    {{ set "bufofs" "256" }}
    {{ template "decode_2_values_x86" . }}
    {{ set "id" "2" }}
    {{ set "ofs" "16" }}
    {{ set "bufofs" "512" }}
    {{ template "decode_2_values_x86" . }}
    {{ set "id" "3" }}
    {{ set "ofs" "24" }}
    {{ set "bufofs" "768" }}
    {{ template "decode_2_values_x86" . }}
    ADDQ    $4, off     // off += 4
    TESTB   DH, DH      // any br[i].off < 4?
    JNZ     end
    CMPQ    off, $bufoff
    JL      main_loop
 end:
    MOVQ    0(SP), BP
    // Only the low byte of off is returned, so a completely filled
    // buffer (off == bufoff == 256) is reported as 0.
    MOVB    off, ret+56(FP)
    RET
 #undef  off
 #undef  buffer
 #undef  table
 #undef  br_bits_read
 #undef  br_value
 #undef  br_offset
 #undef  peek_bits
 #undef  exhausted
 #undef  br0
 #undef  br1
 #undef  br2
 #undef  br3
--- a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go
+++ b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go
@ -0,0 +1,181 @@
 //go:build amd64 && !appengine && !noasm && gc
 // +build amd64,!appengine,!noasm,gc
 // This file contains the specialisation of Decoder.Decompress4X
 // that uses an asm implementation of its main loop.
 package huff0
 import (
 	"errors"
 	"fmt"
 )
 // decompress4x_main_loop_x86 is an x86 assembler implementation
 // of Decompress4X when tablelog > 8.
 //
 // It decodes from the four bit readers into buf (four consecutive
 // 256-byte buffers) using the decoding table tbl, and returns the number
 // of bytes written per stream as a uint8.
 // NOTE(review): a completely filled buffer is presumably reported as 0,
 // as Decompress4X treats a zero result as "flush and continue" — confirm
 // against decompress_amd64.s.
 //
 // The directive below must be written without a space after "//";
 // otherwise it is an ordinary comment and the pointer arguments are
 // treated as escaping.
 //
 //go:noescape
 func decompress4x_main_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
 	peekBits uint8, buf *byte, tbl *dEntrySingle) uint8
 // decompress4x_8b_loop_x86 is an x86 assembler implementation
 // of Decompress4X when tablelog <= 8 which decodes 4 entries
 // per loop.
 //
 // It decodes from the four bit readers into buf (four consecutive
 // 256-byte buffers) using the decoding table tbl. The result is the low
 // byte of the per-stream byte count, so a completely filled buffer is
 // reported as 0.
 //
 // The directive below must be written without a space after "//";
 // otherwise it is an ordinary comment and the pointer arguments are
 // treated as escaping.
 //
 //go:noescape
 func decompress4x_8b_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
 	peekBits uint8, buf *byte, tbl *dEntrySingle) uint8
 // fallback8BitSize is the destination capacity, in bytes, below which
 // Decompress4X hands streams with actualTableLog <= 8 to
 // decompress4X8bit, because the Go version is faster at those sizes.
 const fallback8BitSize = 800
 // Decompress4X will decompress a 4X encoded stream.
 // The length of the supplied input must match the end of a block exactly.
 // The *capacity* of the dst slice must match the destination size of
 // the uncompressed data exactly.
 //
 // src begins with a 6-byte jump table: three little-endian 16-bit lengths
 // for the first three streams. The fourth stream is whatever follows them.
 // Stream i is written to dst starting at offset i*dstEvery, where
 // dstEvery = (cap(dst)+3)/4.
 //
 // The bulk of the decoding is done by the assembly loops; the tail of
 // each stream is finished in Go. An error is returned when no table is
 // loaded, when the input is too small or truncated, or when the streams
 // do not decode to exactly cap(dst) bytes.
 func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
 	if len(d.dt.single) == 0 {
 		return nil, errors.New("no table loaded")
 	}
 	// 6 bytes of jump table plus at least one byte per stream.
 	if len(src) < 6+(4*1) {
 		return nil, errors.New("input too small")
 	}
 	use8BitTables := d.actualTableLog <= 8
 	// Small outputs with short codes are handed to decompress4X8bit.
 	if cap(dst) < fallback8BitSize && use8BitTables {
 		return d.decompress4X8bit(dst, src)
 	}
 	var br [4]bitReaderShifted
 	// Decode "jump table"
 	start := 6
 	for i := 0; i < 3; i++ {
 		// Little-endian uint16 length of stream i.
 		length := int(src[i*2]) | (int(src[i*2+1]) << 8)
 		if start+length >= len(src) {
 			return nil, errors.New("truncated input (or invalid offset)")
 		}
 		err := br[i].init(src[start : start+length])
 		if err != nil {
 			return nil, err
 		}
 		start += length
 	}
 	// The last stream has no explicit length; it takes the rest of src.
 	err := br[3].init(src[start:])
 	if err != nil {
 		return nil, err
 	}
 	// destination, offset to match first output
 	dstSize := cap(dst)
 	dst = dst[:dstSize]
 	out := dst
 	// Number of output bytes reserved for each stream (rounded up).
 	dstEvery := (dstSize + 3) / 4
 	const tlSize = 1 << tableLogMax
 	const tlMask = tlSize - 1
 	single := d.dt.single[:tlSize]
 	// Use temp table to avoid bound checks/append penalty.
 	buf := d.buffer()
 	// off receives the per-stream byte count reported by the assembly loop.
 	var off uint8
 	var decoded int
 	// Set to true to print the bit reader state after every assembly call.
 	const debug = false
 	// see: bitReaderShifted.peekBitsFast()
 	// The shift amount is precomputed once and passed to the assembly.
 	peekBits := uint8((64 - d.actualTableLog) & 63)
 	// Decode 2 values from each decoder/loop.
 	const bufoff = 256
 	for {
 		// Stop using the assembly loop as soon as any stream has fewer
 		// than 4 input bytes left.
 		if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
 			break
 		}
 		if use8BitTables {
 			off = decompress4x_8b_loop_x86(&br[0], &br[1], &br[2], &br[3], peekBits, &buf[0][0], &single[0])
 		} else {
 			off = decompress4x_main_loop_x86(&br[0], &br[1], &br[2], &br[3], peekBits, &buf[0][0], &single[0])
 		}
 		if debug {
 			fmt.Print("DEBUG: ")
 			fmt.Printf("off=%d,", off)
 			for i := 0; i < 4; i++ {
 				fmt.Printf(" br[%d]={bitsRead=%d, value=%x, off=%d}",
 					i, br[i].bitsRead, br[i].value, br[i].off)
 			}
 			fmt.Println("")
 		}
 		// A non-zero count means the buffers were only partially filled;
 		// leave the loop and flush the partial buffers below. A zero
 		// count is handled as full bufoff-byte buffers.
 		if off != 0 {
 			break
 		}
 		if bufoff > dstEvery {
 			d.bufs.Put(buf)
 			return nil, errors.New("corruption detected: stream overrun 1")
 		}
 		copy(out, buf[0][:])
 		copy(out[dstEvery:], buf[1][:])
 		copy(out[dstEvery*2:], buf[2][:])
 		copy(out[dstEvery*3:], buf[3][:])
 		out = out[bufoff:]
 		decoded += bufoff * 4
 		// There must at least be 3 buffers left.
 		if len(out) < dstEvery*3 {
 			d.bufs.Put(buf)
 			return nil, errors.New("corruption detected: stream overrun 2")
 		}
 	}
 	// Flush the partially filled temp buffers left over from the fast loop.
 	if off > 0 {
 		ioff := int(off)
 		if len(out) < dstEvery*3+ioff {
 			d.bufs.Put(buf)
 			return nil, errors.New("corruption detected: stream overrun 3")
 		}
 		copy(out, buf[0][:off])
 		copy(out[dstEvery:], buf[1][:off])
 		copy(out[dstEvery*2:], buf[2][:off])
 		copy(out[dstEvery*3:], buf[3][:off])
 		decoded += int(off) * 4
 		out = out[off:]
 	}
 	// Decode remaining.
 	// Each stream is finished one symbol at a time, writing directly to out.
 	remainBytes := dstEvery - (decoded / 4)
 	for i := range br {
 		offset := dstEvery * i
 		endsAt := offset + remainBytes
 		// Clamp so the final stream cannot run past the end of the output.
 		if endsAt > len(out) {
 			endsAt = len(out)
 		}
 		br := &br[i]
 		bitsLeft := br.remaining()
 		for bitsLeft > 0 {
 			br.fill()
 			if offset >= endsAt {
 				d.bufs.Put(buf)
 				return nil, errors.New("corruption detected: stream overrun 4")
 			}
 			// Read value and increment offset.
 			val := br.peekBitsFast(d.actualTableLog)
 			// Low byte of entry is the bit count, high byte is the symbol.
 			v := single[val&tlMask].entry
 			nBits := uint8(v)
 			br.advance(nBits)
 			bitsLeft -= uint(nBits)
 			out[offset] = uint8(v >> 8)
 			offset++
 		}
 		// The stream must end exactly where its share of the output ends.
 		if offset != endsAt {
 			d.bufs.Put(buf)
 			return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt)
 		}
 		decoded += offset - dstEvery*i
 		err = br.close()
 		if err != nil {
 			return nil, err
 		}
 	}
 	d.bufs.Put(buf)
 	if dstSize != decoded {
 		return nil, errors.New("corruption detected: short output block")
 	}
 	return dst, nil
 }
--- a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s
+++ b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s
@ -0,0 +1,506 @@
 // +build !appengine
 // +build gc
 // +build !noasm
 #include "textflag.h"
 #include "funcdata.h"
 #include "go_asm.h"
 #ifdef GOAMD64_v4
 #ifndef GOAMD64_v3
 #define GOAMD64_v3
 #endif
 #endif
 #define bufoff      256 // see decompress.go, we're using [4][256]byte table
 // func decompress4x_main_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
 //	peekBits uint8, buf *byte, tbl *dEntrySingle) (int, bool)
 //
 // Main loop of the 4-stream Huffman decoder. Every iteration decodes two
 // symbols from each of the four bit readers, stores them with one 16-bit
 // write at buf[stream*256+off], writes the reader state back and adds 2 to
 // off. The loop ends when off reaches bufoff or when a refill left one of
 // the readers with off < 4. Only the low byte of off is stored to
 // ret+56(FP), so a completely filled buffer (off == 256) is reported as 0.
 //
 // NOTE(review): the prototype above lists (int, bool) results, but the code
 // stores a single byte result - confirm against the Go declaration.
 //
 // The 8-byte frame holds the caller's BP, because BP doubles as br3 here.
 TEXT ·decompress4x_main_loop_x86(SB), NOSPLIT, $8
 #define off             R8
 #define buffer          DI
 #define table           SI
 #define br_bits_read    R9
 #define br_value        R10
 #define br_offset       R11
 #define peek_bits       R12
 // exhausted lives in DX: DL receives each "off < 4" comparison result and
 // DH accumulates them across streams and iterations.
 #define exhausted       DX
 #define br0             R13
 #define br1             R14
 #define br2             R15
 #define br3             BP
 	// Save BP; it is restored right before returning.
 	MOVQ BP, 0(SP)
 	XORQ exhausted, exhausted // exhausted = false
 	XORQ off, off             // off = 0
 	MOVBQZX peekBits+32(FP), peek_bits
 	MOVQ    buf+40(FP), buffer
 	MOVQ    tbl+48(FP), table
 	MOVQ pbr0+0(FP), br0
 	MOVQ pbr1+8(FP), br1
 	MOVQ pbr2+16(FP), br2
 	MOVQ pbr3+24(FP), br3
 main_loop:
 	// const stream = 0
 	// br0.fillFast()
 	MOVBQZX bitReaderShifted_bitsRead(br0), br_bits_read
 	MOVQ    bitReaderShifted_value(br0), br_value
 	MOVQ    bitReaderShifted_off(br0), br_offset
 	// We must have at least 2 * max tablelog left
 	// (refill is skipped while bitsRead <= 64-22).
 	CMPQ br_bits_read, $64-22
 	JBE  skip_fill0
 	SUBQ $32, br_bits_read // b.bitsRead -= 32
 	SUBQ $4, br_offset     // b.off -= 4
 	// v := b.in[b.off-4 : b.off]
 	// v = v[:4]
 	// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
 	MOVQ bitReaderShifted_in(br0), AX
 	// b.value |= uint64(low) << (b.bitsRead & 63)
 	// NOTE(review): the SHLXQ form takes a 64-bit memory operand while the
 	// fallback MOVL loads 32 bits; confirm the extra high bytes are harmless.
 	// The same pattern is repeated for streams 1-3.
 #ifdef GOAMD64_v3
 	SHLXQ br_bits_read, 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) << (b.bitsRead & 63)
 #else
 	MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
 	MOVQ br_bits_read, CX
 	SHLQ CL, AX
 #endif
 	ORQ AX, br_value
 	// exhausted = exhausted || (br0.off < 4)
 	CMPQ  br_offset, $4
 	SETLT DL
 	ORB   DL, DH
 	// }
 skip_fill0:
 	// val0 := br0.peekTopBits(peekBits)
 #ifdef GOAMD64_v3
 	SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
 #else
 	MOVQ br_value, AX
 	MOVQ peek_bits, CX
 	SHRQ CL, AX        // AX = (value >> peek_bits) & mask
 #endif
 	// v0 := table[val0&mask]
 	MOVW 0(table)(AX*2), AX // AX - v0
 	// br0.advance(uint8(v0.entry))
 	MOVB AH, BL // BL = uint8(v0.entry >> 8)
 #ifdef GOAMD64_v3
 	MOVBQZX AL, CX
 	SHLXQ   AX, br_value, br_value // value <<= n
 #else
 	MOVBQZX AL, CX
 	SHLQ    CL, br_value // value <<= n
 #endif
 	ADDQ CX, br_bits_read // bits_read += n
 #ifdef GOAMD64_v3
 	SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
 #else
 	// val1 := br0.peekTopBits(peekBits)
 	MOVQ peek_bits, CX
 	MOVQ br_value, AX
 	SHRQ CL, AX        // AX = (value >> peek_bits) & mask
 #endif
 	// v1 := table[val1&mask]
 	MOVW 0(table)(AX*2), AX // AX - v1
 	// br0.advance(uint8(v1.entry))
 	MOVB AH, BH // BH = uint8(v1.entry >> 8)
 #ifdef GOAMD64_v3
 	MOVBQZX AL, CX
 	SHLXQ   AX, br_value, br_value // value <<= n
 #else
 	MOVBQZX AL, CX
 	SHLQ    CL, br_value // value <<= n
 #endif
 	ADDQ CX, br_bits_read // bits_read += n
 	// both symbols are stored with a single 16-bit write (BL, BH)
 	// buf[stream][off] = uint8(v0.entry >> 8)
 	// buf[stream][off+1] = uint8(v1.entry >> 8)
 	MOVW BX, 0(buffer)(off*1)
 	// write the updated bit reader state back to br0
 	MOVB br_bits_read, bitReaderShifted_bitsRead(br0)
 	MOVQ br_value, bitReaderShifted_value(br0)
 	MOVQ br_offset, bitReaderShifted_off(br0)
 	// const stream = 1
 	// br1.fillFast()
 	MOVBQZX bitReaderShifted_bitsRead(br1), br_bits_read
 	MOVQ    bitReaderShifted_value(br1), br_value
 	MOVQ    bitReaderShifted_off(br1), br_offset
 	// We must have at least 2 * max tablelog left
 	CMPQ br_bits_read, $64-22
 	JBE  skip_fill1
 	SUBQ $32, br_bits_read // b.bitsRead -= 32
 	SUBQ $4, br_offset     // b.off -= 4
 	// v := b.in[b.off-4 : b.off]
 	// v = v[:4]
 	// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
 	MOVQ bitReaderShifted_in(br1), AX
 	// b.value |= uint64(low) << (b.bitsRead & 63)
 #ifdef GOAMD64_v3
 	SHLXQ br_bits_read, 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) << (b.bitsRead & 63)
 #else
 	MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
 	MOVQ br_bits_read, CX
 	SHLQ CL, AX
 #endif
 	ORQ AX, br_value
 	// exhausted = exhausted || (br1.off < 4)
 	CMPQ  br_offset, $4
 	SETLT DL
 	ORB   DL, DH
 	// }
 skip_fill1:
 	// val0 := br1.peekTopBits(peekBits)
 #ifdef GOAMD64_v3
 	SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
 #else
 	MOVQ br_value, AX
 	MOVQ peek_bits, CX
 	SHRQ CL, AX        // AX = (value >> peek_bits) & mask
 #endif
 	// v0 := table[val0&mask]
 	MOVW 0(table)(AX*2), AX // AX - v0
 	// br1.advance(uint8(v0.entry))
 	MOVB AH, BL // BL = uint8(v0.entry >> 8)
 #ifdef GOAMD64_v3
 	MOVBQZX AL, CX
 	SHLXQ   AX, br_value, br_value // value <<= n
 #else
 	MOVBQZX AL, CX
 	SHLQ    CL, br_value // value <<= n
 #endif
 	ADDQ CX, br_bits_read // bits_read += n
 #ifdef GOAMD64_v3
 	SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
 #else
 	// val1 := br1.peekTopBits(peekBits)
 	MOVQ peek_bits, CX
 	MOVQ br_value, AX
 	SHRQ CL, AX        // AX = (value >> peek_bits) & mask
 #endif
 	// v1 := table[val1&mask]
 	MOVW 0(table)(AX*2), AX // AX - v1
 	// br1.advance(uint8(v1.entry))
 	MOVB AH, BH // BH = uint8(v1.entry >> 8)
 #ifdef GOAMD64_v3
 	MOVBQZX AL, CX
 	SHLXQ   AX, br_value, br_value // value <<= n
 #else
 	MOVBQZX AL, CX
 	SHLQ    CL, br_value // value <<= n
 #endif
 	ADDQ CX, br_bits_read // bits_read += n
 	// both symbols are stored with a single 16-bit write (BL, BH)
 	// buf[stream][off] = uint8(v0.entry >> 8)
 	// buf[stream][off+1] = uint8(v1.entry >> 8)
 	MOVW BX, 256(buffer)(off*1)
 	// write the updated bit reader state back to br1
 	MOVB br_bits_read, bitReaderShifted_bitsRead(br1)
 	MOVQ br_value, bitReaderShifted_value(br1)
 	MOVQ br_offset, bitReaderShifted_off(br1)
 	// const stream = 2
 	// br2.fillFast()
 	MOVBQZX bitReaderShifted_bitsRead(br2), br_bits_read
 	MOVQ    bitReaderShifted_value(br2), br_value
 	MOVQ    bitReaderShifted_off(br2), br_offset
 	// We must have at least 2 * max tablelog left
 	CMPQ br_bits_read, $64-22
 	JBE  skip_fill2
 	SUBQ $32, br_bits_read // b.bitsRead -= 32
 	SUBQ $4, br_offset     // b.off -= 4
 	// v := b.in[b.off-4 : b.off]
 	// v = v[:4]
 	// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
 	MOVQ bitReaderShifted_in(br2), AX
 	// b.value |= uint64(low) << (b.bitsRead & 63)
 #ifdef GOAMD64_v3
 	SHLXQ br_bits_read, 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) << (b.bitsRead & 63)
 #else
 	MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
 	MOVQ br_bits_read, CX
 	SHLQ CL, AX
 #endif
 	ORQ AX, br_value
 	// exhausted = exhausted || (br2.off < 4)
 	CMPQ  br_offset, $4
 	SETLT DL
 	ORB   DL, DH
 	// }
 skip_fill2:
 	// val0 := br2.peekTopBits(peekBits)
 #ifdef GOAMD64_v3
 	SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
 #else
 	MOVQ br_value, AX
 	MOVQ peek_bits, CX
 	SHRQ CL, AX        // AX = (value >> peek_bits) & mask
 #endif
 	// v0 := table[val0&mask]
 	MOVW 0(table)(AX*2), AX // AX - v0
 	// br2.advance(uint8(v0.entry))
 	MOVB AH, BL // BL = uint8(v0.entry >> 8)
 #ifdef GOAMD64_v3
 	MOVBQZX AL, CX
 	SHLXQ   AX, br_value, br_value // value <<= n
 #else
 	MOVBQZX AL, CX
 	SHLQ    CL, br_value // value <<= n
 #endif
 	ADDQ CX, br_bits_read // bits_read += n
 #ifdef GOAMD64_v3
 	SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
 #else
 	// val1 := br2.peekTopBits(peekBits)
 	MOVQ peek_bits, CX
 	MOVQ br_value, AX
 	SHRQ CL, AX        // AX = (value >> peek_bits) & mask
 #endif
 	// v1 := table[val1&mask]
 	MOVW 0(table)(AX*2), AX // AX - v1
 	// br2.advance(uint8(v1.entry))
 	MOVB AH, BH // BH = uint8(v1.entry >> 8)
 #ifdef GOAMD64_v3
 	MOVBQZX AL, CX
 	SHLXQ   AX, br_value, br_value // value <<= n
 #else
 	MOVBQZX AL, CX
 	SHLQ    CL, br_value // value <<= n
 #endif
 	ADDQ CX, br_bits_read // bits_read += n
 	// both symbols are stored with a single 16-bit write (BL, BH)
 	// buf[stream][off] = uint8(v0.entry >> 8)
 	// buf[stream][off+1] = uint8(v1.entry >> 8)
 	MOVW BX, 512(buffer)(off*1)
 	// write the updated bit reader state back to br2
 	MOVB br_bits_read, bitReaderShifted_bitsRead(br2)
 	MOVQ br_value, bitReaderShifted_value(br2)
 	MOVQ br_offset, bitReaderShifted_off(br2)
 	// const stream = 3
 	// br3.fillFast()
 	MOVBQZX bitReaderShifted_bitsRead(br3), br_bits_read
 	MOVQ    bitReaderShifted_value(br3), br_value
 	MOVQ    bitReaderShifted_off(br3), br_offset
 	// We must have at least 2 * max tablelog left
 	CMPQ br_bits_read, $64-22
 	JBE  skip_fill3
 	SUBQ $32, br_bits_read // b.bitsRead -= 32
 	SUBQ $4, br_offset     // b.off -= 4
 	// v := b.in[b.off-4 : b.off]
 	// v = v[:4]
 	// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
 	MOVQ bitReaderShifted_in(br3), AX
 	// b.value |= uint64(low) << (b.bitsRead & 63)
 #ifdef GOAMD64_v3
 	SHLXQ br_bits_read, 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) << (b.bitsRead & 63)
 #else
 	MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
 	MOVQ br_bits_read, CX
 	SHLQ CL, AX
 #endif
 	ORQ AX, br_value
 	// exhausted = exhausted || (br3.off < 4)
 	CMPQ  br_offset, $4
 	SETLT DL
 	ORB   DL, DH
 	// }
 skip_fill3:
 	// val0 := br3.peekTopBits(peekBits)
 #ifdef GOAMD64_v3
 	SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
 #else
 	MOVQ br_value, AX
 	MOVQ peek_bits, CX
 	SHRQ CL, AX        // AX = (value >> peek_bits) & mask
 #endif
 	// v0 := table[val0&mask]
 	MOVW 0(table)(AX*2), AX // AX - v0
 	// br3.advance(uint8(v0.entry))
 	MOVB AH, BL // BL = uint8(v0.entry >> 8)
 #ifdef GOAMD64_v3
 	MOVBQZX AL, CX
 	SHLXQ   AX, br_value, br_value // value <<= n
 #else
 	MOVBQZX AL, CX
 	SHLQ    CL, br_value // value <<= n
 #endif
 	ADDQ CX, br_bits_read // bits_read += n
 #ifdef GOAMD64_v3
 	SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
 #else
 	// val1 := br3.peekTopBits(peekBits)
 	MOVQ peek_bits, CX
 	MOVQ br_value, AX
 	SHRQ CL, AX        // AX = (value >> peek_bits) & mask
 #endif
 	// v1 := table[val1&mask]
 	MOVW 0(table)(AX*2), AX // AX - v1
 	// br3.advance(uint8(v1.entry))
 	MOVB AH, BH // BH = uint8(v1.entry >> 8)
 #ifdef GOAMD64_v3
 	MOVBQZX AL, CX
 	SHLXQ   AX, br_value, br_value // value <<= n
 #else
 	MOVBQZX AL, CX
 	SHLQ    CL, br_value // value <<= n
 #endif
 	ADDQ CX, br_bits_read // bits_read += n
 	// both symbols are stored with a single 16-bit write (BL, BH)
 	// buf[stream][off] = uint8(v0.entry >> 8)
 	// buf[stream][off+1] = uint8(v1.entry >> 8)
 	MOVW BX, 768(buffer)(off*1)
 	// write the updated bit reader state back to br3
 	MOVB br_bits_read, bitReaderShifted_bitsRead(br3)
 	MOVQ br_value, bitReaderShifted_value(br3)
 	MOVQ br_offset, bitReaderShifted_off(br3)
 	ADDQ $2, off // off += 2
 	TESTB DH, DH // any br[i].off < 4?
 	JNZ   end
 	// keep going until the 256-byte per-stream buffer is full
 	CMPQ off, $bufoff
 	JL   main_loop
 end:
 	// restore the caller's BP and return the low byte of off
 	MOVQ 0(SP), BP
 	MOVB off, ret+56(FP)
 	RET
 #undef off
 #undef buffer
 #undef table
 #undef br_bits_read
 #undef br_value
 #undef br_offset
 #undef peek_bits
 #undef exhausted
 #undef br0
 #undef br1
 #undef br2
 #undef br3
--- a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s.in
+++ b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s.in
@ -0,0 +1,195 @@
 // +build !appengine
 // +build gc
 // +build !noasm
 #include "textflag.h"
 #include "funcdata.h"
 #include "go_asm.h"
 #ifdef GOAMD64_v4
 #ifndef GOAMD64_v3
 #define GOAMD64_v3
 #endif
 #endif
 #define bufoff      256     // see decompress.go, we're using [4][256]byte table
 //func decompress4x_main_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
 //	peekBits uint8, buf *byte, tbl *dEntrySingle) (int, bool)
 //
 // Main loop of the 4-stream Huffman decoder. Every iteration decodes two
 // symbols from each of the four bit readers, stores them with one 16-bit
 // write into that stream's part of buf, writes the reader state back and
 // adds 2 to off. The loop ends when off reaches bufoff or when a refill left
 // one of the readers with off < 4. Only the low byte of off is stored to
 // ret+56(FP), so a completely filled buffer (off == 256) is reported as 0.
 //
 // NOTE(review): the prototype above lists (int, bool) results, but the code
 // stores a single byte result - confirm against the Go declaration.
 // NOTE(review): the template sets an "ofs" variable for every stream, but
 // decode_2_values_x86 never reads it; only "id" and "bufofs" are used.
 TEXT ·decompress4x_main_loop_x86(SB), NOSPLIT, $8
 #define off             R8
 #define buffer          DI
 #define table           SI
 #define br_bits_read    R9
 #define br_value        R10
 #define br_offset       R11
 #define peek_bits       R12
 // exhausted lives in DX: DL receives each "off < 4" comparison result and
 // DH accumulates them across streams and iterations.
 #define exhausted       DX
 #define br0             R13
 #define br1             R14
 #define br2             R15
 #define br3             BP
    // Save BP (it doubles as br3); it is restored right before returning.
    MOVQ    BP, 0(SP)
    XORQ    exhausted, exhausted    // exhausted = false
    XORQ    off, off                // off = 0
    MOVBQZX peekBits+32(FP), peek_bits
    MOVQ    buf+40(FP), buffer
    MOVQ    tbl+48(FP), table
    MOVQ    pbr0+0(FP), br0
    MOVQ    pbr1+8(FP), br1
    MOVQ    pbr2+16(FP), br2
    MOVQ    pbr3+24(FP), br3
 main_loop:
 {{ define "decode_2_values_x86" }}
    // const stream = {{ var "id" }}
    // br{{ var "id"}}.fillFast()
    MOVBQZX bitReaderShifted_bitsRead(br{{ var "id" }}), br_bits_read
    MOVQ    bitReaderShifted_value(br{{ var "id" }}), br_value
    MOVQ    bitReaderShifted_off(br{{ var "id" }}), br_offset
    // We must have at least 2 * max tablelog left
    // (refill is skipped while bitsRead <= 64-22).
    CMPQ    br_bits_read, $64-22
    JBE     skip_fill{{ var "id" }}
    SUBQ    $32, br_bits_read       // b.bitsRead -= 32
    SUBQ    $4, br_offset           // b.off -= 4
 	// v := b.in[b.off-4 : b.off]
 	// v = v[:4]
 	// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
    MOVQ    bitReaderShifted_in(br{{ var "id" }}), AX
 	// b.value |= uint64(low) << (b.bitsRead & 63)
 	// NOTE(review): the SHLXQ form takes a 64-bit memory operand while the
 	// fallback MOVL loads 32 bits; confirm the extra high bytes are harmless.
 #ifdef GOAMD64_v3
    SHLXQ   br_bits_read, 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) << (b.bitsRead & 63)
 #else
    MOVL    0(br_offset)(AX*1), AX  // AX = uint32(b.in[b.off:b.off+4])
    MOVQ    br_bits_read, CX
    SHLQ    CL, AX
 #endif
    ORQ     AX, br_value
    // exhausted = exhausted || (br{{ var "id"}}.off < 4)
    CMPQ    br_offset, $4
    SETLT   DL
    ORB     DL, DH
    // }
 skip_fill{{ var "id" }}:
    // val0 := br{{ var "id"}}.peekTopBits(peekBits)
 #ifdef GOAMD64_v3
    SHRXQ   peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
 #else
    MOVQ    br_value, AX
    MOVQ    peek_bits, CX
    SHRQ    CL, AX                  // AX = (value >> peek_bits) & mask
 #endif
    // v0 := table[val0&mask]
    MOVW    0(table)(AX*2), AX      // AX - v0
    // br{{ var "id"}}.advance(uint8(v0.entry))
    MOVB    AH, BL                  // BL = uint8(v0.entry >> 8)
 #ifdef GOAMD64_v3
    MOVBQZX AL, CX
    SHLXQ   AX, br_value, br_value // value <<= n
 #else
    MOVBQZX AL, CX
    SHLQ    CL, br_value            // value <<= n
 #endif
    ADDQ    CX, br_bits_read        // bits_read += n
 #ifdef GOAMD64_v3
    SHRXQ    peek_bits, br_value, AX  // AX = (value >> peek_bits) & mask
 #else
    // val1 := br{{ var "id"}}.peekTopBits(peekBits)
    MOVQ    peek_bits, CX
    MOVQ    br_value, AX
    SHRQ    CL, AX                  // AX = (value >> peek_bits) & mask
 #endif
    // v1 := table[val1&mask]
    MOVW    0(table)(AX*2), AX      // AX - v1
    // br{{ var "id"}}.advance(uint8(v1.entry))
    MOVB    AH, BH                  // BH = uint8(v1.entry >> 8)
 #ifdef GOAMD64_v3
    MOVBQZX AL, CX
    SHLXQ   AX, br_value, br_value // value <<= n
 #else
    MOVBQZX AL, CX
    SHLQ    CL, br_value            // value <<= n
 #endif
    ADDQ    CX, br_bits_read        // bits_read += n
    // both symbols are stored with a single 16-bit write (BL, BH)
    // buf[stream][off] = uint8(v0.entry >> 8)
    // buf[stream][off+1] = uint8(v1.entry >> 8)
    MOVW    BX, {{ var "bufofs" }}(buffer)(off*1)
    // write the updated bit reader state back
    MOVB    br_bits_read, bitReaderShifted_bitsRead(br{{ var "id" }})
    MOVQ    br_value, bitReaderShifted_value(br{{ var "id" }})
    MOVQ    br_offset, bitReaderShifted_off(br{{ var "id" }})
 {{ end }}
    {{ set "id" "0" }}
    {{ set "ofs" "0" }}
    {{ set "bufofs" "0" }} {{/* id * bufoff */}}
    {{ template "decode_2_values_x86" . }}
    {{ set "id" "1" }}
    {{ set "ofs" "8" }}
    {{ set "bufofs" "256" }}
    {{ template "decode_2_values_x86" . }}
    {{ set "id" "2" }}
    {{ set "ofs" "16" }}
    {{ set "bufofs" "512" }}
    {{ template "decode_2_values_x86" . }}
    {{ set "id" "3" }}
    {{ set "ofs" "24" }}
    {{ set "bufofs" "768" }}
    {{ template "decode_2_values_x86" . }}
    ADDQ    $2, off     // off += 2
    TESTB   DH, DH      // any br[i].off < 4?
    JNZ     end
    // keep going until the 256-byte per-stream buffer is full
    CMPQ    off, $bufoff
    JL      main_loop
 end:
    // restore the caller's BP and return the low byte of off
    MOVQ    0(SP), BP
    MOVB    off, ret+56(FP)
    RET
 #undef  off
 #undef  buffer
 #undef  table
 #undef  br_bits_read
 #undef  br_value
 #undef  br_offset
 #undef  peek_bits
 #undef  exhausted
 #undef  br0
 #undef  br1
 #undef  br2
 #undef  br3
--- a/vendor/github.com/klauspost/compress/huff0/decompress_generic.go
+++ b/vendor/github.com/klauspost/compress/huff0/decompress_generic.go
@ -0,0 +1,193 @@
 //go:build !amd64 || appengine || !gc || noasm
 // +build !amd64 appengine !gc noasm
 // This file contains a generic implementation of Decoder.Decompress4X.
 package huff0
 import (
 	"errors"
 	"fmt"
 )
 // Decompress4X will decompress a 4X encoded stream.
 // The length of the supplied input must match the end of a block exactly.
 // The *capacity* of the dst slice must match the destination size of
 // the uncompressed data exactly.
 //
 // src starts with a 6-byte jump table holding three little-endian 16-bit
 // stream lengths; the fourth stream is whatever follows the first three.
 // Stream i is decoded into dst starting at i*((cap(dst)+3)/4).
 //
 // On success dst, resliced to its full capacity, is returned. An error is
 // returned when no table is loaded, when the input is too small or
 // truncated, or when the streams do not decode to exactly cap(dst) bytes.
 func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
 	if len(d.dt.single) == 0 {
 		return nil, errors.New("no table loaded")
 	}
 	// 6 bytes of jump table plus 4 further bytes (presumably at least
 	// one byte for each of the four streams).
 	if len(src) < 6+(4*1) {
 		return nil, errors.New("input too small")
 	}
 	if use8BitTables && d.actualTableLog <= 8 {
 		return d.decompress4X8bit(dst, src)
 	}
 	var br [4]bitReaderShifted
 	// Decode "jump table"
 	start := 6
 	for i := 0; i < 3; i++ {
 		length := int(src[i*2]) | (int(src[i*2+1]) << 8)
 		if start+length >= len(src) {
 			return nil, errors.New("truncated input (or invalid offset)")
 		}
 		err := br[i].init(src[start : start+length])
 		if err != nil {
 			return nil, err
 		}
 		start += length
 	}
 	err := br[3].init(src[start:])
 	if err != nil {
 		return nil, err
 	}
 	// destination, offset to match first output
 	dstSize := cap(dst)
 	dst = dst[:dstSize]
 	out := dst
 	dstEvery := (dstSize + 3) / 4
 	const tlSize = 1 << tableLogMax
 	const tlMask = tlSize - 1
 	single := d.dt.single[:tlSize]
 	// Use temp table to avoid bound checks/append penalty.
 	buf := d.buffer()
 	// Hand the scratch buffer back on every exit from here on, including
 	// the br.close() error path, which previously returned without it.
 	defer d.bufs.Put(buf)
 	// off is a uint8 on purpose: it wraps to 0 once 256 bytes have been
 	// written per stream, which signals that buf must be flushed.
 	var off uint8
 	var decoded int
 	// Decode 2 values from each decoder/loop.
 	const bufoff = 256
 	for {
 		// The fast path needs at least 4 input bytes left in every stream.
 		if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
 			break
 		}
 		{
 			const stream = 0
 			const stream2 = 1
 			br[stream].fillFast()
 			br[stream2].fillFast()
 			val := br[stream].peekBitsFast(d.actualTableLog)
 			val2 := br[stream2].peekBitsFast(d.actualTableLog)
 			v := single[val&tlMask]
 			v2 := single[val2&tlMask]
 			br[stream].advance(uint8(v.entry))
 			br[stream2].advance(uint8(v2.entry))
 			buf[stream][off] = uint8(v.entry >> 8)
 			buf[stream2][off] = uint8(v2.entry >> 8)
 			val = br[stream].peekBitsFast(d.actualTableLog)
 			val2 = br[stream2].peekBitsFast(d.actualTableLog)
 			v = single[val&tlMask]
 			v2 = single[val2&tlMask]
 			br[stream].advance(uint8(v.entry))
 			br[stream2].advance(uint8(v2.entry))
 			buf[stream][off+1] = uint8(v.entry >> 8)
 			buf[stream2][off+1] = uint8(v2.entry >> 8)
 		}
 		{
 			const stream = 2
 			const stream2 = 3
 			br[stream].fillFast()
 			br[stream2].fillFast()
 			val := br[stream].peekBitsFast(d.actualTableLog)
 			val2 := br[stream2].peekBitsFast(d.actualTableLog)
 			v := single[val&tlMask]
 			v2 := single[val2&tlMask]
 			br[stream].advance(uint8(v.entry))
 			br[stream2].advance(uint8(v2.entry))
 			buf[stream][off] = uint8(v.entry >> 8)
 			buf[stream2][off] = uint8(v2.entry >> 8)
 			val = br[stream].peekBitsFast(d.actualTableLog)
 			val2 = br[stream2].peekBitsFast(d.actualTableLog)
 			v = single[val&tlMask]
 			v2 = single[val2&tlMask]
 			br[stream].advance(uint8(v.entry))
 			br[stream2].advance(uint8(v2.entry))
 			buf[stream][off+1] = uint8(v.entry >> 8)
 			buf[stream2][off+1] = uint8(v2.entry >> 8)
 		}
 		off += 2
 		if off == 0 {
 			// buf is full: flush 256 bytes per stream into dst.
 			if bufoff > dstEvery {
 				return nil, errors.New("corruption detected: stream overrun 1")
 			}
 			copy(out, buf[0][:])
 			copy(out[dstEvery:], buf[1][:])
 			copy(out[dstEvery*2:], buf[2][:])
 			copy(out[dstEvery*3:], buf[3][:])
 			out = out[bufoff:]
 			decoded += bufoff * 4
 			// There must at least be 3 buffers left.
 			if len(out) < dstEvery*3 {
 				return nil, errors.New("corruption detected: stream overrun 2")
 			}
 		}
 	}
 	if off > 0 {
 		// Flush the partially filled buffer left over from the fast loop.
 		ioff := int(off)
 		if len(out) < dstEvery*3+ioff {
 			return nil, errors.New("corruption detected: stream overrun 3")
 		}
 		copy(out, buf[0][:off])
 		copy(out[dstEvery:], buf[1][:off])
 		copy(out[dstEvery*2:], buf[2][:off])
 		copy(out[dstEvery*3:], buf[3][:off])
 		decoded += int(off) * 4
 		out = out[off:]
 	}
 	// Decode remaining.
 	remainBytes := dstEvery - (decoded / 4)
 	for i := range br {
 		offset := dstEvery * i
 		endsAt := offset + remainBytes
 		// The last stream may be shorter than dstEvery; clamp to what is left.
 		if endsAt > len(out) {
 			endsAt = len(out)
 		}
 		br := &br[i]
 		bitsLeft := br.remaining()
 		for bitsLeft > 0 {
 			br.fill()
 			if offset >= endsAt {
 				return nil, errors.New("corruption detected: stream overrun 4")
 			}
 			// Read value and increment offset.
 			val := br.peekBitsFast(d.actualTableLog)
 			v := single[val&tlMask].entry
 			nBits := uint8(v)
 			br.advance(nBits)
 			bitsLeft -= uint(nBits)
 			out[offset] = uint8(v >> 8)
 			offset++
 		}
 		if offset != endsAt {
 			return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt)
 		}
 		decoded += offset - dstEvery*i
 		err = br.close()
 		if err != nil {
 			return nil, err
 		}
 	}
 	if dstSize != decoded {
 		return nil, errors.New("corruption detected: short output block")
 	}
 	return dst, nil
 }
--- a/vendor/github.com/klauspost/compress/zstd/README.md
+++ b/vendor/github.com/klauspost/compress/zstd/README.md
@ -153,10 +153,10 @@ http://sun.aei.polsl.pl/~sdeor/corpus/silesia.zip
 This package:
 file    out     level   insize      outsize     millis  mb/s
-silesia.tar zskp    1   211947520   73101992    643     313.87
+silesia.tar zskp    1   211947520   73821326    634     318.47
-silesia.tar zskp    2   211947520   67504318    969     208.38
+silesia.tar zskp    2   211947520   67655404    1508    133.96
-silesia.tar zskp    3   211947520   64595893    2007    100.68
+silesia.tar zskp    3   211947520   64746933    3000    67.37
-silesia.tar zskp    4   211947520   60995370    8825    22.90
+silesia.tar zskp    4   211947520   60073508    16926   11.94
 cgo zstd:
 silesia.tar zstd    1   211947520   73605392    543     371.56
@ -165,94 +165,94 @@ silesia.tar zstd    6   211947520   62916450    1913    105.66
 silesia.tar zstd    9   211947520   60212393    5063    39.92
 gzip, stdlib/this package:
-silesia.tar gzstd   1   211947520   80007735    1654    122.21
+silesia.tar gzstd   1   211947520   80007735    1498    134.87
-silesia.tar gzkp    1   211947520   80136201    1152    175.45
+silesia.tar gzkp    1   211947520   80088272    1009    200.31
 GOB stream of binary data. Highly compressible.
 https://files.klauspost.com/compress/gob-stream.7z
 file        out     level   insize  outsize     millis  mb/s
-gob-stream  zskp    1   1911399616  235022249   3088    590.30
+gob-stream  zskp    1   1911399616  233948096   3230    564.34
-gob-stream  zskp    2   1911399616  205669791   3786    481.34
+gob-stream  zskp    2   1911399616  203997694   4997    364.73
-gob-stream  zskp    3   1911399616  175034659   9636    189.17
+gob-stream  zskp    3   1911399616  173526523   13435   135.68
-gob-stream  zskp    4   1911399616  165609838   50369   36.19
+gob-stream  zskp    4   1911399616  162195235   47559   38.33
 gob-stream  zstd    1   1911399616  249810424   2637    691.26
 gob-stream  zstd    3   1911399616  208192146   3490    522.31
 gob-stream  zstd    6   1911399616  193632038   6687    272.56
 gob-stream  zstd    9   1911399616  177620386   16175   112.70
-gob-stream  gzstd   1   1911399616  357382641   10251   177.82
+gob-stream  gzstd   1   1911399616  357382013   9046    201.49
-gob-stream  gzkp    1   1911399616  359753026   5438    335.20
+gob-stream  gzkp    1   1911399616  359136669   4885    373.08
 The test data for the Large Text Compression Benchmark is the first
 10^9 bytes of the English Wikipedia dump on Mar. 3, 2006.
 http://mattmahoney.net/dc/textdata.html
 file    out level   insize      outsize     millis  mb/s
-enwik9  zskp    1   1000000000  343848582   3609    264.18
+enwik9  zskp    1   1000000000  343833605   3687    258.64
-enwik9  zskp    2   1000000000  317276632   5746    165.97
+enwik9  zskp    2   1000000000  317001237   7672    124.29
-enwik9  zskp    3   1000000000  292243069   12162   78.41
+enwik9  zskp    3   1000000000  291915823   15923   59.89
-enwik9  zskp    4   1000000000  262183768   82837   11.51
+enwik9  zskp    4   1000000000  261710291   77697   12.27
 enwik9  zstd    1   1000000000  358072021   3110    306.65
 enwik9  zstd    3   1000000000  313734672   4784    199.35
 enwik9  zstd    6   1000000000  295138875   10290   92.68
 enwik9  zstd    9   1000000000  278348700   28549   33.40
-enwik9  gzstd   1   1000000000  382578136   9604    99.30
+enwik9  gzstd   1   1000000000  382578136   8608    110.78
-enwik9  gzkp    1   1000000000  383825945   6544    145.73
+enwik9  gzkp    1   1000000000  382781160   5628    169.45
 Highly compressible JSON file.
 https://files.klauspost.com/compress/github-june-2days-2019.json.zst
 file                        out level   insize      outsize     millis  mb/s
-github-june-2days-2019.json zskp    1   6273951764  699045015   10620   563.40
+github-june-2days-2019.json zskp    1   6273951764  697439532   9789    611.17
-github-june-2days-2019.json zskp    2   6273951764  617881763   11687   511.96
+github-june-2days-2019.json zskp    2   6273951764  610876538   18553   322.49
-github-june-2days-2019.json zskp    3   6273951764  524340691   34043   175.75
+github-june-2days-2019.json zskp    3   6273951764  517662858   44186   135.41
-github-june-2days-2019.json zskp    4   6273951764  470320075   170190  35.16
+github-june-2days-2019.json zskp    4   6273951764  464617114   165373  36.18
 github-june-2days-2019.json zstd    1   6273951764  766284037   8450    708.00
 github-june-2days-2019.json zstd    3   6273951764  661889476   10927   547.57
 github-june-2days-2019.json zstd    6   6273951764  642756859   22996   260.18
 github-june-2days-2019.json zstd    9   6273951764  601974523   52413   114.16
-github-june-2days-2019.json gzstd   1   6273951764  1164400847  29948   199.79
+github-june-2days-2019.json gzstd   1   6273951764  1164397768  26793   223.32
-github-june-2days-2019.json gzkp    1   6273951764  1125417694  21788   274.61
+github-june-2days-2019.json gzkp    1   6273951764  1120631856  17693   338.16
 VM Image, Linux mint with a few installed applications:
 https://files.klauspost.com/compress/rawstudio-mint14.7z
 file                    out level   insize      outsize     millis  mb/s
-rawstudio-mint14.tar    zskp    1   8558382592  3667489370  20210   403.84
+rawstudio-mint14.tar    zskp    1   8558382592  3718400221  18206   448.29
-rawstudio-mint14.tar    zskp    2   8558382592  3364592300  31873   256.07
+rawstudio-mint14.tar    zskp    2   8558382592  3326118337  37074   220.15
-rawstudio-mint14.tar    zskp    3   8558382592  3158085214  77675   105.08
+rawstudio-mint14.tar    zskp    3   8558382592  3163842361  87306   93.49
-rawstudio-mint14.tar    zskp    4   8558382592  2965110639  857750  9.52
+rawstudio-mint14.tar    zskp    4   8558382592  2970480650  783862  10.41
 rawstudio-mint14.tar    zstd    1   8558382592  3609250104  17136   476.27
 rawstudio-mint14.tar    zstd    3   8558382592  3341679997  29262   278.92
 rawstudio-mint14.tar    zstd    6   8558382592  3235846406  77904   104.77
 rawstudio-mint14.tar    zstd    9   8558382592  3160778861  140946  57.91
-rawstudio-mint14.tar    gzstd   1   8558382592  3926257486  57722   141.40
+rawstudio-mint14.tar    gzstd   1   8558382592  3926234992  51345   158.96
-rawstudio-mint14.tar    gzkp    1   8558382592  3962605659  45113   180.92
+rawstudio-mint14.tar    gzkp    1   8558382592  3960117298  36722   222.26
 CSV data:
 https://files.klauspost.com/compress/nyc-taxi-data-10M.csv.zst
 file                    out level   insize      outsize     millis  mb/s
-nyc-taxi-data-10M.csv   zskp    1   3325605752  641339945   8925    355.35
+nyc-taxi-data-10M.csv   zskp    1   3325605752  641319332   9462    335.17
-nyc-taxi-data-10M.csv   zskp    2   3325605752  591748091   11268   281.44
+nyc-taxi-data-10M.csv   zskp    2   3325605752  588976126   17570   180.50
-nyc-taxi-data-10M.csv   zskp    3   3325605752  530289687   25239   125.66
+nyc-taxi-data-10M.csv   zskp    3   3325605752  529329260   32432   97.79
-nyc-taxi-data-10M.csv   zskp    4   3325605752  476268884   135958  23.33
+nyc-taxi-data-10M.csv   zskp    4   3325605752  474949772   138025  22.98
 nyc-taxi-data-10M.csv   zstd    1   3325605752  687399637   8233    385.18
 nyc-taxi-data-10M.csv   zstd    3   3325605752  598514411   10065   315.07
 nyc-taxi-data-10M.csv   zstd    6   3325605752  570522953   20038   158.27
 nyc-taxi-data-10M.csv   zstd    9   3325605752  517554797   64565   49.12
-nyc-taxi-data-10M.csv   gzstd   1   3325605752  928656485   23876   132.83
+nyc-taxi-data-10M.csv   gzstd   1   3325605752  928654908   21270   149.11
-nyc-taxi-data-10M.csv   gzkp    1   3325605752  922257165   16780   189.00
+nyc-taxi-data-10M.csv   gzkp    1   3325605752  922273214   13929   227.68
 ```
 ## Decompressor
--- a/vendor/github.com/klauspost/compress/zstd/blockdec.go
+++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go
@ -167,6 +167,11 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
 			}
 			return ErrCompressedSizeTooBig
 		}
 		// Empty compressed blocks must at least be 2 bytes
 		// for Literals_Block_Type and one for Sequences_Section_Header.
 		if cSize < 2 {
 			return ErrBlockTooSmall
 		}
 	case blockTypeRaw:
 		if cSize > maxCompressedBlockSize || cSize > int(b.WindowSize) {
 			if debugDecoder {
@ -491,6 +496,9 @@ func (b *blockDec) decodeCompressed(hist *history) error {
 }
 func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) {
 	if debugDecoder {
 		printf("prepareSequences: %d byte(s) input\n", len(in))
 	}
 	// Decode Sequences
 	// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#sequences-section
 	if len(in) < 1 {
@ -499,8 +507,6 @@ func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) {
 	var nSeqs int
 	seqHeader := in[0]
 	switch {
 	case seqHeader == 0:
 		in = in[1:]
 	case seqHeader < 128:
 		nSeqs = int(seqHeader)
 		in = in[1:]
@ -517,6 +523,13 @@ func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) {
 		nSeqs = 0x7f00 + int(in[1]) + (int(in[2]) << 8)
 		in = in[3:]
 	}
 	if nSeqs == 0 && len(in) != 0 {
 		// When no sequences, there should not be any more data...
 		if debugDecoder {
 			printf("prepareSequences: 0 sequences, but %d byte(s) left on stream\n", len(in))
 		}
 		return ErrUnexpectedBlockSize
 	}
 	var seqs = &hist.decoders
 	seqs.nSeqs = nSeqs
@ -635,6 +648,7 @@ func (b *blockDec) decodeSequences(hist *history) error {
 		hist.decoders.seqSize = len(hist.decoders.literals)
 		return nil
 	}
 	hist.decoders.windowSize = hist.windowSize
 	hist.decoders.prevOffset = hist.recentOffsets
 	err := hist.decoders.decode(b.sequence)
 	hist.recentOffsets = hist.decoders.prevOffset
--- a/vendor/github.com/klauspost/compress/zstd/decoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/decoder.go
@ -348,10 +348,10 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
 			frame.history.setDict(&dict)
 		}
-		if frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)) {
+		if frame.FrameContentSize != fcsUnknown && frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)) {
 			return dst, ErrDecoderSizeExceeded
 		}
-		if frame.FrameContentSize > 0 && frame.FrameContentSize < 1<<30 {
+		if frame.FrameContentSize < 1<<30 {
 			// Never preallocate more than 1 GB up front.
 			if cap(dst)-len(dst) < int(frame.FrameContentSize) {
 				dst2 := make([]byte, len(dst), len(dst)+int(frame.FrameContentSize))
@ -514,7 +514,7 @@ func (d *Decoder) nextBlockSync() (ok bool) {
 		// Check frame size (before CRC)
 		d.syncStream.decodedFrame += uint64(len(d.current.b))
-		if d.frame.FrameContentSize > 0 && d.syncStream.decodedFrame > d.frame.FrameContentSize {
+		if d.syncStream.decodedFrame > d.frame.FrameContentSize {
 			if debugDecoder {
 				printf("DecodedFrame (%d) > FrameContentSize (%d)\n", d.syncStream.decodedFrame, d.frame.FrameContentSize)
 			}
@ -523,7 +523,7 @@ func (d *Decoder) nextBlockSync() (ok bool) {
 		}
 		// Check FCS
-		if d.current.d.Last && d.frame.FrameContentSize > 0 && d.syncStream.decodedFrame != d.frame.FrameContentSize {
+		if d.current.d.Last && d.frame.FrameContentSize != fcsUnknown && d.syncStream.decodedFrame != d.frame.FrameContentSize {
 			if debugDecoder {
 				printf("DecodedFrame (%d) != FrameContentSize (%d)\n", d.syncStream.decodedFrame, d.frame.FrameContentSize)
 			}
@ -700,6 +700,7 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
 				}
 				hist.decoders = block.async.newHist.decoders
 				hist.recentOffsets = block.async.newHist.recentOffsets
 				hist.windowSize = block.async.newHist.windowSize
 				if block.async.newHist.dict != nil {
 					hist.setDict(block.async.newHist.dict)
 				}
@ -811,11 +812,11 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
 			}
 			if !hasErr {
 				decodedFrame += uint64(len(do.b))
-				if fcs > 0 && decodedFrame > fcs {
+				if decodedFrame > fcs {
 					println("fcs exceeded", block.Last, fcs, decodedFrame)
 					do.err = ErrFrameSizeExceeded
 					hasErr = true
-				} else if block.Last && fcs > 0 && decodedFrame != fcs {
+				} else if block.Last && fcs != fcsUnknown && decodedFrame != fcs {
 					do.err = ErrFrameSizeMismatch
 					hasErr = true
 				} else {
--- a/vendor/github.com/klauspost/compress/zstd/framedec.go
+++ b/vendor/github.com/klauspost/compress/zstd/framedec.go
@ -197,7 +197,7 @@ func (d *frameDec) reset(br byteBuffer) error {
 	default:
 		fcsSize = 1 << v
 	}
-	d.FrameContentSize = 0
+	d.FrameContentSize = fcsUnknown
 	if fcsSize > 0 {
 		b, err := br.readSmall(fcsSize)
 		if err != nil {
@ -343,12 +343,7 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
 			err = ErrDecoderSizeExceeded
 			break
 		}
-		if d.SingleSegment && uint64(len(d.history.b)) > d.o.maxDecodedSize {
+		if uint64(len(d.history.b)-crcStart) > d.FrameContentSize {
 			println("runDecoder: single segment and", uint64(len(d.history.b)), ">", d.o.maxDecodedSize)
 			err = ErrFrameSizeExceeded
 			break
 		}
 		if d.FrameContentSize > 0 && uint64(len(d.history.b)-crcStart) > d.FrameContentSize {
 			println("runDecoder: FrameContentSize exceeded", uint64(len(d.history.b)-crcStart), ">", d.FrameContentSize)
 			err = ErrFrameSizeExceeded
 			break
@ -356,13 +351,13 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
 		if dec.Last {
 			break
 		}
-		if debugDecoder && d.FrameContentSize > 0 {
+		if debugDecoder {
 			println("runDecoder: FrameContentSize", uint64(len(d.history.b)-crcStart), "<=", d.FrameContentSize)
 		}
 	}
 	dst = d.history.b
 	if err == nil {
-		if d.FrameContentSize > 0 && uint64(len(d.history.b)-crcStart) != d.FrameContentSize {
+		if d.FrameContentSize != fcsUnknown && uint64(len(d.history.b)-crcStart) != d.FrameContentSize {
 			err = ErrFrameSizeMismatch
 		} else if d.HasCheckSum {
 			var n int
--- a/vendor/github.com/klauspost/compress/zstd/fuzz.go
+++ b/vendor/github.com/klauspost/compress/zstd/fuzz.go
@ -1,5 +1,5 @@
-//go:build gofuzz
+//go:build ignorecrc
-// +build gofuzz
+// +build ignorecrc
 // Copyright 2019+ Klaus Post. All rights reserved.
 // License information can be found in the LICENSE file.
--- a/vendor/github.com/klauspost/compress/zstd/fuzz_none.go
+++ b/vendor/github.com/klauspost/compress/zstd/fuzz_none.go
@ -1,5 +1,5 @@
-//go:build !gofuzz
+//go:build !ignorecrc
-// +build !gofuzz
+// +build !ignorecrc
 // Copyright 2019+ Klaus Post. All rights reserved.
 // License information can be found in the LICENSE file.
--- a/vendor/github.com/klauspost/compress/zstd/seqdec.go
+++ b/vendor/github.com/klauspost/compress/zstd/seqdec.go
@ -107,7 +107,10 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
 	llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
 	s.seqSize = 0
 	litRemain := len(s.literals)
-
+	maxBlockSize := maxCompressedBlockSize
 	if s.windowSize < maxBlockSize {
 		maxBlockSize = s.windowSize
 	}
 	for i := range seqs {
 		var ll, mo, ml int
 		if br.off > 4+((maxOffsetBits+16+16)>>3) {
@ -192,7 +195,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
 		}
 		s.seqSize += ll + ml
 		if s.seqSize > maxBlockSize {
-			return fmt.Errorf("output (%d) bigger than max block size", s.seqSize)
+			return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
 		}
 		litRemain -= ll
 		if litRemain < 0 {
@ -230,7 +233,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
 	}
 	s.seqSize += litRemain
 	if s.seqSize > maxBlockSize {
-		return fmt.Errorf("output (%d) bigger than max block size", s.seqSize)
+		return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
 	}
 	err := br.close()
 	if err != nil {
@ -347,6 +350,10 @@ func (s *sequenceDecs) decodeSync(history *history) error {
 	llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
 	hist := history.b[history.ignoreBuffer:]
 	out := s.out
 	maxBlockSize := maxCompressedBlockSize
 	if s.windowSize < maxBlockSize {
 		maxBlockSize = s.windowSize
 	}
 	for i := seqs - 1; i >= 0; i-- {
 		if br.overread() {
@ -426,7 +433,7 @@ func (s *sequenceDecs) decodeSync(history *history) error {
 		}
 		size := ll + ml + len(out)
 		if size-startSize > maxBlockSize {
-			return fmt.Errorf("output (%d) bigger than max block size", size)
+			return fmt.Errorf("output (%d) bigger than max block size (%d)", size, maxBlockSize)
 		}
 		if size > cap(out) {
 			// Not enough size, which can happen under high volume block streaming conditions
@ -535,6 +542,11 @@ func (s *sequenceDecs) decodeSync(history *history) error {
 		}
 	}
 	// Check if space for literals
 	if len(s.literals)+len(s.out)-startSize > maxBlockSize {
 		return fmt.Errorf("output (%d) bigger than max block size (%d)", len(s.out), maxBlockSize)
 	}
 	// Add final literals
 	s.out = append(out, s.literals...)
 	return br.close()
--- a/vendor/github.com/klauspost/compress/zstd/zip.go
+++ b/vendor/github.com/klauspost/compress/zstd/zip.go
@ -20,7 +20,7 @@ const ZipMethodPKWare = 20
 var zipReaderPool sync.Pool
-// newZipReader cannot be used since we would leak goroutines...
+// newZipReader creates a pooled zip decompressor.
 func newZipReader(r io.Reader) io.ReadCloser {
 	dec, ok := zipReaderPool.Get().(*Decoder)
 	if ok {
@ -44,10 +44,14 @@ func (r *pooledZipReader) Read(p []byte) (n int, err error) {
 	// Hold the mutex for the whole call: r.dec is read here and cleared
 	// further down once the underlying decoder reports io.EOF.
 	r.mu.Lock()
 	defer r.mu.Unlock()
 	// r.dec is nil after the decoder has been handed back to the pool below.
 	if r.dec == nil {
-		return 0, errors.New("Read after Close")
+		return 0, errors.New("read after close or EOF")
 	}
 	// dec is the byte count from the underlying Read (returned as n), not a decoder.
 	dec, err := r.dec.Read(p)
-
+	if err == io.EOF {
 		// NOTE(review): this assignment replaces io.EOF with whatever
 		// Reset(nil) returns. If that is nil, the caller never observes
 		// io.EOF from this reader, and the next call fails with the
 		// "read after close or EOF" error above. Confirm this is intended;
 		// discarding Reset's result would keep io.EOF intact.
 		err = r.dec.Reset(nil)
 		// Return the decoder to the pool and drop our reference to it.
 		zipReaderPool.Put(r.dec)
 		r.dec = nil
 	}
 	return dec, err
 }
@ -112,11 +116,5 @@ func ZipCompressor(opts ...EOption) func(w io.Writer) (io.WriteCloser, error) {
 // ZipDecompressor returns a decompressor that can be registered with zip libraries.
 // See ZipCompressor for example.
 func ZipDecompressor() func(r io.Reader) io.ReadCloser {
-	return func(r io.Reader) io.ReadCloser {
+	return newZipReader
 		d, err := NewReader(r, WithDecoderConcurrency(1), WithDecoderLowmem(true))
 		if err != nil {
 			panic(err)
 		}
 		return d.IOReadCloser()
 	}
 }
--- a/vendor/github.com/klauspost/compress/zstd/zstd.go
+++ b/vendor/github.com/klauspost/compress/zstd/zstd.go
@ -39,6 +39,9 @@ const zstdMinMatch = 3
 // Reset the buffer offset when reaching this.
 const bufferReset = math.MaxInt32 - MaxWindowSize
 // fcsUnknown is used for unknown frame content size.
 const fcsUnknown = math.MaxUint64
 var (
 	// ErrReservedBlockType is returned when a reserved block type is found.
 	// Typically this indicates wrong or corrupted input.
@ -52,6 +55,10 @@ var (
 	// Typically returned on invalid input.
 	ErrBlockTooSmall = errors.New("block too small")
 	// ErrUnexpectedBlockSize is returned when a block has unexpected size.
 	// Typically returned on invalid input.
 	ErrUnexpectedBlockSize = errors.New("unexpected block size")
 	// ErrMagicMismatch is returned when a "magic" number isn't what is expected.
 	// Typically this indicates wrong or corrupted input.
 	ErrMagicMismatch = errors.New("invalid input: magic number mismatch")
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@ -133,7 +133,7 @@ github.com/imdario/mergo
 # github.com/inconshreveable/mousetrap v1.0.0
 ## explicit
 github.com/inconshreveable/mousetrap
-# github.com/klauspost/compress v1.15.0
+# github.com/klauspost/compress v1.15.1
 ## explicit; go 1.15
 github.com/klauspost/compress
 github.com/klauspost/compress/fse