vendor: github.com/klauspost/compress v1.15.12

full diff: https://github.com/klauspost/compress/compare/v1.15.9...v1.15.12

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
This commit is contained in:
Sebastiaan van Stijn 2022-12-22 22:45:42 +01:00
parent d7869beade
commit cd2098c461
No known key found for this signature in database
GPG Key ID: 76698F39D527CE8C
26 changed files with 255 additions and 97 deletions

View File

@ -56,7 +56,7 @@ require (
github.com/golang/protobuf v1.5.2 // indirect
github.com/gorilla/mux v1.8.0 // indirect
github.com/inconshreveable/mousetrap v1.0.1 // indirect
github.com/klauspost/compress v1.15.9 // indirect
github.com/klauspost/compress v1.15.12 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
github.com/miekg/pkcs11 v1.1.1 // indirect
github.com/moby/sys/symlink v0.2.0 // indirect

View File

@ -251,8 +251,8 @@ github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8
github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.15.9 h1:wKRjX6JRtDdrE9qwa4b/Cip7ACOshUI4smpCQanqjSY=
github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU=
github.com/klauspost/compress v1.15.12 h1:YClS/PImqYbn+UILDnqxQCZ3RehC9N318SU3kElDUEM=
github.com/klauspost/compress v1.15.12/go.mod h1:QPwzmACJjUTFsnSHH934V6woptycfrDDJnH7hvFVbGM=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=

View File

@ -17,6 +17,30 @@ This package provides various compression algorithms.
# changelog
* Sept 26, 2022 (v1.15.11)
* flate: Improve level 1-3 compression https://github.com/klauspost/compress/pull/678
* zstd: Improve "best" compression by @nightwolfz in https://github.com/klauspost/compress/pull/677
* zstd: Fix+reduce decompression allocations https://github.com/klauspost/compress/pull/668
* zstd: Fix non-effective noescape tag https://github.com/klauspost/compress/pull/667
* Sept 16, 2022 (v1.15.10)
* zstd: Add [WithDecodeAllCapLimit](https://pkg.go.dev/github.com/klauspost/compress@v1.15.10/zstd#WithDecodeAllCapLimit) https://github.com/klauspost/compress/pull/649
* Add Go 1.19 - deprecate Go 1.16 https://github.com/klauspost/compress/pull/651
* flate: Improve level 5+6 compression https://github.com/klauspost/compress/pull/656
* zstd: Improve "better" compresssion https://github.com/klauspost/compress/pull/657
* s2: Improve "best" compression https://github.com/klauspost/compress/pull/658
* s2: Improve "better" compression. https://github.com/klauspost/compress/pull/635
* s2: Slightly faster non-assembly decompression https://github.com/klauspost/compress/pull/646
* Use arrays for constant size copies https://github.com/klauspost/compress/pull/659
* July 21, 2022 (v1.15.9)
* zstd: Fix decoder crash on amd64 (no BMI) on invalid input https://github.com/klauspost/compress/pull/645
* zstd: Disable decoder extended memory copies (amd64) due to possible crashes https://github.com/klauspost/compress/pull/644
* zstd: Allow single segments up to "max decoded size" by @klauspost in https://github.com/klauspost/compress/pull/643
* July 13, 2022 (v1.15.8)
* gzip: fix stack exhaustion bug in Reader.Read https://github.com/klauspost/compress/pull/641
@ -91,14 +115,14 @@ This package provides various compression algorithms.
* gzhttp: Add zstd to transport by @klauspost in [#400](https://github.com/klauspost/compress/pull/400)
* gzhttp: Make content-type optional by @klauspost in [#510](https://github.com/klauspost/compress/pull/510)
<details>
<summary>See Details</summary>
Both compression and decompression now supports "synchronous" stream operations. This means that whenever "concurrency" is set to 1, they will operate without spawning goroutines.
Stream decompression is now faster on asynchronous, since the goroutine allocation much more effectively splits the workload. On typical streams this will typically use 2 cores fully for decompression. When a stream has finished decoding no goroutines will be left over, so decoders can now safely be pooled and still be garbage collected.
While the release has been extensively tested, it is recommended to testing when upgrading.
</details>
<details>
<summary>See changes to v1.14.x</summary>
* Feb 22, 2022 (v1.14.4)
* flate: Fix rare huffman only (-2) corruption. [#503](https://github.com/klauspost/compress/pull/503)
@ -125,6 +149,7 @@ While the release has been extensively tested, it is recommended to testing when
* zstd: Performance improvement in [#420]( https://github.com/klauspost/compress/pull/420) [#456](https://github.com/klauspost/compress/pull/456) [#437](https://github.com/klauspost/compress/pull/437) [#467](https://github.com/klauspost/compress/pull/467) [#468](https://github.com/klauspost/compress/pull/468)
* zstd: add arm64 xxhash assembly in [#464](https://github.com/klauspost/compress/pull/464)
* Add garbled for binaries for s2 in [#445](https://github.com/klauspost/compress/pull/445)
</details>
<details>
<summary>See changes to v1.13.x</summary>

View File

@ -763,17 +763,20 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 1")
}
copy(out, buf[0][:])
copy(out[dstEvery:], buf[1][:])
copy(out[dstEvery*2:], buf[2][:])
copy(out[dstEvery*3:], buf[3][:])
out = out[bufoff:]
decoded += bufoff * 4
// There must at least be 3 buffers left.
if len(out) < dstEvery*3 {
if len(out)-bufoff < dstEvery*3 {
d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 2")
}
//copy(out, buf[0][:])
//copy(out[dstEvery:], buf[1][:])
//copy(out[dstEvery*2:], buf[2][:])
*(*[bufoff]byte)(out) = buf[0]
*(*[bufoff]byte)(out[dstEvery:]) = buf[1]
*(*[bufoff]byte)(out[dstEvery*2:]) = buf[2]
*(*[bufoff]byte)(out[dstEvery*3:]) = buf[3]
out = out[bufoff:]
decoded += bufoff * 4
}
}
if off > 0 {
@ -997,17 +1000,22 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 1")
}
copy(out, buf[0][:])
copy(out[dstEvery:], buf[1][:])
copy(out[dstEvery*2:], buf[2][:])
copy(out[dstEvery*3:], buf[3][:])
out = out[bufoff:]
decoded += bufoff * 4
// There must at least be 3 buffers left.
if len(out) < dstEvery*3 {
if len(out)-bufoff < dstEvery*3 {
d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 2")
}
//copy(out, buf[0][:])
//copy(out[dstEvery:], buf[1][:])
//copy(out[dstEvery*2:], buf[2][:])
// copy(out[dstEvery*3:], buf[3][:])
*(*[bufoff]byte)(out) = buf[0]
*(*[bufoff]byte)(out[dstEvery:]) = buf[1]
*(*[bufoff]byte)(out[dstEvery*2:]) = buf[2]
*(*[bufoff]byte)(out[dstEvery*3:]) = buf[3]
out = out[bufoff:]
decoded += bufoff * 4
}
}
if off > 0 {

View File

@ -14,12 +14,14 @@ import (
// decompress4x_main_loop_x86 is an x86 assembler implementation
// of Decompress4X when tablelog > 8.
//
//go:noescape
func decompress4x_main_loop_amd64(ctx *decompress4xContext)
// decompress4x_8b_loop_x86 is an x86 assembler implementation
// of Decompress4X when tablelog <= 8 which decodes 4 entries
// per loop.
//
//go:noescape
func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext)
@ -145,11 +147,13 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
// decompress4x_main_loop_x86 is an x86 assembler implementation
// of Decompress1X when tablelog > 8.
//
//go:noescape
func decompress1x_main_loop_amd64(ctx *decompress1xContext)
// decompress4x_main_loop_x86 is an x86 with BMI2 assembler implementation
// of Decompress1X when tablelog > 8.
//
//go:noescape
func decompress1x_main_loop_bmi2(ctx *decompress1xContext)

View File

@ -1,7 +1,6 @@
// Code generated by command: go run gen.go -out ../decompress_amd64.s -pkg=huff0. DO NOT EDIT.
//go:build amd64 && !appengine && !noasm && gc
// +build amd64,!appengine,!noasm,gc
// func decompress4x_main_loop_amd64(ctx *decompress4xContext)
TEXT ·decompress4x_main_loop_amd64(SB), $0-8

View File

@ -122,17 +122,21 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 1")
}
copy(out, buf[0][:])
copy(out[dstEvery:], buf[1][:])
copy(out[dstEvery*2:], buf[2][:])
copy(out[dstEvery*3:], buf[3][:])
out = out[bufoff:]
decoded += bufoff * 4
// There must at least be 3 buffers left.
if len(out) < dstEvery*3 {
if len(out)-bufoff < dstEvery*3 {
d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 2")
}
//copy(out, buf[0][:])
//copy(out[dstEvery:], buf[1][:])
//copy(out[dstEvery*2:], buf[2][:])
//copy(out[dstEvery*3:], buf[3][:])
*(*[bufoff]byte)(out) = buf[0]
*(*[bufoff]byte)(out[dstEvery:]) = buf[1]
*(*[bufoff]byte)(out[dstEvery*2:]) = buf[2]
*(*[bufoff]byte)(out[dstEvery*3:]) = buf[3]
out = out[bufoff:]
decoded += bufoff * 4
}
}
if off > 0 {

View File

@ -18,6 +18,7 @@ func load64(b []byte, i int) uint64 {
// emitLiteral writes a literal chunk and returns the number of bytes written.
//
// It assumes that:
//
// dst is long enough to hold the encoded bytes
// 1 <= len(lit) && len(lit) <= 65536
func emitLiteral(dst, lit []byte) int {
@ -42,6 +43,7 @@ func emitLiteral(dst, lit []byte) int {
// emitCopy writes a copy chunk and returns the number of bytes written.
//
// It assumes that:
//
// dst is long enough to hold the encoded bytes
// 1 <= offset && offset <= 65535
// 4 <= length && length <= 65535
@ -89,6 +91,7 @@ func emitCopy(dst []byte, offset, length int) int {
// src[i:i+k-j] and src[j:k] have the same contents.
//
// It assumes that:
//
// 0 <= i && i < j && j <= len(src)
func extendMatch(src []byte, i, j int) int {
for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 {
@ -105,6 +108,7 @@ func hash(u, shift uint32) uint32 {
// been written.
//
// It also assumes that:
//
// len(dst) >= MaxEncodedLen(len(src)) &&
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlock(dst, src []byte) (d int) {

View File

@ -12,6 +12,8 @@ The `zstd` package is provided as open source software using a Go standard licen
Currently the package is heavily optimized for 64 bit processors and will be significantly slower on 32 bit processors.
For seekable zstd streams, see [this excellent package](https://github.com/SaveTheRbtz/zstd-seekable-format-go).
## Installation
Install using `go get -u github.com/klauspost/compress`. The package is located in `github.com/klauspost/compress/zstd`.

View File

@ -10,7 +10,6 @@ import (
"errors"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"sync"
@ -233,7 +232,7 @@ func (b *blockDec) decodeBuf(hist *history) error {
if b.lowMem {
b.dst = make([]byte, b.RLESize)
} else {
b.dst = make([]byte, maxBlockSize)
b.dst = make([]byte, maxCompressedBlockSize)
}
}
b.dst = b.dst[:b.RLESize]
@ -651,7 +650,7 @@ func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) {
fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.matchLengths.fse))
fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.offsets.fse))
buf.Write(in)
ioutil.WriteFile(filepath.Join("testdata", "seqs", fn), buf.Bytes(), os.ModePerm)
os.WriteFile(filepath.Join("testdata", "seqs", fn), buf.Bytes(), os.ModePerm)
}
return nil

View File

@ -7,7 +7,6 @@ package zstd
import (
"fmt"
"io"
"io/ioutil"
)
type byteBuffer interface {
@ -124,7 +123,7 @@ func (r *readerWrapper) readByte() (byte, error) {
}
func (r *readerWrapper) skipN(n int64) error {
n2, err := io.CopyN(ioutil.Discard, r.r, n)
n2, err := io.CopyN(io.Discard, r.r, n)
if n2 != n {
err = io.ErrUnexpectedEOF
}

View File

@ -35,6 +35,7 @@ type Decoder struct {
br readerWrapper
enabled bool
inFrame bool
dstBuf []byte
}
frame *frameDec
@ -187,21 +188,23 @@ func (d *Decoder) Reset(r io.Reader) error {
}
// If bytes buffer and < 5MB, do sync decoding anyway.
if bb, ok := r.(byter); ok && bb.Len() < 5<<20 {
if bb, ok := r.(byter); ok && bb.Len() < d.o.decodeBufsBelow && !d.o.limitToCap {
bb2 := bb
if debugDecoder {
println("*bytes.Buffer detected, doing sync decode, len:", bb.Len())
}
b := bb2.Bytes()
var dst []byte
if cap(d.current.b) > 0 {
dst = d.current.b
if cap(d.syncStream.dstBuf) > 0 {
dst = d.syncStream.dstBuf[:0]
}
dst, err := d.DecodeAll(b, dst[:0])
dst, err := d.DecodeAll(b, dst)
if err == nil {
err = io.EOF
}
// Save output buffer
d.syncStream.dstBuf = dst
d.current.b = dst
d.current.err = err
d.current.flushed = true
@ -216,6 +219,7 @@ func (d *Decoder) Reset(r io.Reader) error {
d.current.err = nil
d.current.flushed = false
d.current.d = nil
d.syncStream.dstBuf = nil
// Ensure no-one else is still running...
d.streamWg.Wait()
@ -312,6 +316,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
// Grab a block decoder and frame decoder.
block := <-d.decoders
frame := block.localFrame
initialSize := len(dst)
defer func() {
if debugDecoder {
printf("re-adding decoder: %p", block)
@ -354,7 +359,16 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
return dst, ErrWindowSizeExceeded
}
if frame.FrameContentSize != fcsUnknown {
if frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)) {
if frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)-initialSize) {
if debugDecoder {
println("decoder size exceeded; fcs:", frame.FrameContentSize, "> mcs:", d.o.maxDecodedSize-uint64(len(dst)-initialSize), "len:", len(dst))
}
return dst, ErrDecoderSizeExceeded
}
if d.o.limitToCap && frame.FrameContentSize > uint64(cap(dst)-len(dst)) {
if debugDecoder {
println("decoder size exceeded; fcs:", frame.FrameContentSize, "> (cap-len)", cap(dst)-len(dst))
}
return dst, ErrDecoderSizeExceeded
}
if cap(dst)-len(dst) < int(frame.FrameContentSize) {
@ -364,7 +378,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
}
}
if cap(dst) == 0 {
if cap(dst) == 0 && !d.o.limitToCap {
// Allocate len(input) * 2 by default if nothing is provided
// and we didn't get frame content size.
size := len(input) * 2
@ -382,6 +396,9 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
if err != nil {
return dst, err
}
if uint64(len(dst)-initialSize) > d.o.maxDecodedSize {
return dst, ErrDecoderSizeExceeded
}
if len(frame.bBuf) == 0 {
if debugDecoder {
println("frame dbuf empty")
@ -667,6 +684,7 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
if debugDecoder {
println("Async 1: new history, recent:", block.async.newHist.recentOffsets)
}
hist.reset()
hist.decoders = block.async.newHist.decoders
hist.recentOffsets = block.async.newHist.recentOffsets
hist.windowSize = block.async.newHist.windowSize
@ -698,6 +716,7 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
seqExecute <- block
}
close(seqExecute)
hist.reset()
}()
var wg sync.WaitGroup
@ -721,6 +740,7 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
if debugDecoder {
println("Async 2: new history")
}
hist.reset()
hist.windowSize = block.async.newHist.windowSize
hist.allocFrameBuffer = block.async.newHist.allocFrameBuffer
if block.async.newHist.dict != nil {
@ -750,7 +770,7 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
if block.lowMem {
block.dst = make([]byte, block.RLESize)
} else {
block.dst = make([]byte, maxBlockSize)
block.dst = make([]byte, maxCompressedBlockSize)
}
}
block.dst = block.dst[:block.RLESize]
@ -802,13 +822,14 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
if debugDecoder {
println("decoder goroutines finished")
}
hist.reset()
}()
var hist history
decodeStream:
for {
var hist history
var hasErr bool
hist.reset()
decodeBlock := func(block *blockDec) {
if hasErr {
if block != nil {
@ -852,6 +873,10 @@ decodeStream:
}
}
if err == nil && d.frame.WindowSize > d.o.maxWindowSize {
if debugDecoder {
println("decoder size exceeded, fws:", d.frame.WindowSize, "> mws:", d.o.maxWindowSize)
}
err = ErrDecoderSizeExceeded
}
if err != nil {
@ -920,5 +945,6 @@ decodeStream:
}
close(seqDecode)
wg.Wait()
hist.reset()
d.frame.history.b = frameHistCache
}

View File

@ -20,6 +20,8 @@ type decoderOptions struct {
maxWindowSize uint64
dicts []dict
ignoreChecksum bool
limitToCap bool
decodeBufsBelow int
}
func (o *decoderOptions) setDefault() {
@ -28,6 +30,7 @@ func (o *decoderOptions) setDefault() {
lowMem: true,
concurrent: runtime.GOMAXPROCS(0),
maxWindowSize: MaxWindowSize,
decodeBufsBelow: 128 << 10,
}
if o.concurrent > 4 {
o.concurrent = 4
@ -114,6 +117,29 @@ func WithDecoderMaxWindow(size uint64) DOption {
}
}
// WithDecodeAllCapLimit will limit DecodeAll to decoding cap(dst)-len(dst) bytes,
// or any size set in WithDecoderMaxMemory.
// This can be used to limit decoding to a specific maximum output size.
// Disabled by default.
func WithDecodeAllCapLimit(b bool) DOption {
return func(o *decoderOptions) error {
o.limitToCap = b
return nil
}
}
// WithDecodeBuffersBelow will fully decode readers that have a
// `Bytes() []byte` and `Len() int` interface similar to bytes.Buffer.
// This typically uses less allocations but will have the full decompressed object in memory.
// Note that DecodeAllCapLimit will disable this, as well as giving a size of 0 or less.
// Default is 128KiB.
func WithDecodeBuffersBelow(size int) DOption {
return func(o *decoderOptions) error {
o.decodeBufsBelow = size
return nil
}
}
// IgnoreChecksum allows to forcibly ignore checksum checking.
func IgnoreChecksum(b bool) DOption {
return func(o *decoderOptions) error {

View File

@ -32,6 +32,7 @@ type match struct {
length int32
rep int32
est int32
_ [12]byte // Aligned size to cache line: 4+4+4+4+4 bytes + 12 bytes padding = 32 bytes
}
const highScore = 25000

View File

@ -416,15 +416,23 @@ encodeLoop:
// Try to find a better match by searching for a long match at the end of the current best match
if s+matched < sLimit {
// Allow some bytes at the beginning to mismatch.
// Sweet spot is around 3 bytes, but depends on input.
// The skipped bytes are tested in Extend backwards,
// and still picked up as part of the match if they do.
const skipBeginning = 3
nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
cv := load3232(src, s)
s2 := s + skipBeginning
cv := load3232(src, s2)
candidateL := e.longTable[nextHashL]
coffsetL := candidateL.offset - e.cur - matched
if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
coffsetL := candidateL.offset - e.cur - matched + skipBeginning
if coffsetL >= 0 && coffsetL < s2 && s2-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
// Found a long match, at least 4 bytes.
matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
matchedNext := e.matchlen(s2+4, coffsetL+4, src) + 4
if matchedNext > matched {
t = coffsetL
s = s2
matched = matchedNext
if debugMatches {
println("long match at end-of-match")
@ -434,12 +442,13 @@ encodeLoop:
// Check prev long...
if true {
coffsetL = candidateL.prev - e.cur - matched
if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
coffsetL = candidateL.prev - e.cur - matched + skipBeginning
if coffsetL >= 0 && coffsetL < s2 && s2-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
// Found a long match, at least 4 bytes.
matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
matchedNext := e.matchlen(s2+4, coffsetL+4, src) + 4
if matchedNext > matched {
t = coffsetL
s = s2
matched = matchedNext
if debugMatches {
println("prev long match at end-of-match")

View File

@ -1103,7 +1103,8 @@ func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) {
}
if allDirty || dirtyShardCnt > dLongTableShardCnt/2 {
copy(e.longTable[:], e.dictLongTable)
//copy(e.longTable[:], e.dictLongTable)
e.longTable = *(*[dFastLongTableSize]tableEntry)(e.dictLongTable)
for i := range e.longTableShardDirty {
e.longTableShardDirty[i] = false
}
@ -1114,7 +1115,9 @@ func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) {
continue
}
copy(e.longTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize], e.dictLongTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize])
// copy(e.longTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize], e.dictLongTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize])
*(*[dLongTableShardSize]tableEntry)(e.longTable[i*dLongTableShardSize:]) = *(*[dLongTableShardSize]tableEntry)(e.dictLongTable[i*dLongTableShardSize:])
e.longTableShardDirty[i] = false
}
}

View File

@ -304,7 +304,7 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
minNonLiteralBlockSize = 1 + 1 + inputMargin
)
if debugEncoder {
if len(src) > maxBlockSize {
if len(src) > maxCompressedBlockSize {
panic("src too big")
}
}
@ -871,7 +871,8 @@ func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
const shardCnt = tableShardCnt
const shardSize = tableShardSize
if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
copy(e.table[:], e.dictTable)
//copy(e.table[:], e.dictTable)
e.table = *(*[tableSize]tableEntry)(e.dictTable)
for i := range e.tableShardDirty {
e.tableShardDirty[i] = false
}
@ -883,7 +884,8 @@ func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
continue
}
copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
//copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
*(*[shardSize]tableEntry)(e.table[i*shardSize:]) = *(*[shardSize]tableEntry)(e.dictTable[i*shardSize:])
e.tableShardDirty[i] = false
}
e.allDirty = false

View File

@ -261,12 +261,17 @@ func (d *frameDec) reset(br byteBuffer) error {
}
d.history.windowSize = int(d.WindowSize)
if !d.o.lowMem || d.history.windowSize < maxBlockSize {
// Alloc 2x window size if not low-mem, or very small window size.
// Alloc 2x window size if not low-mem, or window size below 2MB.
d.history.allocFrameBuffer = d.history.windowSize * 2
} else {
// Alloc with one additional block
if d.o.lowMem {
// Alloc with 1MB extra.
d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize/2
} else {
// Alloc with 2MB extra.
d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize
}
}
if debugDecoder {
println("Frame: Dict:", d.DictionaryID, "FrameContentSize:", d.FrameContentSize, "singleseg:", d.SingleSegment, "window:", d.WindowSize, "crc:", d.HasCheckSum)
@ -343,7 +348,7 @@ func (d *frameDec) consumeCRC() error {
return nil
}
// runDecoder will create a sync decoder that will decode a block of data.
// runDecoder will run the decoder for the remainder of the frame.
func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
saved := d.history.b
@ -353,12 +358,23 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
// Store input length, so we only check new data.
crcStart := len(dst)
d.history.decoders.maxSyncLen = 0
if d.o.limitToCap {
d.history.decoders.maxSyncLen = uint64(cap(dst) - len(dst))
}
if d.FrameContentSize != fcsUnknown {
if !d.o.limitToCap || d.FrameContentSize+uint64(len(dst)) < d.history.decoders.maxSyncLen {
d.history.decoders.maxSyncLen = d.FrameContentSize + uint64(len(dst))
}
if d.history.decoders.maxSyncLen > d.o.maxDecodedSize {
if debugDecoder {
println("maxSyncLen:", d.history.decoders.maxSyncLen, "> maxDecodedSize:", d.o.maxDecodedSize)
}
return dst, ErrDecoderSizeExceeded
}
if uint64(cap(dst)) < d.history.decoders.maxSyncLen {
if debugDecoder {
println("maxSyncLen:", d.history.decoders.maxSyncLen)
}
if !d.o.limitToCap && uint64(cap(dst)) < d.history.decoders.maxSyncLen {
// Alloc for output
dst2 := make([]byte, len(dst), d.history.decoders.maxSyncLen+compressedBlockOverAlloc)
copy(dst2, dst)
@ -378,7 +394,13 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
if err != nil {
break
}
if uint64(len(d.history.b)) > d.o.maxDecodedSize {
if uint64(len(d.history.b)-crcStart) > d.o.maxDecodedSize {
println("runDecoder: maxDecodedSize exceeded", uint64(len(d.history.b)-crcStart), ">", d.o.maxDecodedSize)
err = ErrDecoderSizeExceeded
break
}
if d.o.limitToCap && len(d.history.b) > cap(dst) {
println("runDecoder: cap exceeded", uint64(len(d.history.b)), ">", cap(dst))
err = ErrDecoderSizeExceeded
break
}

View File

@ -21,6 +21,7 @@ type buildDtableAsmContext struct {
// buildDtable_asm is an x86 assembly implementation of fseDecoder.buildDtable.
// Function returns non-zero exit code on error.
//
//go:noescape
func buildDtable_asm(s *fseDecoder, ctx *buildDtableAsmContext) int

View File

@ -1,7 +1,6 @@
// Code generated by command: go run gen_fse.go -out ../fse_decoder_amd64.s -pkg=zstd. DO NOT EDIT.
//go:build !appengine && !noasm && gc && !noasm
// +build !appengine,!noasm,gc,!noasm
// func buildDtable_asm(s *fseDecoder, ctx *buildDtableAsmContext) int
TEXT ·buildDtable_asm(SB), $0-24

View File

@ -37,26 +37,23 @@ func (h *history) reset() {
h.ignoreBuffer = 0
h.error = false
h.recentOffsets = [3]int{1, 4, 8}
if f := h.decoders.litLengths.fse; f != nil && !f.preDefined {
fseDecoderPool.Put(f)
}
if f := h.decoders.offsets.fse; f != nil && !f.preDefined {
fseDecoderPool.Put(f)
}
if f := h.decoders.matchLengths.fse; f != nil && !f.preDefined {
fseDecoderPool.Put(f)
}
h.decoders.freeDecoders()
h.decoders = sequenceDecs{br: h.decoders.br}
if h.huffTree != nil {
if h.dict == nil || h.dict.litEnc != h.huffTree {
huffDecoderPool.Put(h.huffTree)
}
}
h.freeHuffDecoder()
h.huffTree = nil
h.dict = nil
//printf("history created: %+v (l: %d, c: %d)", *h, len(h.b), cap(h.b))
}
func (h *history) freeHuffDecoder() {
if h.huffTree != nil {
if h.dict == nil || h.dict.litEnc != h.huffTree {
huffDecoderPool.Put(h.huffTree)
h.huffTree = nil
}
}
}
func (h *history) setDict(dict *dict) {
if dict == nil {
return

View File

@ -99,6 +99,21 @@ func (s *sequenceDecs) initialize(br *bitReader, hist *history, out []byte) erro
return nil
}
func (s *sequenceDecs) freeDecoders() {
if f := s.litLengths.fse; f != nil && !f.preDefined {
fseDecoderPool.Put(f)
s.litLengths.fse = nil
}
if f := s.offsets.fse; f != nil && !f.preDefined {
fseDecoderPool.Put(f)
s.offsets.fse = nil
}
if f := s.matchLengths.fse; f != nil && !f.preDefined {
fseDecoderPool.Put(f)
s.matchLengths.fse = nil
}
}
// execute will execute the decoded sequence with the provided history.
// The sequence must be evaluated before being sent.
func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error {
@ -299,7 +314,10 @@ func (s *sequenceDecs) decodeSync(hist []byte) error {
}
size := ll + ml + len(out)
if size-startSize > maxBlockSize {
return fmt.Errorf("output (%d) bigger than max block size (%d)", size-startSize, maxBlockSize)
if size-startSize == 424242 {
panic("here")
}
return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
}
if size > cap(out) {
// Not enough size, which can happen under high volume block streaming conditions
@ -411,7 +429,7 @@ func (s *sequenceDecs) decodeSync(hist []byte) error {
// Check if space for literals
if size := len(s.literals) + len(s.out) - startSize; size > maxBlockSize {
return fmt.Errorf("output (%d) bigger than max block size (%d)", size, maxBlockSize)
return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
}
// Add final literals

View File

@ -32,18 +32,22 @@ type decodeSyncAsmContext struct {
// sequenceDecs_decodeSync_amd64 implements the main loop of sequenceDecs.decodeSync in x86 asm.
//
// Please refer to seqdec_generic.go for the reference implementation.
//
//go:noescape
func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// sequenceDecs_decodeSync_bmi2 implements the main loop of sequenceDecs.decodeSync in x86 asm with BMI2 extensions.
//
//go:noescape
func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// sequenceDecs_decodeSync_safe_amd64 does the same as above, but does not write more than output buffer.
//
//go:noescape
func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// sequenceDecs_decodeSync_safe_bmi2 does the same as above, but does not write more than output buffer.
//
//go:noescape
func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
@ -135,7 +139,7 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
if debugDecoder {
println("msl:", s.maxSyncLen, "cap", cap(s.out), "bef:", startSize, "sz:", size-startSize, "mbs:", maxBlockSize, "outsz:", cap(s.out)-startSize)
}
return true, fmt.Errorf("output (%d) bigger than max block size (%d)", size-startSize, maxBlockSize)
return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
default:
return true, fmt.Errorf("sequenceDecs_decode returned erronous code %d", errCode)
@ -143,7 +147,8 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
s.seqSize += ctx.litRemain
if s.seqSize > maxBlockSize {
return true, fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
}
err := br.close()
if err != nil {
@ -201,20 +206,24 @@ const errorNotEnoughSpace = 5
// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm.
//
// Please refer to seqdec_generic.go for the reference implementation.
//
//go:noescape
func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm.
//
// Please refer to seqdec_generic.go for the reference implementation.
//
//go:noescape
func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm with BMI2 extensions.
//
//go:noescape
func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm with BMI2 extensions.
//
//go:noescape
func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
@ -281,7 +290,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
s.seqSize += ctx.litRemain
if s.seqSize > maxBlockSize {
return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
}
err := br.close()
if err != nil {
@ -308,10 +317,12 @@ type executeAsmContext struct {
// Returns false if a match offset is too big.
//
// Please refer to seqdec_generic.go for the reference implementation.
//
//go:noescape
func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool
// Same as above, but with safe memcopies
//
//go:noescape
func sequenceDecs_executeSimple_safe_amd64(ctx *executeAsmContext) bool

View File

@ -1,7 +1,6 @@
// Code generated by command: go run gen.go -out ../seqdec_amd64.s -pkg=zstd. DO NOT EDIT.
//go:build !appengine && !noasm && gc && !noasm
// +build !appengine,!noasm,gc,!noasm
// func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: CMOV

View File

@ -111,7 +111,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
}
s.seqSize += ll + ml
if s.seqSize > maxBlockSize {
return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
}
litRemain -= ll
if litRemain < 0 {
@ -149,7 +149,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
}
s.seqSize += litRemain
if s.seqSize > maxBlockSize {
return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
}
err := br.close()
if err != nil {

4
vendor/modules.txt vendored
View File

@ -135,8 +135,8 @@ github.com/imdario/mergo
# github.com/inconshreveable/mousetrap v1.0.1
## explicit; go 1.18
github.com/inconshreveable/mousetrap
# github.com/klauspost/compress v1.15.9
## explicit; go 1.16
# github.com/klauspost/compress v1.15.12
## explicit; go 1.17
github.com/klauspost/compress
github.com/klauspost/compress/fse
github.com/klauspost/compress/huff0