Merge pull request #4628 from thaJeztah/24.0_backport_bump_compress

[24.0 backport] vendor: github.com/klauspost/compress v1.17.2
Commit 12c309fe91, authored by Brian Goff on 2023-10-25 17:42:02 -07:00, committed by GitHub.
38 changed files with 973 additions and 472 deletions


@@ -57,7 +57,7 @@ require (
    github.com/golang/protobuf v1.5.2 // indirect
    github.com/gorilla/mux v1.8.0 // indirect
    github.com/inconshreveable/mousetrap v1.1.0 // indirect
-    github.com/klauspost/compress v1.16.3 // indirect
+    github.com/klauspost/compress v1.17.2 // indirect
    github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
    github.com/miekg/pkcs11 v1.1.1 // indirect
    github.com/moby/sys/symlink v0.2.0 // indirect


@@ -243,8 +243,8 @@ github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8
github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
-github.com/klauspost/compress v1.16.3 h1:XuJt9zzcnaz6a16/OU53ZjWp/v7/42WcR5t2a0PcNQY=
-github.com/klauspost/compress v1.16.3/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
+github.com/klauspost/compress v1.17.2 h1:RlWWUY/Dr4fL8qk9YG7DTZ7PDgME2V4csBXA8L/ixi4=
+github.com/klauspost/compress v1.17.2/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=


@@ -3,7 +3,7 @@
before:
  hooks:
    - ./gen.sh
-    - go install mvdan.cc/garble@v0.9.3
+    - go install mvdan.cc/garble@v0.10.1

builds:
  -
@@ -92,16 +92,7 @@ builds:
archives:
  -
    id: s2-binaries
-    name_template: "s2-{{ .Os }}_{{ .Arch }}_{{ .Version }}"
-    replacements:
-      aix: AIX
-      darwin: OSX
-      linux: Linux
-      windows: Windows
-      386: i386
-      amd64: x86_64
-      freebsd: FreeBSD
-      netbsd: NetBSD
+    name_template: "s2-{{ .Os }}_{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}"
    format_overrides:
      - goos: windows
        format: zip
@@ -125,7 +116,7 @@ changelog:
nfpms:
  -
-    file_name_template: "s2_package_{{ .Version }}_{{ .Os }}_{{ .Arch }}"
+    file_name_template: "s2_package__{{ .Os }}_{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}"
    vendor: Klaus Post
    homepage: https://github.com/klauspost/compress
    maintainer: Klaus Post <klauspost@gmail.com>
@@ -134,8 +125,3 @@ nfpms:
    formats:
      - deb
      - rpm
-    replacements:
-      darwin: Darwin
-      linux: Linux
-      freebsd: FreeBSD
-      amd64: x86_64


@@ -16,6 +16,37 @@ This package provides various compression algorithms.

# changelog

+* Sept 19th, 2023 - [v1.17.0](https://github.com/klauspost/compress/releases/tag/v1.17.0)
+    * Add experimental dictionary builder https://github.com/klauspost/compress/pull/853
+    * Add xerial snappy read/writer https://github.com/klauspost/compress/pull/838
+    * flate: Add limited window compression https://github.com/klauspost/compress/pull/843
+    * s2: Do 2 overlapping match checks https://github.com/klauspost/compress/pull/839
+    * flate: Add amd64 assembly matchlen https://github.com/klauspost/compress/pull/837
+    * gzip: Copy bufio.Reader on Reset by @thatguystone in https://github.com/klauspost/compress/pull/860
+
+* July 1st, 2023 - [v1.16.7](https://github.com/klauspost/compress/releases/tag/v1.16.7)
+    * zstd: Fix default level first dictionary encode https://github.com/klauspost/compress/pull/829
+    * s2: add GetBufferCapacity() method by @GiedriusS in https://github.com/klauspost/compress/pull/832
+
+* June 13, 2023 - [v1.16.6](https://github.com/klauspost/compress/releases/tag/v1.16.6)
+    * zstd: correctly ignore WithEncoderPadding(1) by @ianlancetaylor in https://github.com/klauspost/compress/pull/806
+    * zstd: Add amd64 match length assembly https://github.com/klauspost/compress/pull/824
+    * gzhttp: Handle informational headers by @rtribotte in https://github.com/klauspost/compress/pull/815
+    * s2: Improve Better compression slightly https://github.com/klauspost/compress/pull/663
+
+* Apr 16, 2023 - [v1.16.5](https://github.com/klauspost/compress/releases/tag/v1.16.5)
+    * zstd: readByte needs to use io.ReadFull by @jnoxon in https://github.com/klauspost/compress/pull/802
+    * gzip: Fix WriterTo after initial read https://github.com/klauspost/compress/pull/804
+
+* Apr 5, 2023 - [v1.16.4](https://github.com/klauspost/compress/releases/tag/v1.16.4)
+    * zstd: Improve zstd best efficiency by @greatroar and @klauspost in https://github.com/klauspost/compress/pull/784
+    * zstd: Respect WithAllLitEntropyCompression https://github.com/klauspost/compress/pull/792
+    * zstd: Fix amd64 not always detecting corrupt data https://github.com/klauspost/compress/pull/785
+    * zstd: Various minor improvements by @greatroar in https://github.com/klauspost/compress/pull/788 https://github.com/klauspost/compress/pull/794 https://github.com/klauspost/compress/pull/795
+    * s2: Fix huge block overflow https://github.com/klauspost/compress/pull/779
+    * s2: Allow CustomEncoder fallback https://github.com/klauspost/compress/pull/780
+    * gzhttp: Suppport ResponseWriter Unwrap() in gzhttp handler by @jgimenez in https://github.com/klauspost/compress/pull/799
+
* Mar 13, 2023 - [v1.16.1](https://github.com/klauspost/compress/releases/tag/v1.16.1)
    * zstd: Speed up + improve best encoder by @greatroar in https://github.com/klauspost/compress/pull/776
    * gzhttp: Add optional [BREACH mitigation](https://github.com/klauspost/compress/tree/master/gzhttp#breach-mitigation). https://github.com/klauspost/compress/pull/762 https://github.com/klauspost/compress/pull/768 https://github.com/klauspost/compress/pull/769 https://github.com/klauspost/compress/pull/770 https://github.com/klauspost/compress/pull/767
@@ -31,6 +62,9 @@ This package provides various compression algorithms.
    * s2: Support io.ReaderAt in ReadSeeker. https://github.com/klauspost/compress/pull/747
    * s2c/s2sx: Use concurrent decoding. https://github.com/klauspost/compress/pull/746

+<details>
+<summary>See changes to v1.15.x</summary>
+
* Jan 21st, 2023 (v1.15.15)
    * deflate: Improve level 7-9 by @klauspost in https://github.com/klauspost/compress/pull/739
    * zstd: Add delta encoding support by @greatroar in https://github.com/klauspost/compress/pull/728
@@ -157,6 +191,8 @@ Stream decompression is now faster on asynchronous, since the goroutine allocati
While the release has been extensively tested, it is recommended to testing when upgrading.

+</details>
+
<details>
<summary>See changes to v1.14.x</summary>
@@ -615,6 +651,10 @@ Here are other packages of good quality and pure Go (no cgo wrappers or autoconv
* [github.com/pierrec/lz4](https://github.com/pierrec/lz4) - strong multithreaded LZ4 compression.
* [github.com/cosnicolaou/pbzip2](https://github.com/cosnicolaou/pbzip2) - multithreaded bzip2 decompression.
* [github.com/dsnet/compress](https://github.com/dsnet/compress) - brotli decompression, bzip2 writer.
+* [github.com/ronanh/intcomp](https://github.com/ronanh/intcomp) - Integer compression.
+* [github.com/spenczar/fpc](https://github.com/spenczar/fpc) - Float compression.
+* [github.com/minio/zipindex](https://github.com/minio/zipindex) - External ZIP directory index.
+* [github.com/ybirader/pzip](https://github.com/ybirader/pzip) - Fast concurrent zip archiver and extractor.

# license

vendor/github.com/klauspost/compress/SECURITY.md (generated, vendored, new file: 25 additions)

@@ -0,0 +1,25 @@
# Security Policy

## Supported Versions

Security updates are applied only to the latest release.

## Vulnerability Definition

A security vulnerability is a bug that with certain input triggers a crash or an infinite loop. Most calls will have varying execution time and only in rare cases will slow operation be considered a security vulnerability.

Corrupted output generally is not considered a security vulnerability, unless independent operations are able to affect each other. Note that not all functionality is re-entrant and safe to use concurrently.

Out-of-memory crashes only applies if the en/decoder uses an abnormal amount of memory, with appropriate options applied, to limit maximum window size, concurrency, etc. However, if you are in doubt you are welcome to file a security issue.

It is assumed that all callers are trusted, meaning internal data exposed through reflection or inspection of returned data structures is not considered a vulnerability.

Vulnerabilities resulting from compiler/assembler errors should be reported upstream. Depending on the severity this package may or may not implement a workaround.

## Reporting a Vulnerability

If you have discovered a security vulnerability in this project, please report it privately. **Do not disclose it as a public issue.** This gives us time to work with you to fix the issue before public exposure, reducing the chance that the exploit will be used before a patch is released.

Please disclose it at [security advisory](https://github.com/klauspost/compress/security/advisories/new). If possible please provide a minimal reproducer. If the issue only applies to a single platform, it would be helpful to provide access to that.

This project is maintained by a team of volunteers on a reasonable-effort basis. As such, vulnerabilities will be disclosed in a best effort base.


@@ -152,12 +152,11 @@ func (b *bitWriter) flushAlign() {
// close will write the alignment bit and write the final byte(s)
// to the output.
-func (b *bitWriter) close() error {
+func (b *bitWriter) close() {
    // End mark
    b.addBits16Clean(1, 1)
    // flush until next byte.
    b.flushAlign()
-    return nil
}

// reset and continue writing by appending to out.


@@ -199,7 +199,8 @@ func (s *Scratch) compress(src []byte) error {
    c2.flush(s.actualTableLog)
    c1.flush(s.actualTableLog)
-    return s.bw.close()
+    s.bw.close()
+    return nil
}

// writeCount will write the normalized histogram count to header.


@@ -13,14 +13,6 @@ type bitWriter struct {
    out []byte
}

-// bitMask16 is bitmasks. Has extra to avoid bounds check.
-var bitMask16 = [32]uint16{
-    0, 1, 3, 7, 0xF, 0x1F,
-    0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF,
-    0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0xFFFF,
-    0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
-    0xFFFF, 0xFFFF} /* up to 16 bits */
-
// addBits16Clean will add up to 16 bits. value may not contain more set bits than indicated.
// It will not check if there is space for them, so the caller must ensure that it has flushed recently.
func (b *bitWriter) addBits16Clean(value uint16, bits uint8) {
@@ -102,10 +94,9 @@ func (b *bitWriter) flushAlign() {
// close will write the alignment bit and write the final byte(s)
// to the output.
-func (b *bitWriter) close() error {
+func (b *bitWriter) close() {
    // End mark
    b.addBits16Clean(1, 1)
    // flush until next byte.
    b.flushAlign()
-    return nil
}


@@ -227,10 +227,10 @@ func EstimateSizes(in []byte, s *Scratch) (tableSz, dataSz, reuseSz int, err err
}

func (s *Scratch) compress1X(src []byte) ([]byte, error) {
-    return s.compress1xDo(s.Out, src)
+    return s.compress1xDo(s.Out, src), nil
}

-func (s *Scratch) compress1xDo(dst, src []byte) ([]byte, error) {
+func (s *Scratch) compress1xDo(dst, src []byte) []byte {
    var bw = bitWriter{out: dst}

    // N is length divisible by 4.
@@ -260,8 +260,8 @@ func (s *Scratch) compress1xDo(dst, src []byte) ([]byte, error) {
            bw.encTwoSymbols(cTable, tmp[1], tmp[0])
        }
    }
-    err := bw.close()
-    return bw.out, err
+    bw.close()
+    return bw.out
}

var sixZeros [6]byte
@@ -283,12 +283,8 @@ func (s *Scratch) compress4X(src []byte) ([]byte, error) {
        }
        src = src[len(toDo):]

-        var err error
        idx := len(s.Out)
-        s.Out, err = s.compress1xDo(s.Out, toDo)
-        if err != nil {
-            return nil, err
-        }
+        s.Out = s.compress1xDo(s.Out, toDo)
        if len(s.Out)-idx > math.MaxUint16 {
            // We cannot store the size in the jump table
            return nil, ErrIncompressible
@@ -315,7 +311,6 @@ func (s *Scratch) compress4Xp(src []byte) ([]byte, error) {
    segmentSize := (len(src) + 3) / 4

    var wg sync.WaitGroup
-    var errs [4]error
    wg.Add(4)
    for i := 0; i < 4; i++ {
        toDo := src
@@ -326,15 +321,12 @@ func (s *Scratch) compress4Xp(src []byte) ([]byte, error) {
        // Separate goroutine for each block.
        go func(i int) {
-            s.tmpOut[i], errs[i] = s.compress1xDo(s.tmpOut[i][:0], toDo)
+            s.tmpOut[i] = s.compress1xDo(s.tmpOut[i][:0], toDo)
            wg.Done()
        }(i)
    }
    wg.Wait()
    for i := 0; i < 4; i++ {
-        if errs[i] != nil {
-            return nil, errs[i]
-        }
        o := s.tmpOut[i]
        if len(o) > math.MaxUint16 {
            // We cannot store the size in the jump table


@@ -253,7 +253,7 @@ func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
    switch d.actualTableLog {
    case 8:
-        const shift = 8 - 8
+        const shift = 0
        for br.off >= 4 {
            br.fillFast()
            v := dt[uint8(br.value>>(56+shift))]


@@ -87,18 +87,6 @@ func emitCopy(dst []byte, offset, length int) int {
    return i + 2
}

-// extendMatch returns the largest k such that k <= len(src) and that
-// src[i:i+k-j] and src[j:k] have the same contents.
-//
-// It assumes that:
-//
-//    0 <= i && i < j && j <= len(src)
-func extendMatch(src []byte, i, j int) int {
-    for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 {
-    }
-    return j
-}
-
func hash(u, shift uint32) uint32 {
    return (u * 0x1e35a7bd) >> shift
}


@@ -304,7 +304,7 @@ import "github.com/klauspost/compress/zstd"

// Create a reader that caches decompressors.
// For this operation type we supply a nil Reader.
-var decoder, _ = zstd.NewReader(nil, WithDecoderConcurrency(0))
+var decoder, _ = zstd.NewReader(nil, zstd.WithDecoderConcurrency(0))

// Decompress a buffer. We don't supply a destination buffer,
// so it will be allocated by the decoder.


@@ -17,7 +17,6 @@ import (
// for aligning the input.
type bitReader struct {
    in       []byte
-    off      uint   // next byte to read is at in[off - 1]
    value    uint64 // Maybe use [16]byte, but shifting is awkward.
    bitsRead uint8
}
@@ -28,7 +27,6 @@ func (b *bitReader) init(in []byte) error {
        return errors.New("corrupt stream: too short")
    }
    b.in = in
-    b.off = uint(len(in))
    // The highest bit of the last byte indicates where to start
    v := in[len(in)-1]
    if v == 0 {
@@ -69,21 +67,19 @@ func (b *bitReader) fillFast() {
    if b.bitsRead < 32 {
        return
    }
-    // 2 bounds checks.
-    v := b.in[b.off-4:]
-    v = v[:4]
+    v := b.in[len(b.in)-4:]
+    b.in = b.in[:len(b.in)-4]
    low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
    b.value = (b.value << 32) | uint64(low)
    b.bitsRead -= 32
-    b.off -= 4
}

// fillFastStart() assumes the bitreader is empty and there is at least 8 bytes to read.
func (b *bitReader) fillFastStart() {
-    // Do single re-slice to avoid bounds checks.
-    b.value = binary.LittleEndian.Uint64(b.in[b.off-8:])
+    v := b.in[len(b.in)-8:]
+    b.in = b.in[:len(b.in)-8]
+    b.value = binary.LittleEndian.Uint64(v)
    b.bitsRead = 0
-    b.off -= 8
}

// fill() will make sure at least 32 bits are available.
@@ -91,25 +87,25 @@ func (b *bitReader) fill() {
    if b.bitsRead < 32 {
        return
    }
-    if b.off >= 4 {
-        v := b.in[b.off-4:]
-        v = v[:4]
+    if len(b.in) >= 4 {
+        v := b.in[len(b.in)-4:]
+        b.in = b.in[:len(b.in)-4]
        low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
        b.value = (b.value << 32) | uint64(low)
        b.bitsRead -= 32
-        b.off -= 4
        return
    }
-    for b.off > 0 {
-        b.value = (b.value << 8) | uint64(b.in[b.off-1])
-        b.bitsRead -= 8
-        b.off--
+
+    b.bitsRead -= uint8(8 * len(b.in))
+    for len(b.in) > 0 {
+        b.value = (b.value << 8) | uint64(b.in[len(b.in)-1])
+        b.in = b.in[:len(b.in)-1]
    }
}

// finished returns true if all bits have been read from the bit stream.
func (b *bitReader) finished() bool {
-    return b.off == 0 && b.bitsRead >= 64
+    return len(b.in) == 0 && b.bitsRead >= 64
}

// overread returns true if more bits have been requested than is on the stream.
@@ -119,7 +115,7 @@ func (b *bitReader) overread() bool {
// remain returns the number of bits remaining.
func (b *bitReader) remain() uint {
-    return b.off*8 + 64 - uint(b.bitsRead)
+    return 8*uint(len(b.in)) + 64 - uint(b.bitsRead)
}

// close the bitstream and returns an error if out-of-buffer reads occurred.
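The refactor above consumes the input from the tail by re-slicing b.in instead of maintaining a separate off index, which drops a struct field and several bounds checks. A stand-alone sketch of the same trick (illustrative, not part of the diff; the caller must ensure len(buf) >= 4):

// readTail4 returns the last 4 bytes of buf as a little-endian value
// plus the shortened slice, mirroring the refactored fill()/fillFast().
func readTail4(buf []byte) (rest []byte, low uint32) {
    v := buf[len(buf)-4:]
    low = uint32(v[0]) | uint32(v[1])<<8 | uint32(v[2])<<16 | uint32(v[3])<<24
    return buf[:len(buf)-4], low
}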


@@ -97,12 +97,11 @@ func (b *bitWriter) flushAlign() {
// close will write the alignment bit and write the final byte(s)
// to the output.
-func (b *bitWriter) close() error {
+func (b *bitWriter) close() {
    // End mark
    b.addBits16Clean(1, 1)
    // flush until next byte.
    b.flushAlign()
-    return nil
}

// reset and continue writing by appending to out.


@@ -592,7 +592,7 @@ func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) {
        }
        seq.fse.setRLE(symb)
        if debugDecoder {
-            printf("RLE set to %+v, code: %v", symb, v)
+            printf("RLE set to 0x%x, code: %v", symb, v)
        }
    case compModeFSE:
        println("Reading table for", tableIndex(i))


@@ -361,14 +361,21 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error {
    if len(lits) >= 1024 {
        // Use 4 Streams.
        out, reUsed, err = huff0.Compress4X(lits, b.litEnc)
-    } else if len(lits) > 32 {
+    } else if len(lits) > 16 {
        // Use 1 stream
        single = true
        out, reUsed, err = huff0.Compress1X(lits, b.litEnc)
    } else {
        err = huff0.ErrIncompressible
    }
+    if err == nil && len(out)+5 > len(lits) {
+        // If we are close, we may still be worse or equal to raw.
+        var lh literalsHeader
+        lh.setSizes(len(out), len(lits), single)
+        if len(out)+lh.size() >= len(lits) {
+            err = huff0.ErrIncompressible
+        }
+    }

    switch err {
    case huff0.ErrIncompressible:
        if debugEncoder {
@@ -473,7 +480,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
        return b.encodeLits(b.literals, rawAllLits)
    }
    // We want some difference to at least account for the headers.
-    saved := b.size - len(b.literals) - (b.size >> 5)
+    saved := b.size - len(b.literals) - (b.size >> 6)
    if saved < 16 {
        if org == nil {
            return errIncompressible
@@ -503,7 +510,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
    if len(b.literals) >= 1024 && !raw {
        // Use 4 Streams.
        out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
-    } else if len(b.literals) > 32 && !raw {
+    } else if len(b.literals) > 16 && !raw {
        // Use 1 stream
        single = true
        out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc)
@@ -511,6 +518,17 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
        err = huff0.ErrIncompressible
    }

+    if err == nil && len(out)+5 > len(b.literals) {
+        // If we are close, we may still be worse or equal to raw.
+        var lh literalsHeader
+        lh.setSize(len(b.literals))
+        szRaw := lh.size()
+        lh.setSizes(len(out), len(b.literals), single)
+        szComp := lh.size()
+        if len(out)+szComp >= len(b.literals)+szRaw {
+            err = huff0.ErrIncompressible
+        }
+    }
    switch err {
    case huff0.ErrIncompressible:
        lh.setType(literalsBlockRaw)
@@ -773,16 +791,16 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
    ml.flush(mlEnc.actualTableLog)
    of.flush(ofEnc.actualTableLog)
    ll.flush(llEnc.actualTableLog)
-    err = wr.close()
-    if err != nil {
-        return err
-    }
+    wr.close()
    b.output = wr.out

+    // Maybe even add a bigger margin.
    if len(b.output)-3-bhOffset >= b.size {
-        // Maybe even add a bigger margin.
+        // Discard and encode as raw block.
+        b.output = b.encodeRawTo(b.output[:bhOffset], org)
+        b.popOffsets()
        b.litEnc.Reuse = huff0.ReusePolicyNone
-        return errIncompressible
+        return nil
    }

    // Size is output minus block header.


@@ -109,7 +109,7 @@ func (r *readerWrapper) readBig(n int, dst []byte) ([]byte, error) {
}

func (r *readerWrapper) readByte() (byte, error) {
-    n2, err := r.r.Read(r.tmp[:1])
+    n2, err := io.ReadFull(r.r, r.tmp[:1])
    if err != nil {
        if err == io.EOF {
            err = io.ErrUnexpectedEOF
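This fix matters because a bare Read is allowed to return fewer bytes than requested, even (0, nil), without that being an error; io.ReadFull keeps reading until the buffer is filled or a real error occurs. A stand-alone sketch of the contract difference (illustrative, not from the diff):

// readByte reliably reads one byte from any io.Reader.
// A single r.Read(buf[:]) could legally return (0, nil) and drop the byte.
func readByte(r io.Reader) (byte, error) {
    var buf [1]byte
    _, err := io.ReadFull(r, buf[:])
    return buf[0], err
}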


@@ -455,12 +455,7 @@ func (d *Decoder) nextBlock(blocking bool) (ok bool) {
    }

    if len(next.b) > 0 {
-        n, err := d.current.crc.Write(next.b)
-        if err == nil {
-            if n != len(next.b) {
-                d.current.err = io.ErrShortWrite
-            }
-        }
+        d.current.crc.Write(next.b)
    }
    if next.err == nil && next.d != nil && next.d.hasCRC {
        got := uint32(d.current.crc.Sum64())


@@ -107,7 +107,7 @@ func WithDecoderDicts(dicts ...[]byte) DOption {
    }
}

-// WithEncoderDictRaw registers a dictionary that may be used by the decoder.
+// WithDecoderDictRaw registers a dictionary that may be used by the decoder.
// The slice content can be arbitrary data.
func WithDecoderDictRaw(id uint32, content []byte) DOption {
    return func(o *decoderOptions) error {
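The change only fixes a copy-pasted doc comment; the option itself is passed when creating a reader. A hedged usage sketch (newRawDictReader is an illustrative name):

// newRawDictReader returns a decoder able to decompress frames
// that reference the given raw-content dictionary.
func newRawDictReader(id uint32, content []byte) (*zstd.Decoder, error) {
    return zstd.NewReader(nil, zstd.WithDecoderDictRaw(id, content))
}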


@@ -1,10 +1,13 @@
package zstd

import (
+    "bytes"
    "encoding/binary"
    "errors"
    "fmt"
    "io"
+    "math"
+    "sort"

    "github.com/klauspost/compress/huff0"
)
@@ -14,9 +17,8 @@ type dict struct {
    litEnc              *huff0.Scratch
    llDec, ofDec, mlDec sequenceDec

-    //llEnc, ofEnc, mlEnc []*fseEncoder
-    offsets [3]int
-    content []byte
+    offsets [3]int
+    content []byte
}

const dictMagic = "\x37\xa4\x30\xec"
@@ -159,3 +161,374 @@ func InspectDictionary(b []byte) (interface {
    d, err := loadDict(b)
    return d, err
}
type BuildDictOptions struct {
// Dictionary ID.
ID uint32
// Content to use to create dictionary tables.
Contents [][]byte
// History to use for all blocks.
History []byte
// Offsets to use.
Offsets [3]int
// CompatV155 will make the dictionary compatible with Zstd v1.5.5 and earlier.
// See https://github.com/facebook/zstd/issues/3724
CompatV155 bool
// Use the specified encoder level.
// The dictionary will be built using the specified encoder level,
// which will reflect speed and make the dictionary tailored for that level.
// If not set SpeedBestCompression will be used.
Level EncoderLevel
// DebugOut will write stats and other details here if set.
DebugOut io.Writer
}
func BuildDict(o BuildDictOptions) ([]byte, error) {
initPredefined()
hist := o.History
contents := o.Contents
debug := o.DebugOut != nil
println := func(args ...interface{}) {
if o.DebugOut != nil {
fmt.Fprintln(o.DebugOut, args...)
}
}
printf := func(s string, args ...interface{}) {
if o.DebugOut != nil {
fmt.Fprintf(o.DebugOut, s, args...)
}
}
print := func(args ...interface{}) {
if o.DebugOut != nil {
fmt.Fprint(o.DebugOut, args...)
}
}
if int64(len(hist)) > dictMaxLength {
return nil, fmt.Errorf("dictionary of size %d > %d", len(hist), int64(dictMaxLength))
}
if len(hist) < 8 {
return nil, fmt.Errorf("dictionary of size %d < %d", len(hist), 8)
}
if len(contents) == 0 {
return nil, errors.New("no content provided")
}
d := dict{
id: o.ID,
litEnc: nil,
llDec: sequenceDec{},
ofDec: sequenceDec{},
mlDec: sequenceDec{},
offsets: o.Offsets,
content: hist,
}
block := blockEnc{lowMem: false}
block.init()
enc := encoder(&bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(maxMatchLen), bufferReset: math.MaxInt32 - int32(maxMatchLen*2), lowMem: false}})
if o.Level != 0 {
eOpts := encoderOptions{
level: o.Level,
blockSize: maxMatchLen,
windowSize: maxMatchLen,
dict: &d,
lowMem: false,
}
enc = eOpts.encoder()
} else {
o.Level = SpeedBestCompression
}
var (
remain [256]int
ll [256]int
ml [256]int
of [256]int
)
addValues := func(dst *[256]int, src []byte) {
for _, v := range src {
dst[v]++
}
}
addHist := func(dst *[256]int, src *[256]uint32) {
for i, v := range src {
dst[i] += int(v)
}
}
seqs := 0
nUsed := 0
litTotal := 0
newOffsets := make(map[uint32]int, 1000)
for _, b := range contents {
block.reset(nil)
if len(b) < 8 {
continue
}
nUsed++
enc.Reset(&d, true)
enc.Encode(&block, b)
addValues(&remain, block.literals)
litTotal += len(block.literals)
seqs += len(block.sequences)
block.genCodes()
addHist(&ll, block.coders.llEnc.Histogram())
addHist(&ml, block.coders.mlEnc.Histogram())
addHist(&of, block.coders.ofEnc.Histogram())
for i, seq := range block.sequences {
if i > 3 {
break
}
offset := seq.offset
if offset == 0 {
continue
}
if offset > 3 {
newOffsets[offset-3]++
} else {
newOffsets[uint32(o.Offsets[offset-1])]++
}
}
}
// Find most used offsets.
var sortedOffsets []uint32
for k := range newOffsets {
sortedOffsets = append(sortedOffsets, k)
}
sort.Slice(sortedOffsets, func(i, j int) bool {
a, b := sortedOffsets[i], sortedOffsets[j]
if a == b {
// Prefer the longer offset
return sortedOffsets[i] > sortedOffsets[j]
}
return newOffsets[sortedOffsets[i]] > newOffsets[sortedOffsets[j]]
})
if len(sortedOffsets) > 3 {
if debug {
print("Offsets:")
for i, v := range sortedOffsets {
if i > 20 {
break
}
printf("[%d: %d],", v, newOffsets[v])
}
println("")
}
sortedOffsets = sortedOffsets[:3]
}
for i, v := range sortedOffsets {
o.Offsets[i] = int(v)
}
if debug {
println("New repeat offsets", o.Offsets)
}
if nUsed == 0 || seqs == 0 {
return nil, fmt.Errorf("%d blocks, %d sequences found", nUsed, seqs)
}
if debug {
println("Sequences:", seqs, "Blocks:", nUsed, "Literals:", litTotal)
}
if seqs/nUsed < 512 {
// Use 512 as minimum.
nUsed = seqs / 512
}
copyHist := func(dst *fseEncoder, src *[256]int) ([]byte, error) {
hist := dst.Histogram()
var maxSym uint8
var maxCount int
var fakeLength int
for i, v := range src {
if v > 0 {
v = v / nUsed
if v == 0 {
v = 1
}
}
if v > maxCount {
maxCount = v
}
if v != 0 {
maxSym = uint8(i)
}
fakeLength += v
hist[i] = uint32(v)
}
dst.HistogramFinished(maxSym, maxCount)
dst.reUsed = false
dst.useRLE = false
err := dst.normalizeCount(fakeLength)
if err != nil {
return nil, err
}
if debug {
println("RAW:", dst.count[:maxSym+1], "NORM:", dst.norm[:maxSym+1], "LEN:", fakeLength)
}
return dst.writeCount(nil)
}
if debug {
print("Literal lengths: ")
}
llTable, err := copyHist(block.coders.llEnc, &ll)
if err != nil {
return nil, err
}
if debug {
print("Match lengths: ")
}
mlTable, err := copyHist(block.coders.mlEnc, &ml)
if err != nil {
return nil, err
}
if debug {
print("Offsets: ")
}
ofTable, err := copyHist(block.coders.ofEnc, &of)
if err != nil {
return nil, err
}
// Literal table
avgSize := litTotal
if avgSize > huff0.BlockSizeMax/2 {
avgSize = huff0.BlockSizeMax / 2
}
huffBuff := make([]byte, 0, avgSize)
// Target size
div := litTotal / avgSize
if div < 1 {
div = 1
}
if debug {
println("Huffman weights:")
}
for i, n := range remain[:] {
if n > 0 {
n = n / div
// Allow all entries to be represented.
if n == 0 {
n = 1
}
huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...)
if debug {
printf("[%d: %d], ", i, n)
}
}
}
if o.CompatV155 && remain[255]/div == 0 {
huffBuff = append(huffBuff, 255)
}
scratch := &huff0.Scratch{TableLog: 11}
for tries := 0; tries < 255; tries++ {
scratch = &huff0.Scratch{TableLog: 11}
_, _, err = huff0.Compress1X(huffBuff, scratch)
if err == nil {
break
}
if debug {
printf("Try %d: Huffman error: %v\n", tries+1, err)
}
huffBuff = huffBuff[:0]
if tries == 250 {
if debug {
println("Huffman: Bailing out with predefined table")
}
// Bail out.... Just generate something
huffBuff = append(huffBuff, bytes.Repeat([]byte{255}, 10000)...)
for i := 0; i < 128; i++ {
huffBuff = append(huffBuff, byte(i))
}
continue
}
if errors.Is(err, huff0.ErrIncompressible) {
// Try truncating least common.
for i, n := range remain[:] {
if n > 0 {
n = n / (div * (i + 1))
if n > 0 {
huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...)
}
}
}
if o.CompatV155 && len(huffBuff) > 0 && huffBuff[len(huffBuff)-1] != 255 {
huffBuff = append(huffBuff, 255)
}
if len(huffBuff) == 0 {
huffBuff = append(huffBuff, 0, 255)
}
}
if errors.Is(err, huff0.ErrUseRLE) {
for i, n := range remain[:] {
n = n / (div * (i + 1))
// Allow all entries to be represented.
if n == 0 {
n = 1
}
huffBuff = append(huffBuff, bytes.Repeat([]byte{byte(i)}, n)...)
}
}
}
var out bytes.Buffer
out.Write([]byte(dictMagic))
out.Write(binary.LittleEndian.AppendUint32(nil, o.ID))
out.Write(scratch.OutTable)
if debug {
println("huff table:", len(scratch.OutTable), "bytes")
println("of table:", len(ofTable), "bytes")
println("ml table:", len(mlTable), "bytes")
println("ll table:", len(llTable), "bytes")
}
out.Write(ofTable)
out.Write(mlTable)
out.Write(llTable)
out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[0])))
out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[1])))
out.Write(binary.LittleEndian.AppendUint32(nil, uint32(o.Offsets[2])))
out.Write(hist)
if debug {
_, err := loadDict(out.Bytes())
if err != nil {
panic(err)
}
i, err := InspectDictionary(out.Bytes())
if err != nil {
panic(err)
}
println("ID:", i.ID())
println("Content size:", i.ContentSize())
println("Encoder:", i.LitEncoder() != nil)
println("Offsets:", i.Offsets())
var totalSize int
for _, b := range contents {
totalSize += len(b)
}
encWith := func(opts ...EOption) int {
enc, err := NewWriter(nil, opts...)
if err != nil {
panic(err)
}
defer enc.Close()
var dst []byte
var totalSize int
for _, b := range contents {
dst = enc.EncodeAll(b, dst[:0])
totalSize += len(dst)
}
return totalSize
}
plain := encWith(WithEncoderLevel(o.Level))
withDict := encWith(WithEncoderLevel(o.Level), WithEncoderDict(out.Bytes()))
println("Input size:", totalSize)
println("Plain Compressed:", plain)
println("Dict Compressed:", withDict)
println("Saved:", plain-withDict, (plain-withDict)/len(contents), "bytes per input (rounded down)")
}
return out.Bytes(), nil
}
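The block above is the experimental dictionary builder added in v1.17.0 (pull 853). A hedged usage sketch (sample values are illustrative; per the checks above, History must be at least 8 bytes, and {1, 4, 8} are the conventional zstd starting repeat offsets, an assumption here):

// buildSampleDict builds a dictionary from training samples.
func buildSampleDict(samples [][]byte, history []byte) ([]byte, error) {
    return zstd.BuildDict(zstd.BuildDictOptions{
        ID:       0x11223344,      // arbitrary dictionary ID
        Contents: samples,         // blocks used to build the tables
        History:  history,         // shared history, >= 8 bytes
        Offsets:  [3]int{1, 4, 8}, // assumed starting repeat offsets
        Level:    zstd.SpeedBestCompression,
    })
}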


@@ -144,6 +144,7 @@ func (e *fastBase) resetBase(d *dict, singleBlock bool) {
    } else {
        e.crc.Reset()
    }
+    e.blk.dictLitEnc = nil
    if d != nil {
        low := e.lowMem
        if singleBlock {


@@ -34,7 +34,7 @@ type match struct {
    est int32
}

-const highScore = 25000
+const highScore = maxMatchLen * 8

// estBits will estimate output bits from predefined tables.
func (m *match) estBits(bitsPerByte int32) {
@@ -159,7 +159,6 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
    // nextEmit is where in src the next emitLiteral should start from.
    nextEmit := s
-    cv := load6432(src, s)

    // Relative offsets
    offset1 := int32(blk.recentOffsets[0])
@@ -173,7 +172,6 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
        blk.literals = append(blk.literals, src[nextEmit:until]...)
        s.litLen = uint32(until - nextEmit)
    }
-    _ = addLiterals

    if debugEncoder {
        println("recent offsets:", blk.recentOffsets)
@@ -188,7 +186,9 @@ encodeLoop:
            panic("offset0 was 0")
        }

-        const goodEnough = 100
+        const goodEnough = 250
+
+        cv := load6432(src, s)

        nextHashL := hashLen(cv, bestLongTableBits, bestLongLen)
        nextHashS := hashLen(cv, bestShortTableBits, bestShortLen)
@@ -197,15 +197,50 @@ encodeLoop:
        // Set m to a match at offset if it looks like that will improve compression.
        improve := func(m *match, offset int32, s int32, first uint32, rep int32) {
-            if s-offset >= e.maxMatchOff || load3232(src, offset) != first {
+            delta := s - offset
+            if delta >= e.maxMatchOff || delta <= 0 || load3232(src, offset) != first {
                return
            }
            if debugAsserts {
+                if offset >= s {
+                    panic(fmt.Sprintf("offset: %d - s:%d - rep: %d - cur :%d - max: %d", offset, s, rep, e.cur, e.maxMatchOff))
+                }
                if !bytes.Equal(src[s:s+4], src[offset:offset+4]) {
                    panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
                }
            }
-            cand := match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep}
+            // Try to quick reject if we already have a long match.
+            if m.length > 16 {
+                left := len(src) - int(m.s+m.length)
+                // If we are too close to the end, keep as is.
+                if left <= 0 {
+                    return
+                }
+                checkLen := m.length - (s - m.s) - 8
+                if left > 2 && checkLen > 4 {
+                    // Check 4 bytes, 4 bytes from the end of the current match.
+                    a := load3232(src, offset+checkLen)
+                    b := load3232(src, s+checkLen)
+                    if a != b {
+                        return
+                    }
+                }
+            }
+            l := 4 + e.matchlen(s+4, offset+4, src)
+            if rep < 0 {
+                // Extend candidate match backwards as far as possible.
+                tMin := s - e.maxMatchOff
+                if tMin < 0 {
+                    tMin = 0
+                }
+                for offset > tMin && s > nextEmit && src[offset-1] == src[s-1] && l < maxMatchLength {
+                    s--
+                    offset--
+                    l++
+                }
+            }
+
+            cand := match{offset: offset, s: s, length: l, rep: rep}
            cand.estBits(bitsPerByte)
            if m.est >= highScore || cand.est-m.est+(cand.s-m.s)*bitsPerByte>>10 < 0 {
                *m = cand
@@ -219,17 +254,29 @@ encodeLoop:
        improve(&best, candidateS.prev-e.cur, s, uint32(cv), -1)

        if canRepeat && best.length < goodEnough {
-            cv32 := uint32(cv >> 8)
-            spp := s + 1
-            improve(&best, spp-offset1, spp, cv32, 1)
-            improve(&best, spp-offset2, spp, cv32, 2)
-            improve(&best, spp-offset3, spp, cv32, 3)
-            if best.length > 0 {
-                cv32 = uint32(cv >> 24)
-                spp += 2
+            if s == nextEmit {
+                // Check repeats straight after a match.
+                improve(&best, s-offset2, s, uint32(cv), 1|4)
+                improve(&best, s-offset3, s, uint32(cv), 2|4)
+                if offset1 > 1 {
+                    improve(&best, s-(offset1-1), s, uint32(cv), 3|4)
+                }
+            }
+
+            // If either no match or a non-repeat match, check at + 1
+            if best.rep <= 0 {
+                cv32 := uint32(cv >> 8)
+                spp := s + 1
                improve(&best, spp-offset1, spp, cv32, 1)
                improve(&best, spp-offset2, spp, cv32, 2)
                improve(&best, spp-offset3, spp, cv32, 3)
+                if best.rep < 0 {
+                    cv32 = uint32(cv >> 24)
+                    spp += 2
+                    improve(&best, spp-offset1, spp, cv32, 1)
+                    improve(&best, spp-offset2, spp, cv32, 2)
+                    improve(&best, spp-offset3, spp, cv32, 3)
+                }
            }
        }
        // Load next and check...
@@ -244,41 +291,44 @@ encodeLoop:
            if s >= sLimit {
                break encodeLoop
            }
-            cv = load6432(src, s)
            continue
        }

-        s++
        candidateS = e.table[hashLen(cv>>8, bestShortTableBits, bestShortLen)]
-        cv = load6432(src, s)
-        cv2 := load6432(src, s+1)
+        cv = load6432(src, s+1)
+        cv2 := load6432(src, s+2)
        candidateL = e.longTable[hashLen(cv, bestLongTableBits, bestLongLen)]
        candidateL2 := e.longTable[hashLen(cv2, bestLongTableBits, bestLongLen)]

        // Short at s+1
-        improve(&best, candidateS.offset-e.cur, s, uint32(cv), -1)
+        improve(&best, candidateS.offset-e.cur, s+1, uint32(cv), -1)
        // Long at s+1, s+2
-        improve(&best, candidateL.offset-e.cur, s, uint32(cv), -1)
-        improve(&best, candidateL.prev-e.cur, s, uint32(cv), -1)
-        improve(&best, candidateL2.offset-e.cur, s+1, uint32(cv2), -1)
-        improve(&best, candidateL2.prev-e.cur, s+1, uint32(cv2), -1)
+        improve(&best, candidateL.offset-e.cur, s+1, uint32(cv), -1)
+        improve(&best, candidateL.prev-e.cur, s+1, uint32(cv), -1)
+        improve(&best, candidateL2.offset-e.cur, s+2, uint32(cv2), -1)
+        improve(&best, candidateL2.prev-e.cur, s+2, uint32(cv2), -1)
        if false {
            // Short at s+3.
            // Too often worse...
-            improve(&best, e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+2, uint32(cv2>>8), -1)
+            improve(&best, e.table[hashLen(cv2>>8, bestShortTableBits, bestShortLen)].offset-e.cur, s+3, uint32(cv2>>8), -1)
        }

-        // See if we can find a better match by checking where the current best ends.
-        // Use that offset to see if we can find a better full match.
-        if sAt := best.s + best.length; sAt < sLimit {
-            nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen)
-            candidateEnd := e.longTable[nextHashL]
-            // Start check at a fixed offset to allow for a few mismatches.
-            // For this compression level 2 yields the best results.
-            const skipBeginning = 2
-            if pos := candidateEnd.offset - e.cur - best.length + skipBeginning; pos >= 0 {
-                improve(&best, pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
-                if pos := candidateEnd.prev - e.cur - best.length + skipBeginning; pos >= 0 {
-                    improve(&best, pos, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
+        // Start check at a fixed offset to allow for a few mismatches.
+        // For this compression level 2 yields the best results.
+        // We cannot do this if we have already indexed this position.
+        const skipBeginning = 2
+        if best.s > s-skipBeginning {
+            // See if we can find a better match by checking where the current best ends.
+            // Use that offset to see if we can find a better full match.
+            if sAt := best.s + best.length; sAt < sLimit {
+                nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen)
+                candidateEnd := e.longTable[nextHashL]
+
+                if off := candidateEnd.offset - e.cur - best.length + skipBeginning; off >= 0 {
+                    improve(&best, off, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
+                    if off := candidateEnd.prev - e.cur - best.length + skipBeginning; off >= 0 {
+                        improve(&best, off, best.s+skipBeginning, load3232(src, best.s+skipBeginning), -1)
+                    }
                }
            }
        }
@@ -292,51 +342,34 @@ encodeLoop:

        // We have a match, we can store the forward value
        if best.rep > 0 {
+            s = best.s
            var seq seq
            seq.matchLen = uint32(best.length - zstdMinMatch)
-
-            // We might be able to match backwards.
-            // Extend as long as we can.
-            start := best.s
-            // We end the search early, so we don't risk 0 literals
-            // and have to do special offset treatment.
-            startLimit := nextEmit + 1
-
-            tMin := s - e.maxMatchOff
-            if tMin < 0 {
-                tMin = 0
-            }
-            repIndex := best.offset
-            for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
-                repIndex--
-                start--
-                seq.matchLen++
+            if debugAsserts && s < nextEmit {
+                panic("s < nextEmit")
            }
-            addLiterals(&seq, start)
+            addLiterals(&seq, best.s)

-            // rep 0
-            seq.offset = uint32(best.rep)
+            // Repeat. If bit 4 is set, this is a non-lit repeat.
+            seq.offset = uint32(best.rep & 3)
            if debugSequences {
                println("repeat sequence", seq, "next s:", s)
            }
            blk.sequences = append(blk.sequences, seq)

-            // Index match start+1 (long) -> s - 1
-            index0 := s
+            // Index old s + 1 -> s - 1
+            index0 := s + 1
            s = best.s + best.length
            nextEmit = s
            if s >= sLimit {
                if debugEncoder {
                    println("repeat ended", s, best.length)
                }
                break encodeLoop
            }
            // Index skipped...
            off := index0 + e.cur
-            for index0 < s-1 {
+            for index0 < s {
                cv0 := load6432(src, index0)
                h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
                h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
@@ -346,17 +379,19 @@ encodeLoop:
                index0++
            }
            switch best.rep {
-            case 2:
+            case 2, 4 | 1:
                offset1, offset2 = offset2, offset1
-            case 3:
+            case 3, 4 | 2:
                offset1, offset2, offset3 = offset3, offset1, offset2
+            case 4 | 3:
+                offset1, offset2, offset3 = offset1-1, offset1, offset2
            }
-            cv = load6432(src, s)
            continue
        }

        // A 4-byte match has been found. Update recent offsets.
        // We'll later see if more than 4 bytes.
+        index0 := s + 1
        s = best.s
        t := best.offset
        offset1, offset2, offset3 = s-t, offset1, offset2
@@ -369,22 +404,9 @@ encodeLoop:
            panic("invalid offset")
        }

-        // Extend the n-byte match as long as possible.
-        l := best.length
-
-        // Extend backwards
-        tMin := s - e.maxMatchOff
-        if tMin < 0 {
-            tMin = 0
-        }
-        for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
-            s--
-            t--
-            l++
-        }
-
        // Write our sequence
        var seq seq
+        l := best.length
        seq.litLen = uint32(s - nextEmit)
        seq.matchLen = uint32(l - zstdMinMatch)
        if seq.litLen > 0 {
@@ -401,10 +423,8 @@ encodeLoop:
            break encodeLoop
        }

-        // Index match start+1 (long) -> s - 1
-        index0 := s - l + 1
-        // every entry
-        for index0 < s-1 {
+        // Index old s + 1 -> s - 1
+        for index0 < s {
            cv0 := load6432(src, index0)
            h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
            h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
@@ -413,50 +433,6 @@ encodeLoop:
            e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
            index0++
        }
-
-        cv = load6432(src, s)
-        if !canRepeat {
-            continue
-        }
-
-        // Check offset 2
-        for {
-            o2 := s - offset2
-            if load3232(src, o2) != uint32(cv) {
-                // Do regular search
-                break
-            }
-
-            // Store this, since we have it.
-            nextHashS := hashLen(cv, bestShortTableBits, bestShortLen)
-            nextHashL := hashLen(cv, bestLongTableBits, bestLongLen)
-
-            // We have at least 4 byte match.
-            // No need to check backwards. We come straight from a match
-            l := 4 + e.matchlen(s+4, o2+4, src)
-
-            e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
-            e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: e.table[nextHashS].offset}
-            seq.matchLen = uint32(l) - zstdMinMatch
-            seq.litLen = 0
-
-            // Since litlen is always 0, this is offset 1.
-            seq.offset = 1
-            s += l
-            nextEmit = s
-            if debugSequences {
-                println("sequence", seq, "next s:", s)
-            }
-            blk.sequences = append(blk.sequences, seq)
-
-            // Swap offset 1 and 2.
-            offset1, offset2 = offset2, offset1
-            if s >= sLimit {
-                // Finished
-                break encodeLoop
-            }
-            cv = load6432(src, s)
-        }
    }

    if int(nextEmit) < len(src) {


@@ -1084,7 +1084,7 @@ func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) {
            }
        }
        e.lastDictID = d.id
-        e.allDirty = true
+        allDirty = true
    }
    // Reset table to initial state
    e.cur = e.maxMatchOff


@@ -133,8 +133,7 @@ encodeLoop:
        if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
            // Consider history as well.
            var seq seq
-            var length int32
-            length = 4 + e.matchlen(s+6, repIndex+4, src)
+            length := 4 + e.matchlen(s+6, repIndex+4, src)
            seq.matchLen = uint32(length - zstdMinMatch)

            // We might be able to match backwards.
@@ -645,8 +644,7 @@ encodeLoop:
        if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
            // Consider history as well.
            var seq seq
-            var length int32
-            length = 4 + e.matchlen(s+6, repIndex+4, src)
+            length := 4 + e.matchlen(s+6, repIndex+4, src)
            seq.matchLen = uint32(length - zstdMinMatch)
@@ -831,13 +829,12 @@ func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
    }
    if true {
        end := e.maxMatchOff + int32(len(d.content)) - 8
-        for i := e.maxMatchOff; i < end; i += 3 {
+        for i := e.maxMatchOff; i < end; i += 2 {
            const hashLog = tableBits

            cv := load6432(d.content, i-e.maxMatchOff)
-            nextHash := hashLen(cv, hashLog, tableFastHashLen)      // 0 -> 5
-            nextHash1 := hashLen(cv>>8, hashLog, tableFastHashLen)  // 1 -> 6
-            nextHash2 := hashLen(cv>>16, hashLog, tableFastHashLen) // 2 -> 7
+            nextHash := hashLen(cv, hashLog, tableFastHashLen)     // 0 -> 6
+            nextHash1 := hashLen(cv>>8, hashLog, tableFastHashLen) // 1 -> 7
            e.dictTable[nextHash] = tableEntry{
                val:    uint32(cv),
                offset: i,
@@ -846,10 +843,6 @@ func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
                val:    uint32(cv >> 8),
                offset: i + 1,
            }
-            e.dictTable[nextHash2] = tableEntry{
-                val:    uint32(cv >> 16),
-                offset: i + 2,
-            }
        }
    }
    e.lastDictID = d.id


@ -227,10 +227,7 @@ func (e *Encoder) nextBlock(final bool) error {
DictID: e.o.dict.ID(), DictID: e.o.dict.ID(),
} }
dst, err := fh.appendTo(tmp[:0]) dst := fh.appendTo(tmp[:0])
if err != nil {
return err
}
s.headerWritten = true s.headerWritten = true
s.wWg.Wait() s.wWg.Wait()
var n2 int var n2 int
@ -277,23 +274,9 @@ func (e *Encoder) nextBlock(final bool) error {
s.eofWritten = true s.eofWritten = true
} }
err := errIncompressible s.err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
// If we got the exact same number of literals as input, if s.err != nil {
// assume the literals cannot be compressed. return s.err
if len(src) != len(blk.literals) || len(src) != e.o.blockSize {
err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
}
switch err {
case errIncompressible:
if debugEncoder {
println("Storing incompressible block as raw")
}
blk.encodeRaw(src)
// In fast mode, we do not transfer offsets, so we don't have to deal with changing the.
case nil:
default:
s.err = err
return err
} }
_, s.err = s.w.Write(blk.output) _, s.err = s.w.Write(blk.output)
s.nWritten += int64(len(blk.output)) s.nWritten += int64(len(blk.output))
@ -343,22 +326,8 @@ func (e *Encoder) nextBlock(final bool) error {
} }
s.wWg.Done() s.wWg.Done()
}() }()
err := errIncompressible s.writeErr = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
// If we got the exact same number of literals as input, if s.writeErr != nil {
// assume the literals cannot be compressed.
if len(src) != len(blk.literals) || len(src) != e.o.blockSize {
err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
}
switch err {
case errIncompressible:
if debugEncoder {
println("Storing incompressible block as raw")
}
blk.encodeRaw(src)
// In fast mode, we do not transfer offsets, so we don't have to deal with changing the.
case nil:
default:
s.writeErr = err
return return
} }
_, s.writeErr = s.w.Write(blk.output) _, s.writeErr = s.w.Write(blk.output)
@ -511,7 +480,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
Checksum: false, Checksum: false,
DictID: 0, DictID: 0,
} }
dst, _ = fh.appendTo(dst) dst = fh.appendTo(dst)
// Write raw block as last one only. // Write raw block as last one only.
var blk blockHeader var blk blockHeader
@ -546,10 +515,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
if len(dst) == 0 && cap(dst) == 0 && len(src) < 1<<20 && !e.o.lowMem { if len(dst) == 0 && cap(dst) == 0 && len(src) < 1<<20 && !e.o.lowMem {
dst = make([]byte, 0, len(src)) dst = make([]byte, 0, len(src))
} }
dst, err := fh.appendTo(dst) dst = fh.appendTo(dst)
if err != nil {
panic(err)
}
// If we can do everything in one block, prefer that. // If we can do everything in one block, prefer that.
if len(src) <= e.o.blockSize { if len(src) <= e.o.blockSize {
@ -568,25 +534,15 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
// If we got the exact same number of literals as input, // If we got the exact same number of literals as input,
// assume the literals cannot be compressed. // assume the literals cannot be compressed.
err := errIncompressible
oldout := blk.output oldout := blk.output
if len(blk.literals) != len(src) || len(src) != e.o.blockSize { // Output directly to dst
// Output directly to dst blk.output = dst
blk.output = dst
err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
}
switch err { err := blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
case errIncompressible: if err != nil {
if debugEncoder {
println("Storing incompressible block as raw")
}
dst = blk.encodeRawTo(dst, src)
case nil:
dst = blk.output
default:
panic(err) panic(err)
} }
dst = blk.output
blk.output = oldout blk.output = oldout
} else { } else {
enc.Reset(e.o.dict, false) enc.Reset(e.o.dict, false)
@ -605,25 +561,11 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
if len(src) == 0 { if len(src) == 0 {
blk.last = true blk.last = true
} }
err := errIncompressible err := blk.encode(todo, e.o.noEntropy, !e.o.allLitEntropy)
// If we got the exact same number of literals as input, if err != nil {
// assume the literals cannot be compressed.
if len(blk.literals) != len(todo) || len(todo) != e.o.blockSize {
err = blk.encode(todo, e.o.noEntropy, !e.o.allLitEntropy)
}
switch err {
case errIncompressible:
if debugEncoder {
println("Storing incompressible block as raw")
}
dst = blk.encodeRawTo(dst, todo)
blk.popOffsets()
case nil:
dst = append(dst, blk.output...)
default:
panic(err) panic(err)
} }
dst = append(dst, blk.output...)
blk.reset(nil) blk.reset(nil)
} }
} }
@ -633,6 +575,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
// Add padding with content from crypto/rand.Reader // Add padding with content from crypto/rand.Reader
if e.o.pad > 0 { if e.o.pad > 0 {
add := calcSkippableFrame(int64(len(dst)), int64(e.o.pad)) add := calcSkippableFrame(int64(len(dst)), int64(e.o.pad))
var err error
dst, err = skippableFrame(dst, add, rand.Reader) dst, err = skippableFrame(dst, add, rand.Reader)
if err != nil { if err != nil {
panic(err) panic(err)
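Note on the encoder.go hunks above: errIncompressible is gone because, judging by the call-site changes, blk.encode now handles the incompressible case itself (storing a raw block), so nextBlock and EncodeAll only deal with real errors. A minimal caller-visible sketch using only the public zstd API (the payload is illustrative):

package main

import (
	"bytes"
	"fmt"

	"github.com/klauspost/compress/zstd"
)

func main() {
	enc, _ := zstd.NewWriter(nil) // nil writer is fine for EncodeAll-only use
	defer enc.Close()
	dec, _ := zstd.NewReader(nil)
	defer dec.Close()

	// Incompressible input no longer surfaces errIncompressible; it is
	// stored as raw blocks inside EncodeAll.
	data := bytes.Repeat([]byte("zstd "), 100)
	frame := enc.EncodeAll(data, nil)

	out, err := dec.DecodeAll(frame, nil)
	fmt.Println(err == nil && bytes.Equal(out, data))
}
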

View File

@ -39,7 +39,7 @@ func (o *encoderOptions) setDefault() {
blockSize: maxCompressedBlockSize, blockSize: maxCompressedBlockSize,
windowSize: 8 << 20, windowSize: 8 << 20,
level: SpeedDefault, level: SpeedDefault,
allLitEntropy: true, allLitEntropy: false,
lowMem: false, lowMem: false,
} }
} }
@ -129,7 +129,7 @@ func WithEncoderPadding(n int) EOption {
} }
// No need to waste our time. // No need to waste our time.
if n == 1 { if n == 1 {
o.pad = 0 n = 0
} }
if n > 1<<30 { if n > 1<<30 {
return fmt.Errorf("padding must less than 1GB (1<<30 bytes) ") return fmt.Errorf("padding must less than 1GB (1<<30 bytes) ")
@ -238,7 +238,7 @@ func WithEncoderLevel(l EncoderLevel) EOption {
} }
} }
if !o.customALEntropy { if !o.customALEntropy {
o.allLitEntropy = l > SpeedFastest o.allLitEntropy = l > SpeedDefault
} }
return nil return nil
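The enc_options.go changes shift two defaults: allLitEntropy is now off through SpeedDefault and only enabled automatically above it, and WithEncoderPadding(1) normalizes n to 0 rather than clearing o.pad directly. A short usage sketch (option values are illustrative):

package main

import "github.com/klauspost/compress/zstd"

func main() {
	enc, err := zstd.NewWriter(nil,
		zstd.WithEncoderLevel(zstd.SpeedBetterCompression), // above SpeedDefault: allLitEntropy kicks in
		zstd.WithEncoderPadding(4096),                      // pad frames to multiples of 4KiB
	)
	if err != nil {
		panic(err)
	}
	defer enc.Close()
	_ = enc.EncodeAll([]byte("example payload"), nil)
}
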

View File

@ -73,20 +73,20 @@ func (d *frameDec) reset(br byteBuffer) error {
switch err { switch err {
case io.EOF, io.ErrUnexpectedEOF: case io.EOF, io.ErrUnexpectedEOF:
return io.EOF return io.EOF
default:
return err
case nil: case nil:
signature[0] = b[0] signature[0] = b[0]
default:
return err
} }
// Read the rest, don't allow io.ErrUnexpectedEOF // Read the rest, don't allow io.ErrUnexpectedEOF
b, err = br.readSmall(3) b, err = br.readSmall(3)
switch err { switch err {
case io.EOF: case io.EOF:
return io.EOF return io.EOF
default:
return err
case nil: case nil:
copy(signature[1:], b) copy(signature[1:], b)
default:
return err
} }
if string(signature[1:4]) != skippableFrameMagic || signature[0]&0xf0 != 0x50 { if string(signature[1:4]) != skippableFrameMagic || signature[0]&0xf0 != 0x50 {
@ -293,13 +293,9 @@ func (d *frameDec) next(block *blockDec) error {
return nil return nil
} }
// checkCRC will check the checksum if the frame has one. // checkCRC will check the checksum, assuming the frame has one.
// Will return ErrCRCMismatch if crc check failed, otherwise nil. // Will return ErrCRCMismatch if crc check failed, otherwise nil.
func (d *frameDec) checkCRC() error { func (d *frameDec) checkCRC() error {
if !d.HasCheckSum {
return nil
}
// We can overwrite upper tmp now // We can overwrite upper tmp now
buf, err := d.rawInput.readSmall(4) buf, err := d.rawInput.readSmall(4)
if err != nil { if err != nil {
@ -307,10 +303,6 @@ func (d *frameDec) checkCRC() error {
return err return err
} }
if d.o.ignoreChecksum {
return nil
}
want := binary.LittleEndian.Uint32(buf[:4]) want := binary.LittleEndian.Uint32(buf[:4])
got := uint32(d.crc.Sum64()) got := uint32(d.crc.Sum64())
@ -326,17 +318,13 @@ func (d *frameDec) checkCRC() error {
return nil return nil
} }
// consumeCRC reads the checksum data if the frame has one. // consumeCRC skips over the checksum, assuming the frame has one.
func (d *frameDec) consumeCRC() error { func (d *frameDec) consumeCRC() error {
if d.HasCheckSum { _, err := d.rawInput.readSmall(4)
_, err := d.rawInput.readSmall(4) if err != nil {
if err != nil { println("CRC missing?", err)
println("CRC missing?", err)
return err
}
} }
return err
return nil
} }
// runDecoder will run the decoder for the remainder of the frame. // runDecoder will run the decoder for the remainder of the frame.
@ -415,15 +403,8 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
if d.o.ignoreChecksum { if d.o.ignoreChecksum {
err = d.consumeCRC() err = d.consumeCRC()
} else { } else {
var n int d.crc.Write(dst[crcStart:])
n, err = d.crc.Write(dst[crcStart:]) err = d.checkCRC()
if err == nil {
if n != len(dst)-crcStart {
err = io.ErrShortWrite
} else {
err = d.checkCRC()
}
}
} }
} }
} }
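checkCRC and consumeCRC above now assume the caller has already tested HasCheckSum (runDecoder does, as the last hunk shows), and checkCRC compares against the low 32 bits of the content's XXH64, which is how the zstd format defines the content checksum. A sketch of that rule using the public github.com/cespare/xxhash/v2 package (the library uses its own internal copy):

package main

import (
	"encoding/binary"
	"fmt"

	"github.com/cespare/xxhash/v2"
)

func main() {
	content := []byte("decoded frame content")

	h := xxhash.New()
	h.Write(content)
	want := uint32(h.Sum64()) // low 32 bits of XXH64, as in checkCRC

	// The frame stores the checksum little-endian after the last block.
	var stored [4]byte
	binary.LittleEndian.PutUint32(stored[:], want)
	fmt.Println(binary.LittleEndian.Uint32(stored[:]) == want)
}
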

View File

@ -22,7 +22,7 @@ type frameHeader struct {
const maxHeaderSize = 14 const maxHeaderSize = 14
func (f frameHeader) appendTo(dst []byte) ([]byte, error) { func (f frameHeader) appendTo(dst []byte) []byte {
dst = append(dst, frameMagic...) dst = append(dst, frameMagic...)
var fhd uint8 var fhd uint8
if f.Checksum { if f.Checksum {
@ -88,7 +88,7 @@ func (f frameHeader) appendTo(dst []byte) ([]byte, error) {
default: default:
panic("invalid fcs") panic("invalid fcs")
} }
return dst, nil return dst
} }
const skippableFrameHeader = 4 + 4 const skippableFrameHeader = 4 + 4
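frameHeader.appendTo dropping its error return is what enables the call-site cleanups in encoder.go above (and snappy.go below): its only failure mode was an internal invariant, which already panics. A generic sketch of the pattern with a hypothetical header type:

package main

import "fmt"

// Hypothetical append-style helper: invariant violations panic instead
// of returning an error, so each call site collapses to one line.
type header struct{ magic []byte }

func (h header) appendTo(dst []byte) []byte {
	if len(h.magic) == 0 {
		panic("invalid header") // programmer error, not a runtime condition
	}
	return append(dst, h.magic...)
}

func main() {
	dst := header{magic: []byte{0x28, 0xb5, 0x2f, 0xfd}}.appendTo(nil) // zstd frame magic, little-endian
	fmt.Printf("% x\n", dst)
}
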

View File

@ -0,0 +1,16 @@
//go:build amd64 && !appengine && !noasm && gc
// +build amd64,!appengine,!noasm,gc
// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
package zstd
// matchLen returns how many bytes match in a and b
//
// It assumes that:
//
// len(a) <= len(b) and len(a) > 0
//
//go:noescape
func matchLen(a []byte, b []byte) int

View File

@ -0,0 +1,68 @@
// Copied from S2 implementation.
//go:build !appengine && !noasm && gc && !noasm
#include "textflag.h"
// func matchLen(a []byte, b []byte) int
// Requires: BMI
TEXT ·matchLen(SB), NOSPLIT, $0-56
MOVQ a_base+0(FP), AX
MOVQ b_base+24(FP), CX
MOVQ a_len+8(FP), DX
// matchLen
XORL SI, SI
CMPL DX, $0x08
JB matchlen_match4_standalone
matchlen_loopback_standalone:
MOVQ (AX)(SI*1), BX
XORQ (CX)(SI*1), BX
TESTQ BX, BX
JZ matchlen_loop_standalone
#ifdef GOAMD64_v3
TZCNTQ BX, BX
#else
BSFQ BX, BX
#endif
SARQ $0x03, BX
LEAL (SI)(BX*1), SI
JMP gen_match_len_end
matchlen_loop_standalone:
LEAL -8(DX), DX
LEAL 8(SI), SI
CMPL DX, $0x08
JAE matchlen_loopback_standalone
matchlen_match4_standalone:
CMPL DX, $0x04
JB matchlen_match2_standalone
MOVL (AX)(SI*1), BX
CMPL (CX)(SI*1), BX
JNE matchlen_match2_standalone
LEAL -4(DX), DX
LEAL 4(SI), SI
matchlen_match2_standalone:
CMPL DX, $0x02
JB matchlen_match1_standalone
MOVW (AX)(SI*1), BX
CMPW (CX)(SI*1), BX
JNE matchlen_match1_standalone
LEAL -2(DX), DX
LEAL 2(SI), SI
matchlen_match1_standalone:
CMPL DX, $0x01
JB gen_match_len_end
MOVB (AX)(SI*1), BL
CMPB (CX)(SI*1), BL
JNE gen_match_len_end
INCL SI
gen_match_len_end:
MOVQ SI, ret+48(FP)
RET

View File

@ -0,0 +1,33 @@
//go:build !amd64 || appengine || !gc || noasm
// +build !amd64 appengine !gc noasm
// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
package zstd
import (
"encoding/binary"
"math/bits"
)
// matchLen returns the maximum common prefix length of a and b.
// a must be the shortest of the two.
func matchLen(a, b []byte) (n int) {
for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] {
diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
if diff != 0 {
return n + bits.TrailingZeros64(diff)>>3
}
n += 8
}
for i := range a {
if a[i] != b[i] {
break
}
n++
}
return n
}
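The core trick in the generic matchLen above: XOR eight bytes at a time, and when the words differ, bits.TrailingZeros64(diff)>>3 converts the position of the lowest set bit into the number of matching leading bytes. A worked example:

package main

import (
	"encoding/binary"
	"fmt"
	"math/bits"
)

func main() {
	a := binary.LittleEndian.Uint64([]byte("abcdeXgh"))
	b := binary.LittleEndian.Uint64([]byte("abcdeYgh"))
	diff := a ^ b
	// The little-endian load puts byte 0 in the low bits, so the lowest
	// set bit of diff identifies the first differing byte: index 5 here.
	fmt.Println(bits.TrailingZeros64(diff) >> 3) // 5
}
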

View File

@ -236,13 +236,16 @@ func (s *sequenceDecs) decodeSync(hist []byte) error {
maxBlockSize = s.windowSize maxBlockSize = s.windowSize
} }
if debugDecoder {
println("decodeSync: decoding", seqs, "sequences", br.remain(), "bits remain on stream")
}
for i := seqs - 1; i >= 0; i-- { for i := seqs - 1; i >= 0; i-- {
if br.overread() { if br.overread() {
printf("reading sequence %d, exceeded available data\n", seqs-i) printf("reading sequence %d, exceeded available data. Overread by %d\n", seqs-i, -br.remain())
return io.ErrUnexpectedEOF return io.ErrUnexpectedEOF
} }
var ll, mo, ml int var ll, mo, ml int
if br.off > 4+((maxOffsetBits+16+16)>>3) { if len(br.in) > 4+((maxOffsetBits+16+16)>>3) {
// inlined function: // inlined function:
// ll, mo, ml = s.nextFast(br, llState, mlState, ofState) // ll, mo, ml = s.nextFast(br, llState, mlState, ofState)
@ -449,18 +452,13 @@ func (s *sequenceDecs) next(br *bitReader, llState, mlState, ofState decSymbol)
// extra bits are stored in reverse order. // extra bits are stored in reverse order.
br.fill() br.fill()
if s.maxBits <= 32 { mo += br.getBits(moB)
mo += br.getBits(moB) if s.maxBits > 32 {
ml += br.getBits(mlB)
ll += br.getBits(llB)
} else {
mo += br.getBits(moB)
br.fill() br.fill()
// matchlength+literal length, max 32 bits
ml += br.getBits(mlB)
ll += br.getBits(llB)
} }
// matchlength+literal length, max 32 bits
ml += br.getBits(mlB)
ll += br.getBits(llB)
mo = s.adjustOffset(mo, ll, moB) mo = s.adjustOffset(mo, ll, moB)
return return
} }
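Two changes in the seqdec.go hunks above: the fast-path guard now tests len(br.in) because the bit reader no longer keeps a separate off field, and the mid-sequence refill is skipped when the three extra-bits reads cannot exceed 32 bits combined (s.maxBits <= 32). A simplified sketch of that budget logic with a hypothetical reader (not the library's bitReader):

package main

import "fmt"

// Hypothetical 64-bit bit buffer: fill() tops it up while bytes remain,
// so consecutive reads totalling <=32 bits never need a refill between
// them; only when moB+mlB+llB can exceed 32 bits is a mid-sequence
// fill required, mirroring sequenceDecs.next above.
type reader struct {
	bits  uint64
	valid uint
	in    []byte
}

func (r *reader) fill() {
	for r.valid <= 56 && len(r.in) > 0 {
		r.bits |= uint64(r.in[len(r.in)-1]) << (56 - r.valid)
		r.in = r.in[:len(r.in)-1]
		r.valid += 8
	}
}

func (r *reader) getBits(n uint) uint64 {
	v := r.bits >> (64 - n)
	r.bits <<= n
	r.valid -= n
	return v
}

func main() {
	r := &reader{in: []byte{0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x11, 0x22}}
	r.fill()
	mo := r.getBits(20)
	if maxBits := 20 + 8 + 8; maxBits > 32 { // worst case exceeds the budget
		r.fill()
	}
	ml, ll := r.getBits(8), r.getBits(8)
	fmt.Println(mo, ml, ll)
}
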

View File

@ -5,6 +5,7 @@ package zstd
import ( import (
"fmt" "fmt"
"io"
"github.com/klauspost/compress/internal/cpuinfo" "github.com/klauspost/compress/internal/cpuinfo"
) )
@ -134,6 +135,9 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
return true, fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", return true, fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available",
ctx.ll, ctx.litRemain+ctx.ll) ctx.ll, ctx.litRemain+ctx.ll)
case errorOverread:
return true, io.ErrUnexpectedEOF
case errorNotEnoughSpace: case errorNotEnoughSpace:
size := ctx.outPosition + ctx.ll + ctx.ml size := ctx.outPosition + ctx.ll + ctx.ml
if debugDecoder { if debugDecoder {
@ -202,6 +206,9 @@ const errorNotEnoughLiterals = 4
// error reported when capacity of `out` is too small // error reported when capacity of `out` is too small
const errorNotEnoughSpace = 5 const errorNotEnoughSpace = 5
// error reported when bits are overread.
const errorOverread = 6
// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm. // sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm.
// //
// Please refer to seqdec_generic.go for the reference implementation. // Please refer to seqdec_generic.go for the reference implementation.
@ -247,6 +254,10 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
litRemain: len(s.literals), litRemain: len(s.literals),
} }
if debugDecoder {
println("decode: decoding", len(seqs), "sequences", br.remain(), "bits remain on stream")
}
s.seqSize = 0 s.seqSize = 0
lte56bits := s.maxBits+s.offsets.fse.actualTableLog+s.matchLengths.fse.actualTableLog+s.litLengths.fse.actualTableLog <= 56 lte56bits := s.maxBits+s.offsets.fse.actualTableLog+s.matchLengths.fse.actualTableLog+s.litLengths.fse.actualTableLog <= 56
var errCode int var errCode int
@ -277,6 +288,8 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
case errorNotEnoughLiterals: case errorNotEnoughLiterals:
ll := ctx.seqs[i].ll ll := ctx.seqs[i].ll
return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, ctx.litRemain+ll) return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, ctx.litRemain+ll)
case errorOverread:
return io.ErrUnexpectedEOF
} }
return fmt.Errorf("sequenceDecs_decode_amd64 returned erronous code %d", errCode) return fmt.Errorf("sequenceDecs_decode_amd64 returned erronous code %d", errCode)
@ -291,6 +304,9 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
if s.seqSize > maxBlockSize { if s.seqSize > maxBlockSize {
return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize) return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
} }
if debugDecoder {
println("decode: ", br.remain(), "bits remain on stream. code:", errCode)
}
err := br.close() err := br.close()
if err != nil { if err != nil {
printf("Closing sequences: %v, %+v\n", err, *br) printf("Closing sequences: %v, %+v\n", err, *br)
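A new status code, errorOverread (6), lets the assembly report bitstream truncation; both decode and decodeSyncSimple translate it to io.ErrUnexpectedEOF above. A sketch of the mapping, using only the codes visible in this diff:

package main

import (
	"fmt"
	"io"
)

const (
	errorNotEnoughLiterals = 4
	errorNotEnoughSpace    = 5
	errorOverread          = 6 // new in this change
)

// asmStatus translates a status code returned by the assembly decoders.
func asmStatus(code int) error {
	switch code {
	case 0:
		return nil
	case errorOverread:
		return io.ErrUnexpectedEOF // bits consumed past the end of the stream
	default:
		return fmt.Errorf("sequence decoder returned code %d", code)
	}
}

func main() {
	fmt.Println(asmStatus(errorOverread))
}
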

View File

@ -5,11 +5,11 @@
// func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int // func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: CMOV // Requires: CMOV
TEXT ·sequenceDecs_decode_amd64(SB), $8-32 TEXT ·sequenceDecs_decode_amd64(SB), $8-32
MOVQ br+8(FP), AX MOVQ br+8(FP), CX
MOVQ 32(AX), DX MOVQ 24(CX), DX
MOVBQZX 40(AX), BX MOVBQZX 32(CX), BX
MOVQ 24(AX), SI MOVQ (CX), AX
MOVQ (AX), AX MOVQ 8(CX), SI
ADDQ SI, AX ADDQ SI, AX
MOVQ AX, (SP) MOVQ AX, (SP)
MOVQ ctx+16(FP), AX MOVQ ctx+16(FP), AX
@ -38,7 +38,7 @@ sequenceDecs_decode_amd64_main_loop:
sequenceDecs_decode_amd64_fill_byte_by_byte: sequenceDecs_decode_amd64_fill_byte_by_byte:
CMPQ SI, $0x00 CMPQ SI, $0x00
JLE sequenceDecs_decode_amd64_fill_end JLE sequenceDecs_decode_amd64_fill_check_overread
CMPQ BX, $0x07 CMPQ BX, $0x07
JLE sequenceDecs_decode_amd64_fill_end JLE sequenceDecs_decode_amd64_fill_end
SHLQ $0x08, DX SHLQ $0x08, DX
@ -49,6 +49,10 @@ sequenceDecs_decode_amd64_fill_byte_by_byte:
ORQ AX, DX ORQ AX, DX
JMP sequenceDecs_decode_amd64_fill_byte_by_byte JMP sequenceDecs_decode_amd64_fill_byte_by_byte
sequenceDecs_decode_amd64_fill_check_overread:
CMPQ BX, $0x40
JA error_overread
sequenceDecs_decode_amd64_fill_end: sequenceDecs_decode_amd64_fill_end:
// Update offset // Update offset
MOVQ R9, AX MOVQ R9, AX
@ -105,7 +109,7 @@ sequenceDecs_decode_amd64_ml_update_zero:
sequenceDecs_decode_amd64_fill_2_byte_by_byte: sequenceDecs_decode_amd64_fill_2_byte_by_byte:
CMPQ SI, $0x00 CMPQ SI, $0x00
JLE sequenceDecs_decode_amd64_fill_2_end JLE sequenceDecs_decode_amd64_fill_2_check_overread
CMPQ BX, $0x07 CMPQ BX, $0x07
JLE sequenceDecs_decode_amd64_fill_2_end JLE sequenceDecs_decode_amd64_fill_2_end
SHLQ $0x08, DX SHLQ $0x08, DX
@ -116,6 +120,10 @@ sequenceDecs_decode_amd64_fill_2_byte_by_byte:
ORQ AX, DX ORQ AX, DX
JMP sequenceDecs_decode_amd64_fill_2_byte_by_byte JMP sequenceDecs_decode_amd64_fill_2_byte_by_byte
sequenceDecs_decode_amd64_fill_2_check_overread:
CMPQ BX, $0x40
JA error_overread
sequenceDecs_decode_amd64_fill_2_end: sequenceDecs_decode_amd64_fill_2_end:
// Update literal length // Update literal length
MOVQ DI, AX MOVQ DI, AX
@ -293,9 +301,9 @@ sequenceDecs_decode_amd64_match_len_ofs_ok:
MOVQ R12, 152(AX) MOVQ R12, 152(AX)
MOVQ R13, 160(AX) MOVQ R13, 160(AX)
MOVQ br+8(FP), AX MOVQ br+8(FP), AX
MOVQ DX, 32(AX) MOVQ DX, 24(AX)
MOVB BL, 40(AX) MOVB BL, 32(AX)
MOVQ SI, 24(AX) MOVQ SI, 8(AX)
// Return success // Return success
MOVQ $0x00000000, ret+24(FP) MOVQ $0x00000000, ret+24(FP)
@ -320,14 +328,19 @@ error_not_enough_literals:
MOVQ $0x00000004, ret+24(FP) MOVQ $0x00000004, ret+24(FP)
RET RET
// Return with overread error
error_overread:
MOVQ $0x00000006, ret+24(FP)
RET
// func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int // func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: CMOV // Requires: CMOV
TEXT ·sequenceDecs_decode_56_amd64(SB), $8-32 TEXT ·sequenceDecs_decode_56_amd64(SB), $8-32
MOVQ br+8(FP), AX MOVQ br+8(FP), CX
MOVQ 32(AX), DX MOVQ 24(CX), DX
MOVBQZX 40(AX), BX MOVBQZX 32(CX), BX
MOVQ 24(AX), SI MOVQ (CX), AX
MOVQ (AX), AX MOVQ 8(CX), SI
ADDQ SI, AX ADDQ SI, AX
MOVQ AX, (SP) MOVQ AX, (SP)
MOVQ ctx+16(FP), AX MOVQ ctx+16(FP), AX
@ -356,7 +369,7 @@ sequenceDecs_decode_56_amd64_main_loop:
sequenceDecs_decode_56_amd64_fill_byte_by_byte: sequenceDecs_decode_56_amd64_fill_byte_by_byte:
CMPQ SI, $0x00 CMPQ SI, $0x00
JLE sequenceDecs_decode_56_amd64_fill_end JLE sequenceDecs_decode_56_amd64_fill_check_overread
CMPQ BX, $0x07 CMPQ BX, $0x07
JLE sequenceDecs_decode_56_amd64_fill_end JLE sequenceDecs_decode_56_amd64_fill_end
SHLQ $0x08, DX SHLQ $0x08, DX
@ -367,6 +380,10 @@ sequenceDecs_decode_56_amd64_fill_byte_by_byte:
ORQ AX, DX ORQ AX, DX
JMP sequenceDecs_decode_56_amd64_fill_byte_by_byte JMP sequenceDecs_decode_56_amd64_fill_byte_by_byte
sequenceDecs_decode_56_amd64_fill_check_overread:
CMPQ BX, $0x40
JA error_overread
sequenceDecs_decode_56_amd64_fill_end: sequenceDecs_decode_56_amd64_fill_end:
// Update offset // Update offset
MOVQ R9, AX MOVQ R9, AX
@ -586,9 +603,9 @@ sequenceDecs_decode_56_amd64_match_len_ofs_ok:
MOVQ R12, 152(AX) MOVQ R12, 152(AX)
MOVQ R13, 160(AX) MOVQ R13, 160(AX)
MOVQ br+8(FP), AX MOVQ br+8(FP), AX
MOVQ DX, 32(AX) MOVQ DX, 24(AX)
MOVB BL, 40(AX) MOVB BL, 32(AX)
MOVQ SI, 24(AX) MOVQ SI, 8(AX)
// Return success // Return success
MOVQ $0x00000000, ret+24(FP) MOVQ $0x00000000, ret+24(FP)
@ -613,14 +630,19 @@ error_not_enough_literals:
MOVQ $0x00000004, ret+24(FP) MOVQ $0x00000004, ret+24(FP)
RET RET
// Return with overread error
error_overread:
MOVQ $0x00000006, ret+24(FP)
RET
// func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int // func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: BMI, BMI2, CMOV // Requires: BMI, BMI2, CMOV
TEXT ·sequenceDecs_decode_bmi2(SB), $8-32 TEXT ·sequenceDecs_decode_bmi2(SB), $8-32
MOVQ br+8(FP), CX MOVQ br+8(FP), BX
MOVQ 32(CX), AX MOVQ 24(BX), AX
MOVBQZX 40(CX), DX MOVBQZX 32(BX), DX
MOVQ 24(CX), BX MOVQ (BX), CX
MOVQ (CX), CX MOVQ 8(BX), BX
ADDQ BX, CX ADDQ BX, CX
MOVQ CX, (SP) MOVQ CX, (SP)
MOVQ ctx+16(FP), CX MOVQ ctx+16(FP), CX
@ -649,7 +671,7 @@ sequenceDecs_decode_bmi2_main_loop:
sequenceDecs_decode_bmi2_fill_byte_by_byte: sequenceDecs_decode_bmi2_fill_byte_by_byte:
CMPQ BX, $0x00 CMPQ BX, $0x00
JLE sequenceDecs_decode_bmi2_fill_end JLE sequenceDecs_decode_bmi2_fill_check_overread
CMPQ DX, $0x07 CMPQ DX, $0x07
JLE sequenceDecs_decode_bmi2_fill_end JLE sequenceDecs_decode_bmi2_fill_end
SHLQ $0x08, AX SHLQ $0x08, AX
@ -660,6 +682,10 @@ sequenceDecs_decode_bmi2_fill_byte_by_byte:
ORQ CX, AX ORQ CX, AX
JMP sequenceDecs_decode_bmi2_fill_byte_by_byte JMP sequenceDecs_decode_bmi2_fill_byte_by_byte
sequenceDecs_decode_bmi2_fill_check_overread:
CMPQ DX, $0x40
JA error_overread
sequenceDecs_decode_bmi2_fill_end: sequenceDecs_decode_bmi2_fill_end:
// Update offset // Update offset
MOVQ $0x00000808, CX MOVQ $0x00000808, CX
@ -700,7 +726,7 @@ sequenceDecs_decode_bmi2_fill_end:
sequenceDecs_decode_bmi2_fill_2_byte_by_byte: sequenceDecs_decode_bmi2_fill_2_byte_by_byte:
CMPQ BX, $0x00 CMPQ BX, $0x00
JLE sequenceDecs_decode_bmi2_fill_2_end JLE sequenceDecs_decode_bmi2_fill_2_check_overread
CMPQ DX, $0x07 CMPQ DX, $0x07
JLE sequenceDecs_decode_bmi2_fill_2_end JLE sequenceDecs_decode_bmi2_fill_2_end
SHLQ $0x08, AX SHLQ $0x08, AX
@ -711,6 +737,10 @@ sequenceDecs_decode_bmi2_fill_2_byte_by_byte:
ORQ CX, AX ORQ CX, AX
JMP sequenceDecs_decode_bmi2_fill_2_byte_by_byte JMP sequenceDecs_decode_bmi2_fill_2_byte_by_byte
sequenceDecs_decode_bmi2_fill_2_check_overread:
CMPQ DX, $0x40
JA error_overread
sequenceDecs_decode_bmi2_fill_2_end: sequenceDecs_decode_bmi2_fill_2_end:
// Update literal length // Update literal length
MOVQ $0x00000808, CX MOVQ $0x00000808, CX
@ -862,9 +892,9 @@ sequenceDecs_decode_bmi2_match_len_ofs_ok:
MOVQ R11, 152(CX) MOVQ R11, 152(CX)
MOVQ R12, 160(CX) MOVQ R12, 160(CX)
MOVQ br+8(FP), CX MOVQ br+8(FP), CX
MOVQ AX, 32(CX) MOVQ AX, 24(CX)
MOVB DL, 40(CX) MOVB DL, 32(CX)
MOVQ BX, 24(CX) MOVQ BX, 8(CX)
// Return success // Return success
MOVQ $0x00000000, ret+24(FP) MOVQ $0x00000000, ret+24(FP)
@ -889,14 +919,19 @@ error_not_enough_literals:
MOVQ $0x00000004, ret+24(FP) MOVQ $0x00000004, ret+24(FP)
RET RET
// Return with overread error
error_overread:
MOVQ $0x00000006, ret+24(FP)
RET
// func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int // func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: BMI, BMI2, CMOV // Requires: BMI, BMI2, CMOV
TEXT ·sequenceDecs_decode_56_bmi2(SB), $8-32 TEXT ·sequenceDecs_decode_56_bmi2(SB), $8-32
MOVQ br+8(FP), CX MOVQ br+8(FP), BX
MOVQ 32(CX), AX MOVQ 24(BX), AX
MOVBQZX 40(CX), DX MOVBQZX 32(BX), DX
MOVQ 24(CX), BX MOVQ (BX), CX
MOVQ (CX), CX MOVQ 8(BX), BX
ADDQ BX, CX ADDQ BX, CX
MOVQ CX, (SP) MOVQ CX, (SP)
MOVQ ctx+16(FP), CX MOVQ ctx+16(FP), CX
@ -925,7 +960,7 @@ sequenceDecs_decode_56_bmi2_main_loop:
sequenceDecs_decode_56_bmi2_fill_byte_by_byte: sequenceDecs_decode_56_bmi2_fill_byte_by_byte:
CMPQ BX, $0x00 CMPQ BX, $0x00
JLE sequenceDecs_decode_56_bmi2_fill_end JLE sequenceDecs_decode_56_bmi2_fill_check_overread
CMPQ DX, $0x07 CMPQ DX, $0x07
JLE sequenceDecs_decode_56_bmi2_fill_end JLE sequenceDecs_decode_56_bmi2_fill_end
SHLQ $0x08, AX SHLQ $0x08, AX
@ -936,6 +971,10 @@ sequenceDecs_decode_56_bmi2_fill_byte_by_byte:
ORQ CX, AX ORQ CX, AX
JMP sequenceDecs_decode_56_bmi2_fill_byte_by_byte JMP sequenceDecs_decode_56_bmi2_fill_byte_by_byte
sequenceDecs_decode_56_bmi2_fill_check_overread:
CMPQ DX, $0x40
JA error_overread
sequenceDecs_decode_56_bmi2_fill_end: sequenceDecs_decode_56_bmi2_fill_end:
// Update offset // Update offset
MOVQ $0x00000808, CX MOVQ $0x00000808, CX
@ -1113,9 +1152,9 @@ sequenceDecs_decode_56_bmi2_match_len_ofs_ok:
MOVQ R11, 152(CX) MOVQ R11, 152(CX)
MOVQ R12, 160(CX) MOVQ R12, 160(CX)
MOVQ br+8(FP), CX MOVQ br+8(FP), CX
MOVQ AX, 32(CX) MOVQ AX, 24(CX)
MOVB DL, 40(CX) MOVB DL, 32(CX)
MOVQ BX, 24(CX) MOVQ BX, 8(CX)
// Return success // Return success
MOVQ $0x00000000, ret+24(FP) MOVQ $0x00000000, ret+24(FP)
@ -1140,6 +1179,11 @@ error_not_enough_literals:
MOVQ $0x00000004, ret+24(FP) MOVQ $0x00000004, ret+24(FP)
RET RET
// Return with overread error
error_overread:
MOVQ $0x00000006, ret+24(FP)
RET
// func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool // func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool
// Requires: SSE // Requires: SSE
TEXT ·sequenceDecs_executeSimple_amd64(SB), $8-9 TEXT ·sequenceDecs_executeSimple_amd64(SB), $8-9
@ -1753,11 +1797,11 @@ empty_seqs:
// func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int // func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// Requires: CMOV, SSE // Requires: CMOV, SSE
TEXT ·sequenceDecs_decodeSync_amd64(SB), $64-32 TEXT ·sequenceDecs_decodeSync_amd64(SB), $64-32
MOVQ br+8(FP), AX MOVQ br+8(FP), CX
MOVQ 32(AX), DX MOVQ 24(CX), DX
MOVBQZX 40(AX), BX MOVBQZX 32(CX), BX
MOVQ 24(AX), SI MOVQ (CX), AX
MOVQ (AX), AX MOVQ 8(CX), SI
ADDQ SI, AX ADDQ SI, AX
MOVQ AX, (SP) MOVQ AX, (SP)
MOVQ ctx+16(FP), AX MOVQ ctx+16(FP), AX
@ -1804,7 +1848,7 @@ sequenceDecs_decodeSync_amd64_main_loop:
sequenceDecs_decodeSync_amd64_fill_byte_by_byte: sequenceDecs_decodeSync_amd64_fill_byte_by_byte:
CMPQ SI, $0x00 CMPQ SI, $0x00
JLE sequenceDecs_decodeSync_amd64_fill_end JLE sequenceDecs_decodeSync_amd64_fill_check_overread
CMPQ BX, $0x07 CMPQ BX, $0x07
JLE sequenceDecs_decodeSync_amd64_fill_end JLE sequenceDecs_decodeSync_amd64_fill_end
SHLQ $0x08, DX SHLQ $0x08, DX
@ -1815,6 +1859,10 @@ sequenceDecs_decodeSync_amd64_fill_byte_by_byte:
ORQ AX, DX ORQ AX, DX
JMP sequenceDecs_decodeSync_amd64_fill_byte_by_byte JMP sequenceDecs_decodeSync_amd64_fill_byte_by_byte
sequenceDecs_decodeSync_amd64_fill_check_overread:
CMPQ BX, $0x40
JA error_overread
sequenceDecs_decodeSync_amd64_fill_end: sequenceDecs_decodeSync_amd64_fill_end:
// Update offset // Update offset
MOVQ R9, AX MOVQ R9, AX
@ -1871,7 +1919,7 @@ sequenceDecs_decodeSync_amd64_ml_update_zero:
sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte: sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte:
CMPQ SI, $0x00 CMPQ SI, $0x00
JLE sequenceDecs_decodeSync_amd64_fill_2_end JLE sequenceDecs_decodeSync_amd64_fill_2_check_overread
CMPQ BX, $0x07 CMPQ BX, $0x07
JLE sequenceDecs_decodeSync_amd64_fill_2_end JLE sequenceDecs_decodeSync_amd64_fill_2_end
SHLQ $0x08, DX SHLQ $0x08, DX
@ -1882,6 +1930,10 @@ sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte:
ORQ AX, DX ORQ AX, DX
JMP sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte JMP sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte
sequenceDecs_decodeSync_amd64_fill_2_check_overread:
CMPQ BX, $0x40
JA error_overread
sequenceDecs_decodeSync_amd64_fill_2_end: sequenceDecs_decodeSync_amd64_fill_2_end:
// Update literal length // Update literal length
MOVQ DI, AX MOVQ DI, AX
@ -2243,9 +2295,9 @@ handle_loop:
loop_finished: loop_finished:
MOVQ br+8(FP), AX MOVQ br+8(FP), AX
MOVQ DX, 32(AX) MOVQ DX, 24(AX)
MOVB BL, 40(AX) MOVB BL, 32(AX)
MOVQ SI, 24(AX) MOVQ SI, 8(AX)
// Update the context // Update the context
MOVQ ctx+16(FP), AX MOVQ ctx+16(FP), AX
@ -2291,6 +2343,11 @@ error_not_enough_literals:
MOVQ $0x00000004, ret+24(FP) MOVQ $0x00000004, ret+24(FP)
RET RET
// Return with overread error
error_overread:
MOVQ $0x00000006, ret+24(FP)
RET
// Return with not enough output space error // Return with not enough output space error
error_not_enough_space: error_not_enough_space:
MOVQ ctx+16(FP), AX MOVQ ctx+16(FP), AX
@ -2305,11 +2362,11 @@ error_not_enough_space:
// func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int // func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// Requires: BMI, BMI2, CMOV, SSE // Requires: BMI, BMI2, CMOV, SSE
TEXT ·sequenceDecs_decodeSync_bmi2(SB), $64-32 TEXT ·sequenceDecs_decodeSync_bmi2(SB), $64-32
MOVQ br+8(FP), CX MOVQ br+8(FP), BX
MOVQ 32(CX), AX MOVQ 24(BX), AX
MOVBQZX 40(CX), DX MOVBQZX 32(BX), DX
MOVQ 24(CX), BX MOVQ (BX), CX
MOVQ (CX), CX MOVQ 8(BX), BX
ADDQ BX, CX ADDQ BX, CX
MOVQ CX, (SP) MOVQ CX, (SP)
MOVQ ctx+16(FP), CX MOVQ ctx+16(FP), CX
@ -2356,7 +2413,7 @@ sequenceDecs_decodeSync_bmi2_main_loop:
sequenceDecs_decodeSync_bmi2_fill_byte_by_byte: sequenceDecs_decodeSync_bmi2_fill_byte_by_byte:
CMPQ BX, $0x00 CMPQ BX, $0x00
JLE sequenceDecs_decodeSync_bmi2_fill_end JLE sequenceDecs_decodeSync_bmi2_fill_check_overread
CMPQ DX, $0x07 CMPQ DX, $0x07
JLE sequenceDecs_decodeSync_bmi2_fill_end JLE sequenceDecs_decodeSync_bmi2_fill_end
SHLQ $0x08, AX SHLQ $0x08, AX
@ -2367,6 +2424,10 @@ sequenceDecs_decodeSync_bmi2_fill_byte_by_byte:
ORQ CX, AX ORQ CX, AX
JMP sequenceDecs_decodeSync_bmi2_fill_byte_by_byte JMP sequenceDecs_decodeSync_bmi2_fill_byte_by_byte
sequenceDecs_decodeSync_bmi2_fill_check_overread:
CMPQ DX, $0x40
JA error_overread
sequenceDecs_decodeSync_bmi2_fill_end: sequenceDecs_decodeSync_bmi2_fill_end:
// Update offset // Update offset
MOVQ $0x00000808, CX MOVQ $0x00000808, CX
@ -2407,7 +2468,7 @@ sequenceDecs_decodeSync_bmi2_fill_end:
sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte: sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte:
CMPQ BX, $0x00 CMPQ BX, $0x00
JLE sequenceDecs_decodeSync_bmi2_fill_2_end JLE sequenceDecs_decodeSync_bmi2_fill_2_check_overread
CMPQ DX, $0x07 CMPQ DX, $0x07
JLE sequenceDecs_decodeSync_bmi2_fill_2_end JLE sequenceDecs_decodeSync_bmi2_fill_2_end
SHLQ $0x08, AX SHLQ $0x08, AX
@ -2418,6 +2479,10 @@ sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte:
ORQ CX, AX ORQ CX, AX
JMP sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte JMP sequenceDecs_decodeSync_bmi2_fill_2_byte_by_byte
sequenceDecs_decodeSync_bmi2_fill_2_check_overread:
CMPQ DX, $0x40
JA error_overread
sequenceDecs_decodeSync_bmi2_fill_2_end: sequenceDecs_decodeSync_bmi2_fill_2_end:
// Update literal length // Update literal length
MOVQ $0x00000808, CX MOVQ $0x00000808, CX
@ -2753,9 +2818,9 @@ handle_loop:
loop_finished: loop_finished:
MOVQ br+8(FP), CX MOVQ br+8(FP), CX
MOVQ AX, 32(CX) MOVQ AX, 24(CX)
MOVB DL, 40(CX) MOVB DL, 32(CX)
MOVQ BX, 24(CX) MOVQ BX, 8(CX)
// Update the context // Update the context
MOVQ ctx+16(FP), AX MOVQ ctx+16(FP), AX
@ -2801,6 +2866,11 @@ error_not_enough_literals:
MOVQ $0x00000004, ret+24(FP) MOVQ $0x00000004, ret+24(FP)
RET RET
// Return with overread error
error_overread:
MOVQ $0x00000006, ret+24(FP)
RET
// Return with not enough output space error // Return with not enough output space error
error_not_enough_space: error_not_enough_space:
MOVQ ctx+16(FP), AX MOVQ ctx+16(FP), AX
@ -2815,11 +2885,11 @@ error_not_enough_space:
// func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int // func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// Requires: CMOV, SSE // Requires: CMOV, SSE
TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $64-32 TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $64-32
MOVQ br+8(FP), AX MOVQ br+8(FP), CX
MOVQ 32(AX), DX MOVQ 24(CX), DX
MOVBQZX 40(AX), BX MOVBQZX 32(CX), BX
MOVQ 24(AX), SI MOVQ (CX), AX
MOVQ (AX), AX MOVQ 8(CX), SI
ADDQ SI, AX ADDQ SI, AX
MOVQ AX, (SP) MOVQ AX, (SP)
MOVQ ctx+16(FP), AX MOVQ ctx+16(FP), AX
@ -2866,7 +2936,7 @@ sequenceDecs_decodeSync_safe_amd64_main_loop:
sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte: sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte:
CMPQ SI, $0x00 CMPQ SI, $0x00
JLE sequenceDecs_decodeSync_safe_amd64_fill_end JLE sequenceDecs_decodeSync_safe_amd64_fill_check_overread
CMPQ BX, $0x07 CMPQ BX, $0x07
JLE sequenceDecs_decodeSync_safe_amd64_fill_end JLE sequenceDecs_decodeSync_safe_amd64_fill_end
SHLQ $0x08, DX SHLQ $0x08, DX
@ -2877,6 +2947,10 @@ sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte:
ORQ AX, DX ORQ AX, DX
JMP sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte JMP sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte
sequenceDecs_decodeSync_safe_amd64_fill_check_overread:
CMPQ BX, $0x40
JA error_overread
sequenceDecs_decodeSync_safe_amd64_fill_end: sequenceDecs_decodeSync_safe_amd64_fill_end:
// Update offset // Update offset
MOVQ R9, AX MOVQ R9, AX
@ -2933,7 +3007,7 @@ sequenceDecs_decodeSync_safe_amd64_ml_update_zero:
sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte: sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte:
CMPQ SI, $0x00 CMPQ SI, $0x00
JLE sequenceDecs_decodeSync_safe_amd64_fill_2_end JLE sequenceDecs_decodeSync_safe_amd64_fill_2_check_overread
CMPQ BX, $0x07 CMPQ BX, $0x07
JLE sequenceDecs_decodeSync_safe_amd64_fill_2_end JLE sequenceDecs_decodeSync_safe_amd64_fill_2_end
SHLQ $0x08, DX SHLQ $0x08, DX
@ -2944,6 +3018,10 @@ sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte:
ORQ AX, DX ORQ AX, DX
JMP sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte JMP sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte
sequenceDecs_decodeSync_safe_amd64_fill_2_check_overread:
CMPQ BX, $0x40
JA error_overread
sequenceDecs_decodeSync_safe_amd64_fill_2_end: sequenceDecs_decodeSync_safe_amd64_fill_2_end:
// Update literal length // Update literal length
MOVQ DI, AX MOVQ DI, AX
@ -3407,9 +3485,9 @@ handle_loop:
loop_finished: loop_finished:
MOVQ br+8(FP), AX MOVQ br+8(FP), AX
MOVQ DX, 32(AX) MOVQ DX, 24(AX)
MOVB BL, 40(AX) MOVB BL, 32(AX)
MOVQ SI, 24(AX) MOVQ SI, 8(AX)
// Update the context // Update the context
MOVQ ctx+16(FP), AX MOVQ ctx+16(FP), AX
@ -3455,6 +3533,11 @@ error_not_enough_literals:
MOVQ $0x00000004, ret+24(FP) MOVQ $0x00000004, ret+24(FP)
RET RET
// Return with overread error
error_overread:
MOVQ $0x00000006, ret+24(FP)
RET
// Return with not enough output space error // Return with not enough output space error
error_not_enough_space: error_not_enough_space:
MOVQ ctx+16(FP), AX MOVQ ctx+16(FP), AX
@ -3469,11 +3552,11 @@ error_not_enough_space:
// func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int // func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// Requires: BMI, BMI2, CMOV, SSE // Requires: BMI, BMI2, CMOV, SSE
TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $64-32 TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $64-32
MOVQ br+8(FP), CX MOVQ br+8(FP), BX
MOVQ 32(CX), AX MOVQ 24(BX), AX
MOVBQZX 40(CX), DX MOVBQZX 32(BX), DX
MOVQ 24(CX), BX MOVQ (BX), CX
MOVQ (CX), CX MOVQ 8(BX), BX
ADDQ BX, CX ADDQ BX, CX
MOVQ CX, (SP) MOVQ CX, (SP)
MOVQ ctx+16(FP), CX MOVQ ctx+16(FP), CX
@ -3520,7 +3603,7 @@ sequenceDecs_decodeSync_safe_bmi2_main_loop:
sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte: sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte:
CMPQ BX, $0x00 CMPQ BX, $0x00
JLE sequenceDecs_decodeSync_safe_bmi2_fill_end JLE sequenceDecs_decodeSync_safe_bmi2_fill_check_overread
CMPQ DX, $0x07 CMPQ DX, $0x07
JLE sequenceDecs_decodeSync_safe_bmi2_fill_end JLE sequenceDecs_decodeSync_safe_bmi2_fill_end
SHLQ $0x08, AX SHLQ $0x08, AX
@ -3531,6 +3614,10 @@ sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte:
ORQ CX, AX ORQ CX, AX
JMP sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte JMP sequenceDecs_decodeSync_safe_bmi2_fill_byte_by_byte
sequenceDecs_decodeSync_safe_bmi2_fill_check_overread:
CMPQ DX, $0x40
JA error_overread
sequenceDecs_decodeSync_safe_bmi2_fill_end: sequenceDecs_decodeSync_safe_bmi2_fill_end:
// Update offset // Update offset
MOVQ $0x00000808, CX MOVQ $0x00000808, CX
@ -3571,7 +3658,7 @@ sequenceDecs_decodeSync_safe_bmi2_fill_end:
sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte: sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte:
CMPQ BX, $0x00 CMPQ BX, $0x00
JLE sequenceDecs_decodeSync_safe_bmi2_fill_2_end JLE sequenceDecs_decodeSync_safe_bmi2_fill_2_check_overread
CMPQ DX, $0x07 CMPQ DX, $0x07
JLE sequenceDecs_decodeSync_safe_bmi2_fill_2_end JLE sequenceDecs_decodeSync_safe_bmi2_fill_2_end
SHLQ $0x08, AX SHLQ $0x08, AX
@ -3582,6 +3669,10 @@ sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte:
ORQ CX, AX ORQ CX, AX
JMP sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte JMP sequenceDecs_decodeSync_safe_bmi2_fill_2_byte_by_byte
sequenceDecs_decodeSync_safe_bmi2_fill_2_check_overread:
CMPQ DX, $0x40
JA error_overread
sequenceDecs_decodeSync_safe_bmi2_fill_2_end: sequenceDecs_decodeSync_safe_bmi2_fill_2_end:
// Update literal length // Update literal length
MOVQ $0x00000808, CX MOVQ $0x00000808, CX
@ -4019,9 +4110,9 @@ handle_loop:
loop_finished: loop_finished:
MOVQ br+8(FP), CX MOVQ br+8(FP), CX
MOVQ AX, 32(CX) MOVQ AX, 24(CX)
MOVB DL, 40(CX) MOVB DL, 32(CX)
MOVQ BX, 24(CX) MOVQ BX, 8(CX)
// Update the context // Update the context
MOVQ ctx+16(FP), AX MOVQ ctx+16(FP), AX
@ -4067,6 +4158,11 @@ error_not_enough_literals:
MOVQ $0x00000004, ret+24(FP) MOVQ $0x00000004, ret+24(FP)
RET RET
// Return with overread error
error_overread:
MOVQ $0x00000006, ret+24(FP)
RET
// Return with not enough output space error // Return with not enough output space error
error_not_enough_space: error_not_enough_space:
MOVQ ctx+16(FP), AX MOVQ ctx+16(FP), AX
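Every fill loop in the assembly above gains the same epilogue: when the input bytes run out, control jumps to a check comparing the bits-read counter against $0x40 (64) and returns code 6 (errorOverread) if it is larger, i.e. more bits were consumed than the 64-bit buffer ever held. The register loads also change offsets (24/32/0/8 instead of 32/40/24/0), consistent with the Go-side switch from br.off to len(br.in). A Go rendering of the overread check, with the reader layout simplified and field semantics assumed from the asm:

package main

import (
	"errors"
	"fmt"
)

var errOverread = errors.New("bitstream overread") // status code 6 in the asm

type bitReader struct {
	in       []byte // remaining input, consumed from the end
	value    uint64
	bitsRead uint8
}

// fill mirrors the byte-by-byte fill loops: refill while bytes remain
// and at least 8 bits were consumed; once input is exhausted, a
// bitsRead above 64 (CMPQ $0x40 / JA error_overread) means the stream
// was truncated.
func (b *bitReader) fill() error {
	for len(b.in) > 0 && b.bitsRead >= 8 {
		b.value = b.value<<8 | uint64(b.in[len(b.in)-1])
		b.in = b.in[:len(b.in)-1]
		b.bitsRead -= 8
	}
	if len(b.in) == 0 && b.bitsRead > 64 {
		return errOverread
	}
	return nil
}

func main() {
	br := &bitReader{bitsRead: 72} // consumed past the end: truncated input
	fmt.Println(br.fill())
}
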

View File

@ -29,7 +29,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
} }
for i := range seqs { for i := range seqs {
var ll, mo, ml int var ll, mo, ml int
if br.off > 4+((maxOffsetBits+16+16)>>3) { if len(br.in) > 4+((maxOffsetBits+16+16)>>3) {
// inlined function: // inlined function:
// ll, mo, ml = s.nextFast(br, llState, mlState, ofState) // ll, mo, ml = s.nextFast(br, llState, mlState, ofState)

View File

@ -95,10 +95,9 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) {
var written int64 var written int64
var readHeader bool var readHeader bool
{ {
var header []byte header := frameHeader{WindowSize: snappyMaxBlockSize}.appendTo(r.buf[:0])
var n int
header, r.err = frameHeader{WindowSize: snappyMaxBlockSize}.appendTo(r.buf[:0])
var n int
n, r.err = w.Write(header) n, r.err = w.Write(header)
if r.err != nil { if r.err != nil {
return written, r.err return written, r.err

View File

@ -9,7 +9,6 @@ import (
"errors" "errors"
"log" "log"
"math" "math"
"math/bits"
) )
// enable debug printing // enable debug printing
@ -106,33 +105,12 @@ func printf(format string, a ...interface{}) {
} }
} }
// matchLen returns the maximum common prefix length of a and b.
// a must be the shortest of the two.
func matchLen(a, b []byte) (n int) {
for ; len(a) >= 8 && len(b) >= 8; a, b = a[8:], b[8:] {
diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
if diff != 0 {
return n + bits.TrailingZeros64(diff)>>3
}
n += 8
}
for i := range a {
if a[i] != b[i] {
break
}
n++
}
return n
}
func load3232(b []byte, i int32) uint32 { func load3232(b []byte, i int32) uint32 {
return binary.LittleEndian.Uint32(b[i:]) return binary.LittleEndian.Uint32(b[:len(b):len(b)][i:])
} }
func load6432(b []byte, i int32) uint64 { func load6432(b []byte, i int32) uint64 {
return binary.LittleEndian.Uint64(b[i:]) return binary.LittleEndian.Uint64(b[:len(b):len(b)][i:])
} }
type byter interface { type byter interface {
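load3232 and load6432 now index through a full slice expression. The diff doesn't state why (three-index slices are commonly used as bounds/capacity hints), but what b[:len(b):len(b)] does is well-defined: same bytes, capacity clamped to length:

package main

import "fmt"

func main() {
	backing := make([]byte, 8, 16)
	clamped := backing[:len(backing):len(backing)] // b[low:high:max]

	fmt.Println(len(backing), cap(backing)) // 8 16
	fmt.Println(len(clamped), cap(clamped)) // 8 8

	// Appends through clamped now reallocate instead of writing into
	// backing's spare capacity.
	_ = append(clamped, 0xFF)
}
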

2
vendor/modules.txt vendored
View File

@ -138,7 +138,7 @@ github.com/imdario/mergo
# github.com/inconshreveable/mousetrap v1.1.0 # github.com/inconshreveable/mousetrap v1.1.0
## explicit; go 1.18 ## explicit; go 1.18
github.com/inconshreveable/mousetrap github.com/inconshreveable/mousetrap
# github.com/klauspost/compress v1.16.3 # github.com/klauspost/compress v1.17.2
## explicit; go 1.18 ## explicit; go 1.18
github.com/klauspost/compress github.com/klauspost/compress
github.com/klauspost/compress/fse github.com/klauspost/compress/fse