mirror of https://github.com/docker/cli.git
vendor: github.com/klauspost/compress v1.15.9
Various fixes in zstd compression. Release notes:
- https://github.com/klauspost/compress/releases/tag/v1.15.9
- https://github.com/klauspost/compress/releases/tag/v1.15.8
- https://github.com/klauspost/compress/releases/tag/v1.15.7
- https://github.com/klauspost/compress/releases/tag/v1.15.6
- https://github.com/klauspost/compress/releases/tag/v1.15.5
- https://github.com/klauspost/compress/releases/tag/v1.15.4
- https://github.com/klauspost/compress/releases/tag/v1.15.3
- https://github.com/klauspost/compress/releases/tag/v1.15.2
full diff: https://github.com/klauspost/compress/compare/v1.15.1...v1.15.9
Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
This commit is contained in:
parent
813bd79471
commit
8771b956ea
|
@ -54,7 +54,7 @@ require (
|
||||||
github.com/golang/protobuf v1.5.2 // indirect
|
github.com/golang/protobuf v1.5.2 // indirect
|
||||||
github.com/gorilla/mux v1.8.0 // indirect; updated to v1.8.0 to get rid of old compatibility for "context"
|
github.com/gorilla/mux v1.8.0 // indirect; updated to v1.8.0 to get rid of old compatibility for "context"
|
||||||
github.com/inconshreveable/mousetrap v1.0.0 // indirect
|
github.com/inconshreveable/mousetrap v1.0.0 // indirect
|
||||||
github.com/klauspost/compress v1.15.1 // indirect
|
github.com/klauspost/compress v1.15.9 // indirect
|
||||||
github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect
|
github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect
|
||||||
github.com/miekg/pkcs11 v1.1.1 // indirect
|
github.com/miekg/pkcs11 v1.1.1 // indirect
|
||||||
github.com/moby/sys/symlink v0.2.0 // indirect
|
github.com/moby/sys/symlink v0.2.0 // indirect
|
||||||
|
|
|
@ -250,8 +250,8 @@ github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8
|
||||||
github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00=
|
github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00=
|
||||||
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
|
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
|
||||||
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
|
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
|
||||||
github.com/klauspost/compress v1.15.1 h1:y9FcTHGyrebwfP0ZZqFiaxTaiDnUrGkJkI+f583BL1A=
|
github.com/klauspost/compress v1.15.9 h1:wKRjX6JRtDdrE9qwa4b/Cip7ACOshUI4smpCQanqjSY=
|
||||||
github.com/klauspost/compress v1.15.1/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
|
github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU=
|
||||||
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
||||||
github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
||||||
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
|
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
|
||||||
|
|
|
@ -23,3 +23,10 @@ _testmain.go
|
||||||
*.test
|
*.test
|
||||||
*.prof
|
*.prof
|
||||||
/s2/cmd/_s2sx/sfx-exe
|
/s2/cmd/_s2sx/sfx-exe
|
||||||
|
|
||||||
|
# Linux perf files
|
||||||
|
perf.data
|
||||||
|
perf.data.old
|
||||||
|
|
||||||
|
# gdb history
|
||||||
|
.gdb_history
|
||||||
|
|
|
@ -17,6 +17,72 @@ This package provides various compression algorithms.
|
||||||
|
|
||||||
# changelog
|
# changelog
|
||||||
|
|
||||||
|
* July 13, 2022 (v1.15.8)
|
||||||
|
|
||||||
|
* gzip: fix stack exhaustion bug in Reader.Read https://github.com/klauspost/compress/pull/641
|
||||||
|
* s2: Add Index header trim/restore https://github.com/klauspost/compress/pull/638
|
||||||
|
* zstd: Optimize seqdeq amd64 asm by @greatroar in https://github.com/klauspost/compress/pull/636
|
||||||
|
* zstd: Improve decoder memcopy https://github.com/klauspost/compress/pull/637
|
||||||
|
* huff0: Pass a single bitReader pointer to asm by @greatroar in https://github.com/klauspost/compress/pull/634
|
||||||
|
* zstd: Branchless getBits for amd64 w/o BMI2 by @greatroar in https://github.com/klauspost/compress/pull/640
|
||||||
|
* gzhttp: Remove header before writing https://github.com/klauspost/compress/pull/639
|
||||||
|
|
||||||
|
* June 29, 2022 (v1.15.7)
|
||||||
|
|
||||||
|
* s2: Fix absolute forward seeks https://github.com/klauspost/compress/pull/633
|
||||||
|
* zip: Merge upstream https://github.com/klauspost/compress/pull/631
|
||||||
|
* zip: Re-add zip64 fix https://github.com/klauspost/compress/pull/624
|
||||||
|
* zstd: translate fseDecoder.buildDtable into asm by @WojciechMula in https://github.com/klauspost/compress/pull/598
|
||||||
|
* flate: Faster histograms https://github.com/klauspost/compress/pull/620
|
||||||
|
* deflate: Use compound hcode https://github.com/klauspost/compress/pull/622
|
||||||
|
|
||||||
|
* June 3, 2022 (v1.15.6)
|
||||||
|
* s2: Improve coding for long, close matches https://github.com/klauspost/compress/pull/613
|
||||||
|
* s2c: Add Snappy/S2 stream recompression https://github.com/klauspost/compress/pull/611
|
||||||
|
* zstd: Always use configured block size https://github.com/klauspost/compress/pull/605
|
||||||
|
* zstd: Fix incorrect hash table placement for dict encoding in default https://github.com/klauspost/compress/pull/606
|
||||||
|
* zstd: Apply default config to ZipDecompressor without options https://github.com/klauspost/compress/pull/608
|
||||||
|
* gzhttp: Exclude more common archive formats https://github.com/klauspost/compress/pull/612
|
||||||
|
* s2: Add ReaderIgnoreCRC https://github.com/klauspost/compress/pull/609
|
||||||
|
* s2: Remove sanity load on index creation https://github.com/klauspost/compress/pull/607
|
||||||
|
* snappy: Use dedicated function for scoring https://github.com/klauspost/compress/pull/614
|
||||||
|
* s2c+s2d: Use official snappy framed extension https://github.com/klauspost/compress/pull/610
|
||||||
|
|
||||||
|
* May 25, 2022 (v1.15.5)
|
||||||
|
* s2: Add concurrent stream decompression https://github.com/klauspost/compress/pull/602
|
||||||
|
* s2: Fix final emit oob read crash on amd64 https://github.com/klauspost/compress/pull/601
|
||||||
|
* huff0: asm implementation of Decompress1X by @WojciechMula https://github.com/klauspost/compress/pull/596
|
||||||
|
* zstd: Use 1 less goroutine for stream decoding https://github.com/klauspost/compress/pull/588
|
||||||
|
* zstd: Copy literal in 16 byte blocks when possible https://github.com/klauspost/compress/pull/592
|
||||||
|
* zstd: Speed up when WithDecoderLowmem(false) https://github.com/klauspost/compress/pull/599
|
||||||
|
* zstd: faster next state update in BMI2 version of decode by @WojciechMula in https://github.com/klauspost/compress/pull/593
|
||||||
|
* huff0: Do not check max size when reading table. https://github.com/klauspost/compress/pull/586
|
||||||
|
* flate: Inplace hashing for level 7-9 by @klauspost in https://github.com/klauspost/compress/pull/590
|
||||||
|
|
||||||
|
|
||||||
|
* May 11, 2022 (v1.15.4)
|
||||||
|
* huff0: decompress directly into output by @WojciechMula in [#577](https://github.com/klauspost/compress/pull/577)
|
||||||
|
* inflate: Keep dict on stack [#581](https://github.com/klauspost/compress/pull/581)
|
||||||
|
* zstd: Faster decoding memcopy in asm [#583](https://github.com/klauspost/compress/pull/583)
|
||||||
|
* zstd: Fix ignored crc [#580](https://github.com/klauspost/compress/pull/580)
|
||||||
|
|
||||||
|
* May 5, 2022 (v1.15.3)
|
||||||
|
* zstd: Allow to ignore checksum checking by @WojciechMula [#572](https://github.com/klauspost/compress/pull/572)
|
||||||
|
* s2: Fix incorrect seek for io.SeekEnd in [#575](https://github.com/klauspost/compress/pull/575)
|
||||||
|
|
||||||
|
* Apr 26, 2022 (v1.15.2)
|
||||||
|
* zstd: Add x86-64 assembly for decompression on streams and blocks. Contributed by [@WojciechMula](https://github.com/WojciechMula). Typically 2x faster. [#528](https://github.com/klauspost/compress/pull/528) [#531](https://github.com/klauspost/compress/pull/531) [#545](https://github.com/klauspost/compress/pull/545) [#537](https://github.com/klauspost/compress/pull/537)
|
||||||
|
* zstd: Add options to ZipDecompressor and fixes [#539](https://github.com/klauspost/compress/pull/539)
|
||||||
|
* s2: Use sorted search for index [#555](https://github.com/klauspost/compress/pull/555)
|
||||||
|
* Minimum version is Go 1.16, added CI test on 1.18.
|
||||||
|
|
||||||
|
* Mar 11, 2022 (v1.15.1)
|
||||||
|
* huff0: Add x86 assembly of Decode4X by @WojciechMula in [#512](https://github.com/klauspost/compress/pull/512)
|
||||||
|
* zstd: Reuse zip decoders in [#514](https://github.com/klauspost/compress/pull/514)
|
||||||
|
* zstd: Detect extra block data and report as corrupted in [#520](https://github.com/klauspost/compress/pull/520)
|
||||||
|
* zstd: Handle zero sized frame content size stricter in [#521](https://github.com/klauspost/compress/pull/521)
|
||||||
|
* zstd: Add stricter block size checks in [#523](https://github.com/klauspost/compress/pull/523)
|
||||||
|
|
||||||
* Mar 3, 2022 (v1.15.0)
|
* Mar 3, 2022 (v1.15.0)
|
||||||
* zstd: Refactor decoder by @klauspost in [#498](https://github.com/klauspost/compress/pull/498)
|
* zstd: Refactor decoder by @klauspost in [#498](https://github.com/klauspost/compress/pull/498)
|
||||||
* zstd: Add stream encoding without goroutines by @klauspost in [#505](https://github.com/klauspost/compress/pull/505)
|
* zstd: Add stream encoding without goroutines by @klauspost in [#505](https://github.com/klauspost/compress/pull/505)
|
||||||
|
@ -60,6 +126,9 @@ While the release has been extensively tested, it is recommended to testing when
|
||||||
* zstd: add arm64 xxhash assembly in [#464](https://github.com/klauspost/compress/pull/464)
|
* zstd: add arm64 xxhash assembly in [#464](https://github.com/klauspost/compress/pull/464)
|
||||||
* Add garbled for binaries for s2 in [#445](https://github.com/klauspost/compress/pull/445)
|
* Add garbled for binaries for s2 in [#445](https://github.com/klauspost/compress/pull/445)
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary>See changes to v1.13.x</summary>
|
||||||
|
|
||||||
* Aug 30, 2021 (v1.13.5)
|
* Aug 30, 2021 (v1.13.5)
|
||||||
* gz/zlib/flate: Alias stdlib errors [#425](https://github.com/klauspost/compress/pull/425)
|
* gz/zlib/flate: Alias stdlib errors [#425](https://github.com/klauspost/compress/pull/425)
|
||||||
* s2: Add block support to commandline tools [#413](https://github.com/klauspost/compress/pull/413)
|
* s2: Add block support to commandline tools [#413](https://github.com/klauspost/compress/pull/413)
|
||||||
|
@ -88,6 +157,8 @@ While the release has been extensively tested, it is recommended to testing when
|
||||||
* Added [gzhttp](https://github.com/klauspost/compress/tree/master/gzhttp#gzip-handler) which allows wrapping HTTP servers and clients with GZIP compressors.
|
* Added [gzhttp](https://github.com/klauspost/compress/tree/master/gzhttp#gzip-handler) which allows wrapping HTTP servers and clients with GZIP compressors.
|
||||||
* zstd: Detect short invalid signatures [#382](https://github.com/klauspost/compress/pull/382)
|
* zstd: Detect short invalid signatures [#382](https://github.com/klauspost/compress/pull/382)
|
||||||
* zstd: Spawn decoder goroutine only if needed. [#380](https://github.com/klauspost/compress/pull/380)
|
* zstd: Spawn decoder goroutine only if needed. [#380](https://github.com/klauspost/compress/pull/380)
|
||||||
|
</details>
|
||||||
|
|
||||||
|
|
||||||
<details>
|
<details>
|
||||||
<summary>See changes to v1.12.x</summary>
|
<summary>See changes to v1.12.x</summary>
|
||||||
|
|
|
@ -1,5 +0,0 @@
|
||||||
package huff0
|
|
||||||
|
|
||||||
//go:generate go run generate.go
|
|
||||||
//go:generate asmfmt -w decompress_amd64.s
|
|
||||||
//go:generate asmfmt -w decompress_8b_amd64.s
|
|
|
@ -165,11 +165,6 @@ func (b *bitReaderShifted) peekBitsFast(n uint8) uint16 {
|
||||||
return uint16(b.value >> ((64 - n) & 63))
|
return uint16(b.value >> ((64 - n) & 63))
|
||||||
}
|
}
|
||||||
|
|
||||||
// peekTopBits(n) is equivalent to peekBitsFast(64 - n)
|
|
||||||
func (b *bitReaderShifted) peekTopBits(n uint8) uint16 {
|
|
||||||
return uint16(b.value >> n)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (b *bitReaderShifted) advance(n uint8) {
|
func (b *bitReaderShifted) advance(n uint8) {
|
||||||
b.bitsRead += n
|
b.bitsRead += n
|
||||||
b.value <<= n & 63
|
b.value <<= n & 63
|
||||||
|
@ -220,11 +215,6 @@ func (b *bitReaderShifted) fill() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// finished returns true if all bits have been read from the bit stream.
|
|
||||||
func (b *bitReaderShifted) finished() bool {
|
|
||||||
return b.off == 0 && b.bitsRead >= 64
|
|
||||||
}
|
|
||||||
|
|
||||||
func (b *bitReaderShifted) remaining() uint {
|
func (b *bitReaderShifted) remaining() uint {
|
||||||
return b.off*8 + uint(64-b.bitsRead)
|
return b.off*8 + uint(64-b.bitsRead)
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,8 +5,6 @@
|
||||||
|
|
||||||
package huff0
|
package huff0
|
||||||
|
|
||||||
import "fmt"
|
|
||||||
|
|
||||||
// bitWriter will write bits.
|
// bitWriter will write bits.
|
||||||
// First bit will be LSB of the first byte of output.
|
// First bit will be LSB of the first byte of output.
|
||||||
type bitWriter struct {
|
type bitWriter struct {
|
||||||
|
@ -23,14 +21,6 @@ var bitMask16 = [32]uint16{
|
||||||
0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
|
0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
|
||||||
0xFFFF, 0xFFFF} /* up to 16 bits */
|
0xFFFF, 0xFFFF} /* up to 16 bits */
|
||||||
|
|
||||||
// addBits16NC will add up to 16 bits.
|
|
||||||
// It will not check if there is space for them,
|
|
||||||
// so the caller must ensure that it has flushed recently.
|
|
||||||
func (b *bitWriter) addBits16NC(value uint16, bits uint8) {
|
|
||||||
b.bitContainer |= uint64(value&bitMask16[bits&31]) << (b.nBits & 63)
|
|
||||||
b.nBits += bits
|
|
||||||
}
|
|
||||||
|
|
||||||
// addBits16Clean will add up to 16 bits. value may not contain more set bits than indicated.
|
// addBits16Clean will add up to 16 bits. value may not contain more set bits than indicated.
|
||||||
// It will not check if there is space for them, so the caller must ensure that it has flushed recently.
|
// It will not check if there is space for them, so the caller must ensure that it has flushed recently.
|
||||||
func (b *bitWriter) addBits16Clean(value uint16, bits uint8) {
|
func (b *bitWriter) addBits16Clean(value uint16, bits uint8) {
|
||||||
|
@ -70,104 +60,6 @@ func (b *bitWriter) encTwoSymbols(ct cTable, av, bv byte) {
|
||||||
b.nBits += encA.nBits + encB.nBits
|
b.nBits += encA.nBits + encB.nBits
|
||||||
}
|
}
|
||||||
|
|
||||||
// addBits16ZeroNC will add up to 16 bits.
|
|
||||||
// It will not check if there is space for them,
|
|
||||||
// so the caller must ensure that it has flushed recently.
|
|
||||||
// This is fastest if bits can be zero.
|
|
||||||
func (b *bitWriter) addBits16ZeroNC(value uint16, bits uint8) {
|
|
||||||
if bits == 0 {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
value <<= (16 - bits) & 15
|
|
||||||
value >>= (16 - bits) & 15
|
|
||||||
b.bitContainer |= uint64(value) << (b.nBits & 63)
|
|
||||||
b.nBits += bits
|
|
||||||
}
|
|
||||||
|
|
||||||
// flush will flush all pending full bytes.
|
|
||||||
// There will be at least 56 bits available for writing when this has been called.
|
|
||||||
// Using flush32 is faster, but leaves less space for writing.
|
|
||||||
func (b *bitWriter) flush() {
|
|
||||||
v := b.nBits >> 3
|
|
||||||
switch v {
|
|
||||||
case 0:
|
|
||||||
return
|
|
||||||
case 1:
|
|
||||||
b.out = append(b.out,
|
|
||||||
byte(b.bitContainer),
|
|
||||||
)
|
|
||||||
b.bitContainer >>= 1 << 3
|
|
||||||
case 2:
|
|
||||||
b.out = append(b.out,
|
|
||||||
byte(b.bitContainer),
|
|
||||||
byte(b.bitContainer>>8),
|
|
||||||
)
|
|
||||||
b.bitContainer >>= 2 << 3
|
|
||||||
case 3:
|
|
||||||
b.out = append(b.out,
|
|
||||||
byte(b.bitContainer),
|
|
||||||
byte(b.bitContainer>>8),
|
|
||||||
byte(b.bitContainer>>16),
|
|
||||||
)
|
|
||||||
b.bitContainer >>= 3 << 3
|
|
||||||
case 4:
|
|
||||||
b.out = append(b.out,
|
|
||||||
byte(b.bitContainer),
|
|
||||||
byte(b.bitContainer>>8),
|
|
||||||
byte(b.bitContainer>>16),
|
|
||||||
byte(b.bitContainer>>24),
|
|
||||||
)
|
|
||||||
b.bitContainer >>= 4 << 3
|
|
||||||
case 5:
|
|
||||||
b.out = append(b.out,
|
|
||||||
byte(b.bitContainer),
|
|
||||||
byte(b.bitContainer>>8),
|
|
||||||
byte(b.bitContainer>>16),
|
|
||||||
byte(b.bitContainer>>24),
|
|
||||||
byte(b.bitContainer>>32),
|
|
||||||
)
|
|
||||||
b.bitContainer >>= 5 << 3
|
|
||||||
case 6:
|
|
||||||
b.out = append(b.out,
|
|
||||||
byte(b.bitContainer),
|
|
||||||
byte(b.bitContainer>>8),
|
|
||||||
byte(b.bitContainer>>16),
|
|
||||||
byte(b.bitContainer>>24),
|
|
||||||
byte(b.bitContainer>>32),
|
|
||||||
byte(b.bitContainer>>40),
|
|
||||||
)
|
|
||||||
b.bitContainer >>= 6 << 3
|
|
||||||
case 7:
|
|
||||||
b.out = append(b.out,
|
|
||||||
byte(b.bitContainer),
|
|
||||||
byte(b.bitContainer>>8),
|
|
||||||
byte(b.bitContainer>>16),
|
|
||||||
byte(b.bitContainer>>24),
|
|
||||||
byte(b.bitContainer>>32),
|
|
||||||
byte(b.bitContainer>>40),
|
|
||||||
byte(b.bitContainer>>48),
|
|
||||||
)
|
|
||||||
b.bitContainer >>= 7 << 3
|
|
||||||
case 8:
|
|
||||||
b.out = append(b.out,
|
|
||||||
byte(b.bitContainer),
|
|
||||||
byte(b.bitContainer>>8),
|
|
||||||
byte(b.bitContainer>>16),
|
|
||||||
byte(b.bitContainer>>24),
|
|
||||||
byte(b.bitContainer>>32),
|
|
||||||
byte(b.bitContainer>>40),
|
|
||||||
byte(b.bitContainer>>48),
|
|
||||||
byte(b.bitContainer>>56),
|
|
||||||
)
|
|
||||||
b.bitContainer = 0
|
|
||||||
b.nBits = 0
|
|
||||||
return
|
|
||||||
default:
|
|
||||||
panic(fmt.Errorf("bits (%d) > 64", b.nBits))
|
|
||||||
}
|
|
||||||
b.nBits &= 7
|
|
||||||
}
|
|
||||||
|
|
||||||
// flush32 will flush out, so there are at least 32 bits available for writing.
|
// flush32 will flush out, so there are at least 32 bits available for writing.
|
||||||
func (b *bitWriter) flush32() {
|
func (b *bitWriter) flush32() {
|
||||||
if b.nBits < 32 {
|
if b.nBits < 32 {
|
||||||
|
@ -201,10 +93,3 @@ func (b *bitWriter) close() error {
|
||||||
b.flushAlign()
|
b.flushAlign()
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// reset and continue writing by appending to out.
|
|
||||||
func (b *bitWriter) reset(out []byte) {
|
|
||||||
b.bitContainer = 0
|
|
||||||
b.nBits = 0
|
|
||||||
b.out = out
|
|
||||||
}
|
|
||||||
|
|
|
@ -20,11 +20,6 @@ func (b *byteReader) init(in []byte) {
|
||||||
b.off = 0
|
b.off = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// advance the stream by n bytes.
|
|
||||||
func (b *byteReader) advance(n uint) {
|
|
||||||
b.off += int(n)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Int32 returns a little endian int32 starting at current offset.
|
// Int32 returns a little endian int32 starting at current offset.
|
||||||
func (b byteReader) Int32() int32 {
|
func (b byteReader) Int32() int32 {
|
||||||
v3 := int32(b.b[b.off+3])
|
v3 := int32(b.b[b.off+3])
|
||||||
|
@ -43,11 +38,6 @@ func (b byteReader) Uint32() uint32 {
|
||||||
return (v3 << 24) | (v2 << 16) | (v1 << 8) | v0
|
return (v3 << 24) | (v2 << 16) | (v1 << 8) | v0
|
||||||
}
|
}
|
||||||
|
|
||||||
// unread returns the unread portion of the input.
|
|
||||||
func (b byteReader) unread() []byte {
|
|
||||||
return b.b[b.off:]
|
|
||||||
}
|
|
||||||
|
|
||||||
// remain will return the number of bytes remaining.
|
// remain will return the number of bytes remaining.
|
||||||
func (b byteReader) remain() int {
|
func (b byteReader) remain() int {
|
||||||
return len(b.b) - b.off
|
return len(b.b) - b.off
|
||||||
|
|
|
@ -404,6 +404,7 @@ func (s *Scratch) canUseTable(c cTable) bool {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//lint:ignore U1000 used for debugging
|
||||||
func (s *Scratch) validateTable(c cTable) bool {
|
func (s *Scratch) validateTable(c cTable) bool {
|
||||||
if len(c) < int(s.symbolLen) {
|
if len(c) < int(s.symbolLen) {
|
||||||
return false
|
return false
|
||||||
|
|
|
@ -11,7 +11,6 @@ import (
|
||||||
|
|
||||||
type dTable struct {
|
type dTable struct {
|
||||||
single []dEntrySingle
|
single []dEntrySingle
|
||||||
double []dEntryDouble
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// single-symbols decoding
|
// single-symbols decoding
|
||||||
|
@ -19,13 +18,6 @@ type dEntrySingle struct {
|
||||||
entry uint16
|
entry uint16
|
||||||
}
|
}
|
||||||
|
|
||||||
// double-symbols decoding
|
|
||||||
type dEntryDouble struct {
|
|
||||||
seq [4]byte
|
|
||||||
nBits uint8
|
|
||||||
len uint8
|
|
||||||
}
|
|
||||||
|
|
||||||
// Uses special code for all tables that are < 8 bits.
|
// Uses special code for all tables that are < 8 bits.
|
||||||
const use8BitTables = true
|
const use8BitTables = true
|
||||||
|
|
||||||
|
@ -35,7 +27,7 @@ const use8BitTables = true
|
||||||
// If no Scratch is provided a new one is allocated.
|
// If no Scratch is provided a new one is allocated.
|
||||||
// The returned Scratch can be used for encoding or decoding input using this table.
|
// The returned Scratch can be used for encoding or decoding input using this table.
|
||||||
func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) {
|
func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) {
|
||||||
s, err = s.prepare(in)
|
s, err = s.prepare(nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return s, nil, err
|
return s, nil, err
|
||||||
}
|
}
|
||||||
|
@ -236,108 +228,6 @@ func (d *Decoder) buffer() *[4][256]byte {
|
||||||
return &[4][256]byte{}
|
return &[4][256]byte{}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Decompress1X will decompress a 1X encoded stream.
|
|
||||||
// The cap of the output buffer will be the maximum decompressed size.
|
|
||||||
// The length of the supplied input must match the end of a block exactly.
|
|
||||||
func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) {
|
|
||||||
if len(d.dt.single) == 0 {
|
|
||||||
return nil, errors.New("no table loaded")
|
|
||||||
}
|
|
||||||
if use8BitTables && d.actualTableLog <= 8 {
|
|
||||||
return d.decompress1X8Bit(dst, src)
|
|
||||||
}
|
|
||||||
var br bitReaderShifted
|
|
||||||
err := br.init(src)
|
|
||||||
if err != nil {
|
|
||||||
return dst, err
|
|
||||||
}
|
|
||||||
maxDecodedSize := cap(dst)
|
|
||||||
dst = dst[:0]
|
|
||||||
|
|
||||||
// Avoid bounds check by always having full sized table.
|
|
||||||
const tlSize = 1 << tableLogMax
|
|
||||||
const tlMask = tlSize - 1
|
|
||||||
dt := d.dt.single[:tlSize]
|
|
||||||
|
|
||||||
// Use temp table to avoid bound checks/append penalty.
|
|
||||||
bufs := d.buffer()
|
|
||||||
buf := &bufs[0]
|
|
||||||
var off uint8
|
|
||||||
|
|
||||||
for br.off >= 8 {
|
|
||||||
br.fillFast()
|
|
||||||
v := dt[br.peekBitsFast(d.actualTableLog)&tlMask]
|
|
||||||
br.advance(uint8(v.entry))
|
|
||||||
buf[off+0] = uint8(v.entry >> 8)
|
|
||||||
|
|
||||||
v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
|
|
||||||
br.advance(uint8(v.entry))
|
|
||||||
buf[off+1] = uint8(v.entry >> 8)
|
|
||||||
|
|
||||||
// Refill
|
|
||||||
br.fillFast()
|
|
||||||
|
|
||||||
v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
|
|
||||||
br.advance(uint8(v.entry))
|
|
||||||
buf[off+2] = uint8(v.entry >> 8)
|
|
||||||
|
|
||||||
v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
|
|
||||||
br.advance(uint8(v.entry))
|
|
||||||
buf[off+3] = uint8(v.entry >> 8)
|
|
||||||
|
|
||||||
off += 4
|
|
||||||
if off == 0 {
|
|
||||||
if len(dst)+256 > maxDecodedSize {
|
|
||||||
br.close()
|
|
||||||
d.bufs.Put(bufs)
|
|
||||||
return nil, ErrMaxDecodedSizeExceeded
|
|
||||||
}
|
|
||||||
dst = append(dst, buf[:]...)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(dst)+int(off) > maxDecodedSize {
|
|
||||||
d.bufs.Put(bufs)
|
|
||||||
br.close()
|
|
||||||
return nil, ErrMaxDecodedSizeExceeded
|
|
||||||
}
|
|
||||||
dst = append(dst, buf[:off]...)
|
|
||||||
|
|
||||||
// br.off < 8, so uint8 is fine
|
|
||||||
bitsLeft := uint8(br.off)*8 + 64 - br.bitsRead
|
|
||||||
for bitsLeft > 0 {
|
|
||||||
br.fill()
|
|
||||||
if false && br.bitsRead >= 32 {
|
|
||||||
if br.off >= 4 {
|
|
||||||
v := br.in[br.off-4:]
|
|
||||||
v = v[:4]
|
|
||||||
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
|
||||||
br.value = (br.value << 32) | uint64(low)
|
|
||||||
br.bitsRead -= 32
|
|
||||||
br.off -= 4
|
|
||||||
} else {
|
|
||||||
for br.off > 0 {
|
|
||||||
br.value = (br.value << 8) | uint64(br.in[br.off-1])
|
|
||||||
br.bitsRead -= 8
|
|
||||||
br.off--
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if len(dst) >= maxDecodedSize {
|
|
||||||
d.bufs.Put(bufs)
|
|
||||||
br.close()
|
|
||||||
return nil, ErrMaxDecodedSizeExceeded
|
|
||||||
}
|
|
||||||
v := d.dt.single[br.peekBitsFast(d.actualTableLog)&tlMask]
|
|
||||||
nBits := uint8(v.entry)
|
|
||||||
br.advance(nBits)
|
|
||||||
bitsLeft -= nBits
|
|
||||||
dst = append(dst, uint8(v.entry>>8))
|
|
||||||
}
|
|
||||||
d.bufs.Put(bufs)
|
|
||||||
return dst, br.close()
|
|
||||||
}
|
|
||||||
|
|
||||||
// decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8.
|
// decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8.
|
||||||
// The cap of the output buffer will be the maximum decompressed size.
|
// The cap of the output buffer will be the maximum decompressed size.
|
||||||
// The length of the supplied input must match the end of a block exactly.
|
// The length of the supplied input must match the end of a block exactly.
|
||||||
|
@ -995,7 +885,6 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
|
||||||
|
|
||||||
const shift = 56
|
const shift = 56
|
||||||
const tlSize = 1 << 8
|
const tlSize = 1 << 8
|
||||||
const tlMask = tlSize - 1
|
|
||||||
single := d.dt.single[:tlSize]
|
single := d.dt.single[:tlSize]
|
||||||
|
|
||||||
// Use temp table to avoid bound checks/append penalty.
|
// Use temp table to avoid bound checks/append penalty.
|
||||||
|
|
|
@ -1,488 +0,0 @@
|
||||||
// +build !appengine
|
|
||||||
// +build gc
|
|
||||||
// +build !noasm
|
|
||||||
|
|
||||||
#include "textflag.h"
|
|
||||||
#include "funcdata.h"
|
|
||||||
#include "go_asm.h"
|
|
||||||
|
|
||||||
#define bufoff 256 // see decompress.go, we're using [4][256]byte table
|
|
||||||
|
|
||||||
// func decompress4x_8b_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
|
|
||||||
// peekBits uint8, buf *byte, tbl *dEntrySingle) (int, bool)
|
|
||||||
TEXT ·decompress4x_8b_loop_x86(SB), NOSPLIT, $8
|
|
||||||
#define off R8
|
|
||||||
#define buffer DI
|
|
||||||
#define table SI
|
|
||||||
|
|
||||||
#define br_bits_read R9
|
|
||||||
#define br_value R10
|
|
||||||
#define br_offset R11
|
|
||||||
#define peek_bits R12
|
|
||||||
#define exhausted DX
|
|
||||||
|
|
||||||
#define br0 R13
|
|
||||||
#define br1 R14
|
|
||||||
#define br2 R15
|
|
||||||
#define br3 BP
|
|
||||||
|
|
||||||
MOVQ BP, 0(SP)
|
|
||||||
|
|
||||||
XORQ exhausted, exhausted // exhausted = false
|
|
||||||
XORQ off, off // off = 0
|
|
||||||
|
|
||||||
MOVBQZX peekBits+32(FP), peek_bits
|
|
||||||
MOVQ buf+40(FP), buffer
|
|
||||||
MOVQ tbl+48(FP), table
|
|
||||||
|
|
||||||
MOVQ pbr0+0(FP), br0
|
|
||||||
MOVQ pbr1+8(FP), br1
|
|
||||||
MOVQ pbr2+16(FP), br2
|
|
||||||
MOVQ pbr3+24(FP), br3
|
|
||||||
|
|
||||||
main_loop:
|
|
||||||
|
|
||||||
// const stream = 0
|
|
||||||
// br0.fillFast()
|
|
||||||
MOVBQZX bitReaderShifted_bitsRead(br0), br_bits_read
|
|
||||||
MOVQ bitReaderShifted_value(br0), br_value
|
|
||||||
MOVQ bitReaderShifted_off(br0), br_offset
|
|
||||||
|
|
||||||
// if b.bitsRead >= 32 {
|
|
||||||
CMPQ br_bits_read, $32
|
|
||||||
JB skip_fill0
|
|
||||||
|
|
||||||
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
|
||||||
SUBQ $4, br_offset // b.off -= 4
|
|
||||||
|
|
||||||
// v := b.in[b.off-4 : b.off]
|
|
||||||
// v = v[:4]
|
|
||||||
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
|
||||||
MOVQ bitReaderShifted_in(br0), AX
|
|
||||||
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
|
||||||
|
|
||||||
// b.value |= uint64(low) << (b.bitsRead & 63)
|
|
||||||
MOVQ br_bits_read, CX
|
|
||||||
SHLQ CL, AX
|
|
||||||
ORQ AX, br_value
|
|
||||||
|
|
||||||
// exhausted = exhausted || (br0.off < 4)
|
|
||||||
CMPQ br_offset, $4
|
|
||||||
SETLT DL
|
|
||||||
ORB DL, DH
|
|
||||||
|
|
||||||
// }
|
|
||||||
skip_fill0:
|
|
||||||
|
|
||||||
// val0 := br0.peekTopBits(peekBits)
|
|
||||||
MOVQ br_value, AX
|
|
||||||
MOVQ peek_bits, CX
|
|
||||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
|
||||||
|
|
||||||
// v0 := table[val0&mask]
|
|
||||||
MOVW 0(table)(AX*2), AX // AX - v0
|
|
||||||
|
|
||||||
// br0.advance(uint8(v0.entry))
|
|
||||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
|
||||||
MOVBQZX AL, CX
|
|
||||||
SHLQ CL, br_value // value <<= n
|
|
||||||
ADDQ CX, br_bits_read // bits_read += n
|
|
||||||
|
|
||||||
// val1 := br0.peekTopBits(peekBits)
|
|
||||||
MOVQ peek_bits, CX
|
|
||||||
MOVQ br_value, AX
|
|
||||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
|
||||||
|
|
||||||
// v1 := table[val1&mask]
|
|
||||||
MOVW 0(table)(AX*2), AX // AX - v1
|
|
||||||
|
|
||||||
// br0.advance(uint8(v1.entry))
|
|
||||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
|
||||||
MOVBQZX AL, CX
|
|
||||||
SHLQ CX, br_value // value <<= n
|
|
||||||
ADDQ CX, br_bits_read // bits_read += n
|
|
||||||
|
|
||||||
// these two writes get coalesced
|
|
||||||
// buf[stream][off] = uint8(v0.entry >> 8)
|
|
||||||
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
|
||||||
MOVW BX, 0(buffer)(off*1)
|
|
||||||
|
|
||||||
// SECOND PART:
|
|
||||||
// val2 := br0.peekTopBits(peekBits)
|
|
||||||
MOVQ br_value, AX
|
|
||||||
MOVQ peek_bits, CX
|
|
||||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
|
||||||
|
|
||||||
// v2 := table[val2&mask]
|
|
||||||
MOVW 0(table)(AX*2), AX // AX - v0
|
|
||||||
|
|
||||||
// br0.advance(uint8(v2.entry))
|
|
||||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
|
||||||
MOVBQZX AL, CX
|
|
||||||
SHLQ CL, br_value // value <<= n
|
|
||||||
ADDQ CX, br_bits_read // bits_read += n
|
|
||||||
|
|
||||||
// val3 := br0.peekTopBits(peekBits)
|
|
||||||
MOVQ peek_bits, CX
|
|
||||||
MOVQ br_value, AX
|
|
||||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
|
||||||
|
|
||||||
// v3 := table[val3&mask]
|
|
||||||
MOVW 0(table)(AX*2), AX // AX - v1
|
|
||||||
|
|
||||||
// br0.advance(uint8(v3.entry))
|
|
||||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
|
||||||
MOVBQZX AL, CX
|
|
||||||
SHLQ CX, br_value // value <<= n
|
|
||||||
ADDQ CX, br_bits_read // bits_read += n
|
|
||||||
|
|
||||||
// these two writes get coalesced
|
|
||||||
// buf[stream][off+2] = uint8(v2.entry >> 8)
|
|
||||||
// buf[stream][off+3] = uint8(v3.entry >> 8)
|
|
||||||
MOVW BX, 0+2(buffer)(off*1)
|
|
||||||
|
|
||||||
// update the bit reader structure
|
|
||||||
MOVB br_bits_read, bitReaderShifted_bitsRead(br0)
|
|
||||||
MOVQ br_value, bitReaderShifted_value(br0)
|
|
||||||
MOVQ br_offset, bitReaderShifted_off(br0)
|
|
||||||
|
|
||||||
// const stream = 1
|
|
||||||
// br1.fillFast()
|
|
||||||
MOVBQZX bitReaderShifted_bitsRead(br1), br_bits_read
|
|
||||||
MOVQ bitReaderShifted_value(br1), br_value
|
|
||||||
MOVQ bitReaderShifted_off(br1), br_offset
|
|
||||||
|
|
||||||
// if b.bitsRead >= 32 {
|
|
||||||
CMPQ br_bits_read, $32
|
|
||||||
JB skip_fill1
|
|
||||||
|
|
||||||
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
|
||||||
SUBQ $4, br_offset // b.off -= 4
|
|
||||||
|
|
||||||
// v := b.in[b.off-4 : b.off]
|
|
||||||
// v = v[:4]
|
|
||||||
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
|
||||||
MOVQ bitReaderShifted_in(br1), AX
|
|
||||||
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
|
||||||
|
|
||||||
// b.value |= uint64(low) << (b.bitsRead & 63)
|
|
||||||
MOVQ br_bits_read, CX
|
|
||||||
SHLQ CL, AX
|
|
||||||
ORQ AX, br_value
|
|
||||||
|
|
||||||
// exhausted = exhausted || (br1.off < 4)
|
|
||||||
CMPQ br_offset, $4
|
|
||||||
SETLT DL
|
|
||||||
ORB DL, DH
|
|
||||||
|
|
||||||
// }
|
|
||||||
skip_fill1:
|
|
||||||
|
|
||||||
// val0 := br1.peekTopBits(peekBits)
|
|
||||||
MOVQ br_value, AX
|
|
||||||
MOVQ peek_bits, CX
|
|
||||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
|
||||||
|
|
||||||
// v0 := table[val0&mask]
|
|
||||||
MOVW 0(table)(AX*2), AX // AX - v0
|
|
||||||
|
|
||||||
// br1.advance(uint8(v0.entry))
|
|
||||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
|
||||||
MOVBQZX AL, CX
|
|
||||||
SHLQ CL, br_value // value <<= n
|
|
||||||
ADDQ CX, br_bits_read // bits_read += n
|
|
||||||
|
|
||||||
// val1 := br1.peekTopBits(peekBits)
|
|
||||||
MOVQ peek_bits, CX
|
|
||||||
MOVQ br_value, AX
|
|
||||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
|
||||||
|
|
||||||
// v1 := table[val1&mask]
|
|
||||||
MOVW 0(table)(AX*2), AX // AX - v1
|
|
||||||
|
|
||||||
// br1.advance(uint8(v1.entry))
|
|
||||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
|
||||||
MOVBQZX AL, CX
|
|
||||||
SHLQ CX, br_value // value <<= n
|
|
||||||
ADDQ CX, br_bits_read // bits_read += n
|
|
||||||
|
|
||||||
// these two writes get coalesced
|
|
||||||
// buf[stream][off] = uint8(v0.entry >> 8)
|
|
||||||
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
|
||||||
MOVW BX, 256(buffer)(off*1)
|
|
||||||
|
|
||||||
// SECOND PART:
|
|
||||||
// val2 := br1.peekTopBits(peekBits)
|
|
||||||
MOVQ br_value, AX
|
|
||||||
MOVQ peek_bits, CX
|
|
||||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
|
||||||
|
|
||||||
// v2 := table[val0&mask]
|
|
||||||
MOVW 0(table)(AX*2), AX // AX - v0
|
|
||||||
|
|
||||||
// br1.advance(uint8(v0.entry))
|
|
||||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
|
||||||
MOVBQZX AL, CX
|
|
||||||
SHLQ CL, br_value // value <<= n
|
|
||||||
ADDQ CX, br_bits_read // bits_read += n
|
|
||||||
|
|
||||||
// val3 := br1.peekTopBits(peekBits)
|
|
||||||
MOVQ peek_bits, CX
|
|
||||||
MOVQ br_value, AX
|
|
||||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
|
||||||
|
|
||||||
// v3 := table[val1&mask]
|
|
||||||
MOVW 0(table)(AX*2), AX // AX - v1
|
|
||||||
|
|
||||||
// br1.advance(uint8(v1.entry))
|
|
||||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
|
||||||
MOVBQZX AL, CX
|
|
||||||
SHLQ CX, br_value // value <<= n
|
|
||||||
ADDQ CX, br_bits_read // bits_read += n
|
|
||||||
|
|
||||||
// these two writes get coalesced
|
|
||||||
// buf[stream][off+2] = uint8(v2.entry >> 8)
|
|
||||||
// buf[stream][off+3] = uint8(v3.entry >> 8)
|
|
||||||
MOVW BX, 256+2(buffer)(off*1)
|
|
||||||
|
|
||||||
// update the bit reader structure
|
|
||||||
MOVB br_bits_read, bitReaderShifted_bitsRead(br1)
|
|
||||||
MOVQ br_value, bitReaderShifted_value(br1)
|
|
||||||
MOVQ br_offset, bitReaderShifted_off(br1)
|
|
||||||
|
|
||||||
// const stream = 2
|
|
||||||
// br2.fillFast()
|
|
||||||
MOVBQZX bitReaderShifted_bitsRead(br2), br_bits_read
|
|
||||||
MOVQ bitReaderShifted_value(br2), br_value
|
|
||||||
MOVQ bitReaderShifted_off(br2), br_offset
|
|
||||||
|
|
||||||
// if b.bitsRead >= 32 {
|
|
||||||
CMPQ br_bits_read, $32
|
|
||||||
JB skip_fill2
|
|
||||||
|
|
||||||
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
|
||||||
SUBQ $4, br_offset // b.off -= 4
|
|
||||||
|
|
||||||
// v := b.in[b.off-4 : b.off]
|
|
||||||
// v = v[:4]
|
|
||||||
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
|
||||||
MOVQ bitReaderShifted_in(br2), AX
|
|
||||||
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
|
||||||
|
|
||||||
// b.value |= uint64(low) << (b.bitsRead & 63)
|
|
||||||
MOVQ br_bits_read, CX
|
|
||||||
SHLQ CL, AX
|
|
||||||
ORQ AX, br_value
|
|
||||||
|
|
||||||
// exhausted = exhausted || (br2.off < 4)
|
|
||||||
CMPQ br_offset, $4
|
|
||||||
SETLT DL
|
|
||||||
ORB DL, DH
|
|
||||||
|
|
||||||
// }
|
|
||||||
skip_fill2:
|
|
||||||
|
|
||||||
// val0 := br2.peekTopBits(peekBits)
|
|
||||||
MOVQ br_value, AX
|
|
||||||
MOVQ peek_bits, CX
|
|
||||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
|
||||||
|
|
||||||
// v0 := table[val0&mask]
|
|
||||||
MOVW 0(table)(AX*2), AX // AX - v0
|
|
||||||
|
|
||||||
// br2.advance(uint8(v0.entry))
|
|
||||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
|
||||||
MOVBQZX AL, CX
|
|
||||||
SHLQ CL, br_value // value <<= n
|
|
||||||
ADDQ CX, br_bits_read // bits_read += n
|
|
||||||
|
|
||||||
// val1 := br2.peekTopBits(peekBits)
|
|
||||||
MOVQ peek_bits, CX
|
|
||||||
MOVQ br_value, AX
|
|
||||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
|
||||||
|
|
||||||
// v1 := table[val1&mask]
|
|
||||||
MOVW 0(table)(AX*2), AX // AX - v1
|
|
||||||
|
|
||||||
// br2.advance(uint8(v1.entry))
|
|
||||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
|
||||||
MOVBQZX AL, CX
|
|
||||||
SHLQ CX, br_value // value <<= n
|
|
||||||
ADDQ CX, br_bits_read // bits_read += n
|
|
||||||
|
|
||||||
// these two writes get coalesced
|
|
||||||
// buf[stream][off] = uint8(v0.entry >> 8)
|
|
||||||
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
|
||||||
MOVW BX, 512(buffer)(off*1)
|
|
||||||
|
|
||||||
// SECOND PART:
|
|
||||||
// val2 := br2.peekTopBits(peekBits)
|
|
||||||
MOVQ br_value, AX
|
|
||||||
MOVQ peek_bits, CX
|
|
||||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
|
||||||
|
|
||||||
// v2 := table[val0&mask]
|
|
||||||
MOVW 0(table)(AX*2), AX // AX - v0
|
|
||||||
|
|
||||||
// br2.advance(uint8(v0.entry))
|
|
||||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
|
||||||
MOVBQZX AL, CX
|
|
||||||
SHLQ CL, br_value // value <<= n
|
|
||||||
ADDQ CX, br_bits_read // bits_read += n
|
|
||||||
|
|
||||||
// val3 := br2.peekTopBits(peekBits)
|
|
||||||
MOVQ peek_bits, CX
|
|
||||||
MOVQ br_value, AX
|
|
||||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
|
||||||
|
|
||||||
// v3 := table[val1&mask]
|
|
||||||
MOVW 0(table)(AX*2), AX // AX - v1
|
|
||||||
|
|
||||||
// br2.advance(uint8(v1.entry))
|
|
||||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
|
||||||
MOVBQZX AL, CX
|
|
||||||
SHLQ CX, br_value // value <<= n
|
|
||||||
ADDQ CX, br_bits_read // bits_read += n
|
|
||||||
|
|
||||||
// these two writes get coalesced
|
|
||||||
// buf[stream][off+2] = uint8(v2.entry >> 8)
|
|
||||||
// buf[stream][off+3] = uint8(v3.entry >> 8)
|
|
||||||
MOVW BX, 512+2(buffer)(off*1)
|
|
||||||
|
|
||||||
// update the bit reader structure
|
|
||||||
MOVB br_bits_read, bitReaderShifted_bitsRead(br2)
|
|
||||||
MOVQ br_value, bitReaderShifted_value(br2)
|
|
||||||
MOVQ br_offset, bitReaderShifted_off(br2)
|
|
||||||
|
|
||||||
// const stream = 3
|
|
||||||
// br3.fillFast()
|
|
||||||
MOVBQZX bitReaderShifted_bitsRead(br3), br_bits_read
|
|
||||||
MOVQ bitReaderShifted_value(br3), br_value
|
|
||||||
MOVQ bitReaderShifted_off(br3), br_offset
|
|
||||||
|
|
||||||
// if b.bitsRead >= 32 {
|
|
||||||
CMPQ br_bits_read, $32
|
|
||||||
JB skip_fill3
|
|
||||||
|
|
||||||
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
|
||||||
SUBQ $4, br_offset // b.off -= 4
|
|
||||||
|
|
||||||
// v := b.in[b.off-4 : b.off]
|
|
||||||
// v = v[:4]
|
|
||||||
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
|
||||||
MOVQ bitReaderShifted_in(br3), AX
|
|
||||||
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
|
||||||
|
|
||||||
// b.value |= uint64(low) << (b.bitsRead & 63)
|
|
||||||
MOVQ br_bits_read, CX
|
|
||||||
SHLQ CL, AX
|
|
||||||
ORQ AX, br_value
|
|
||||||
|
|
||||||
// exhausted = exhausted || (br3.off < 4)
|
|
||||||
CMPQ br_offset, $4
|
|
||||||
SETLT DL
|
|
||||||
ORB DL, DH
|
|
||||||
|
|
||||||
// }
|
|
||||||
skip_fill3:
|
|
||||||
|
|
||||||
// val0 := br3.peekTopBits(peekBits)
|
|
||||||
MOVQ br_value, AX
|
|
||||||
MOVQ peek_bits, CX
|
|
||||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
|
||||||
|
|
||||||
// v0 := table[val0&mask]
|
|
||||||
MOVW 0(table)(AX*2), AX // AX - v0
|
|
||||||
|
|
||||||
// br3.advance(uint8(v0.entry))
|
|
||||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
|
||||||
MOVBQZX AL, CX
|
|
||||||
SHLQ CL, br_value // value <<= n
|
|
||||||
ADDQ CX, br_bits_read // bits_read += n
|
|
||||||
|
|
||||||
// val1 := br3.peekTopBits(peekBits)
|
|
||||||
MOVQ peek_bits, CX
|
|
||||||
MOVQ br_value, AX
|
|
||||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
|
||||||
|
|
||||||
// v1 := table[val1&mask]
|
|
||||||
MOVW 0(table)(AX*2), AX // AX - v1
|
|
||||||
|
|
||||||
// br3.advance(uint8(v1.entry))
|
|
||||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
|
||||||
MOVBQZX AL, CX
|
|
||||||
SHLQ CX, br_value // value <<= n
|
|
||||||
ADDQ CX, br_bits_read // bits_read += n
|
|
||||||
|
|
||||||
// these two writes get coalesced
|
|
||||||
// buf[stream][off] = uint8(v0.entry >> 8)
|
|
||||||
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
|
||||||
MOVW BX, 768(buffer)(off*1)
|
|
||||||
|
|
||||||
// SECOND PART:
|
|
||||||
// val2 := br3.peekTopBits(peekBits)
|
|
||||||
MOVQ br_value, AX
|
|
||||||
MOVQ peek_bits, CX
|
|
||||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
|
||||||
|
|
||||||
// v2 := table[val0&mask]
|
|
||||||
MOVW 0(table)(AX*2), AX // AX - v0
|
|
||||||
|
|
||||||
// br3.advance(uint8(v0.entry))
|
|
||||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
|
||||||
MOVBQZX AL, CX
|
|
||||||
SHLQ CL, br_value // value <<= n
|
|
||||||
ADDQ CX, br_bits_read // bits_read += n
|
|
||||||
|
|
||||||
// val3 := br3.peekTopBits(peekBits)
|
|
||||||
MOVQ peek_bits, CX
|
|
||||||
MOVQ br_value, AX
|
|
||||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
|
||||||
|
|
||||||
// v3 := table[val1&mask]
|
|
||||||
MOVW 0(table)(AX*2), AX // AX - v1
|
|
||||||
|
|
||||||
// br3.advance(uint8(v1.entry))
|
|
||||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
|
||||||
MOVBQZX AL, CX
|
|
||||||
SHLQ CX, br_value // value <<= n
|
|
||||||
ADDQ CX, br_bits_read // bits_read += n
|
|
||||||
|
|
||||||
// these two writes get coalesced
|
|
||||||
// buf[stream][off+2] = uint8(v2.entry >> 8)
|
|
||||||
// buf[stream][off+3] = uint8(v3.entry >> 8)
|
|
||||||
MOVW BX, 768+2(buffer)(off*1)
|
|
||||||
|
|
||||||
// update the bit reader structure
|
|
||||||
MOVB br_bits_read, bitReaderShifted_bitsRead(br3)
|
|
||||||
MOVQ br_value, bitReaderShifted_value(br3)
|
|
||||||
MOVQ br_offset, bitReaderShifted_off(br3)
|
|
||||||
|
|
||||||
ADDQ $4, off // off += 4
|
|
||||||
|
|
||||||
TESTB DH, DH // any br[i].ofs < 4?
|
|
||||||
JNZ end
|
|
||||||
|
|
||||||
CMPQ off, $bufoff
|
|
||||||
JL main_loop
|
|
||||||
|
|
||||||
end:
|
|
||||||
MOVQ 0(SP), BP
|
|
||||||
|
|
||||||
MOVB off, ret+56(FP)
|
|
||||||
RET
|
|
||||||
|
|
||||||
#undef off
|
|
||||||
#undef buffer
|
|
||||||
#undef table
|
|
||||||
|
|
||||||
#undef br_bits_read
|
|
||||||
#undef br_value
|
|
||||||
#undef br_offset
|
|
||||||
#undef peek_bits
|
|
||||||
#undef exhausted
|
|
||||||
|
|
||||||
#undef br0
|
|
||||||
#undef br1
|
|
||||||
#undef br2
|
|
||||||
#undef br3
|
|
|
@ -1,197 +0,0 @@
|
||||||
// +build !appengine
|
|
||||||
// +build gc
|
|
||||||
// +build !noasm
|
|
||||||
|
|
||||||
#include "textflag.h"
|
|
||||||
#include "funcdata.h"
|
|
||||||
#include "go_asm.h"
|
|
||||||
|
|
||||||
|
|
||||||
#define bufoff 256 // see decompress.go, we're using [4][256]byte table
|
|
||||||
|
|
||||||
//func decompress4x_8b_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
|
|
||||||
// peekBits uint8, buf *byte, tbl *dEntrySingle) uint8
|
|
||||||
TEXT ·decompress4x_8b_loop_x86(SB), NOSPLIT, $8
|
|
||||||
#define off R8
|
|
||||||
#define buffer DI
|
|
||||||
#define table SI
|
|
||||||
|
|
||||||
#define br_bits_read R9
|
|
||||||
#define br_value R10
|
|
||||||
#define br_offset R11
|
|
||||||
#define peek_bits R12
|
|
||||||
#define exhausted DX
|
|
||||||
|
|
||||||
#define br0 R13
|
|
||||||
#define br1 R14
|
|
||||||
#define br2 R15
|
|
||||||
#define br3 BP
|
|
||||||
|
|
||||||
MOVQ BP, 0(SP)
|
|
||||||
|
|
||||||
XORQ exhausted, exhausted // exhausted = false
|
|
||||||
XORQ off, off // off = 0
|
|
||||||
|
|
||||||
MOVBQZX peekBits+32(FP), peek_bits
|
|
||||||
MOVQ buf+40(FP), buffer
|
|
||||||
MOVQ tbl+48(FP), table
|
|
||||||
|
|
||||||
MOVQ pbr0+0(FP), br0
|
|
||||||
MOVQ pbr1+8(FP), br1
|
|
||||||
MOVQ pbr2+16(FP), br2
|
|
||||||
MOVQ pbr3+24(FP), br3
|
|
||||||
|
|
||||||
main_loop:
|
|
||||||
{{ define "decode_2_values_x86" }}
|
|
||||||
// const stream = {{ var "id" }}
|
|
||||||
// br{{ var "id"}}.fillFast()
|
|
||||||
MOVBQZX bitReaderShifted_bitsRead(br{{ var "id" }}), br_bits_read
|
|
||||||
MOVQ bitReaderShifted_value(br{{ var "id" }}), br_value
|
|
||||||
MOVQ bitReaderShifted_off(br{{ var "id" }}), br_offset
|
|
||||||
|
|
||||||
// if b.bitsRead >= 32 {
|
|
||||||
CMPQ br_bits_read, $32
|
|
||||||
JB skip_fill{{ var "id" }}
|
|
||||||
|
|
||||||
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
|
||||||
SUBQ $4, br_offset // b.off -= 4
|
|
||||||
|
|
||||||
// v := b.in[b.off-4 : b.off]
|
|
||||||
// v = v[:4]
|
|
||||||
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
|
||||||
MOVQ bitReaderShifted_in(br{{ var "id" }}), AX
|
|
||||||
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
|
||||||
|
|
||||||
// b.value |= uint64(low) << (b.bitsRead & 63)
|
|
||||||
MOVQ br_bits_read, CX
|
|
||||||
SHLQ CL, AX
|
|
||||||
ORQ AX, br_value
|
|
||||||
|
|
||||||
// exhausted = exhausted || (br{{ var "id"}}.off < 4)
|
|
||||||
CMPQ br_offset, $4
|
|
||||||
SETLT DL
|
|
||||||
ORB DL, DH
|
|
||||||
// }
|
|
||||||
skip_fill{{ var "id" }}:
|
|
||||||
|
|
||||||
// val0 := br{{ var "id"}}.peekTopBits(peekBits)
|
|
||||||
MOVQ br_value, AX
|
|
||||||
MOVQ peek_bits, CX
|
|
||||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
|
||||||
|
|
||||||
// v0 := table[val0&mask]
|
|
||||||
MOVW 0(table)(AX*2), AX // AX - v0
|
|
||||||
|
|
||||||
// br{{ var "id"}}.advance(uint8(v0.entry))
|
|
||||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
|
||||||
MOVBQZX AL, CX
|
|
||||||
SHLQ CL, br_value // value <<= n
|
|
||||||
ADDQ CX, br_bits_read // bits_read += n
|
|
||||||
|
|
||||||
// val1 := br{{ var "id"}}.peekTopBits(peekBits)
|
|
||||||
MOVQ peek_bits, CX
|
|
||||||
MOVQ br_value, AX
|
|
||||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
|
||||||
|
|
||||||
// v1 := table[val1&mask]
|
|
||||||
MOVW 0(table)(AX*2), AX // AX - v1
|
|
||||||
|
|
||||||
// br{{ var "id"}}.advance(uint8(v1.entry))
|
|
||||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
|
||||||
MOVBQZX AL, CX
|
|
||||||
SHLQ CX, br_value // value <<= n
|
|
||||||
ADDQ CX, br_bits_read // bits_read += n
|
|
||||||
|
|
||||||
|
|
||||||
// these two writes get coalesced
|
|
||||||
// buf[stream][off] = uint8(v0.entry >> 8)
|
|
||||||
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
|
||||||
MOVW BX, {{ var "bufofs" }}(buffer)(off*1)
|
|
||||||
|
|
||||||
// SECOND PART:
|
|
||||||
// val2 := br{{ var "id"}}.peekTopBits(peekBits)
|
|
||||||
MOVQ br_value, AX
|
|
||||||
MOVQ peek_bits, CX
|
|
||||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
|
||||||
|
|
||||||
// v2 := table[val0&mask]
|
|
||||||
MOVW 0(table)(AX*2), AX // AX - v0
|
|
||||||
|
|
||||||
// br{{ var "id"}}.advance(uint8(v0.entry))
|
|
||||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
|
||||||
MOVBQZX AL, CX
|
|
||||||
SHLQ CL, br_value // value <<= n
|
|
||||||
ADDQ CX, br_bits_read // bits_read += n
|
|
||||||
|
|
||||||
// val3 := br{{ var "id"}}.peekTopBits(peekBits)
|
|
||||||
MOVQ peek_bits, CX
|
|
||||||
MOVQ br_value, AX
|
|
||||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
|
||||||
|
|
||||||
// v3 := table[val1&mask]
|
|
||||||
MOVW 0(table)(AX*2), AX // AX - v1
|
|
||||||
|
|
||||||
// br{{ var "id"}}.advance(uint8(v1.entry))
|
|
||||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
|
||||||
MOVBQZX AL, CX
|
|
||||||
SHLQ CX, br_value // value <<= n
|
|
||||||
ADDQ CX, br_bits_read // bits_read += n
|
|
||||||
|
|
||||||
|
|
||||||
// these two writes get coalesced
|
|
||||||
// buf[stream][off+2] = uint8(v2.entry >> 8)
|
|
||||||
// buf[stream][off+3] = uint8(v3.entry >> 8)
|
|
||||||
MOVW BX, {{ var "bufofs" }}+2(buffer)(off*1)
|
|
||||||
|
|
||||||
// update the bit reader structure
|
|
||||||
MOVB br_bits_read, bitReaderShifted_bitsRead(br{{ var "id" }})
|
|
||||||
MOVQ br_value, bitReaderShifted_value(br{{ var "id" }})
|
|
||||||
MOVQ br_offset, bitReaderShifted_off(br{{ var "id" }})
|
|
||||||
{{ end }}
|
|
||||||
|
|
||||||
{{ set "id" "0" }}
|
|
||||||
{{ set "ofs" "0" }}
|
|
||||||
{{ set "bufofs" "0" }} {{/* id * bufoff */}}
|
|
||||||
{{ template "decode_2_values_x86" . }}
|
|
||||||
|
|
||||||
{{ set "id" "1" }}
|
|
||||||
{{ set "ofs" "8" }}
|
|
||||||
{{ set "bufofs" "256" }}
|
|
||||||
{{ template "decode_2_values_x86" . }}
|
|
||||||
|
|
||||||
{{ set "id" "2" }}
|
|
||||||
{{ set "ofs" "16" }}
|
|
||||||
{{ set "bufofs" "512" }}
|
|
||||||
{{ template "decode_2_values_x86" . }}
|
|
||||||
|
|
||||||
{{ set "id" "3" }}
|
|
||||||
{{ set "ofs" "24" }}
|
|
||||||
{{ set "bufofs" "768" }}
|
|
||||||
{{ template "decode_2_values_x86" . }}
|
|
||||||
|
|
||||||
ADDQ $4, off // off += 4
|
|
||||||
|
|
||||||
TESTB DH, DH // any br[i].ofs < 4?
|
|
||||||
JNZ end
|
|
||||||
|
|
||||||
CMPQ off, $bufoff
|
|
||||||
JL main_loop
|
|
||||||
end:
|
|
||||||
MOVQ 0(SP), BP
|
|
||||||
|
|
||||||
MOVB off, ret+56(FP)
|
|
||||||
RET
|
|
||||||
#undef off
|
|
||||||
#undef buffer
|
|
||||||
#undef table
|
|
||||||
|
|
||||||
#undef br_bits_read
|
|
||||||
#undef br_value
|
|
||||||
#undef br_offset
|
|
||||||
#undef peek_bits
|
|
||||||
#undef exhausted
|
|
||||||
|
|
||||||
#undef br0
|
|
||||||
#undef br1
|
|
||||||
#undef br2
|
|
||||||
#undef br3
|
|
|
@ -2,30 +2,40 @@
|
||||||
// +build amd64,!appengine,!noasm,gc
|
// +build amd64,!appengine,!noasm,gc
|
||||||
|
|
||||||
// This file contains the specialisation of Decoder.Decompress4X
|
// This file contains the specialisation of Decoder.Decompress4X
|
||||||
// that uses an asm implementation of its main loop.
|
// and Decoder.Decompress1X that use an asm implementation of their main loops.
|
||||||
package huff0
|
package huff0
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/klauspost/compress/internal/cpuinfo"
|
||||||
)
|
)
|
||||||
|
|
||||||
// decompress4x_main_loop_x86 is an x86 assembler implementation
|
// decompress4x_main_loop_amd64 is an x86 assembler implementation
|
||||||
// of Decompress4X when tablelog > 8.
|
// of Decompress4X when tablelog > 8.
|
||||||
// go:noescape
|
//go:noescape
|
||||||
func decompress4x_main_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
|
func decompress4x_main_loop_amd64(ctx *decompress4xContext)
|
||||||
peekBits uint8, buf *byte, tbl *dEntrySingle) uint8
|
|
||||||
|
|
||||||
// decompress4x_8b_loop_x86 is an x86 assembler implementation
|
// decompress4x_8b_main_loop_amd64 is an x86 assembler implementation
|
||||||
// of Decompress4X when tablelog <= 8 which decodes 4 entries
|
// of Decompress4X when tablelog <= 8 which decodes 4 entries
|
||||||
// per loop.
|
// per loop.
|
||||||
// go:noescape
|
//go:noescape
|
||||||
func decompress4x_8b_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
|
func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext)
|
||||||
peekBits uint8, buf *byte, tbl *dEntrySingle) uint8
|
|
||||||
|
|
||||||
// fallback8BitSize is the size where using Go version is faster.
|
// fallback8BitSize is the size where using Go version is faster.
|
||||||
const fallback8BitSize = 800
|
const fallback8BitSize = 800
|
||||||
|
|
||||||
|
type decompress4xContext struct {
|
||||||
|
pbr *[4]bitReaderShifted
|
||||||
|
peekBits uint8
|
||||||
|
out *byte
|
||||||
|
dstEvery int
|
||||||
|
tbl *dEntrySingle
|
||||||
|
decoded int
|
||||||
|
limit *byte
|
||||||
|
}
|
||||||
|
|
||||||
// Decompress4X will decompress a 4X encoded stream.
|
// Decompress4X will decompress a 4X encoded stream.
|
||||||
// The length of the supplied input must match the end of a block exactly.
|
// The length of the supplied input must match the end of a block exactly.
|
||||||
// The *capacity* of the dst slice must match the destination size of
|
// The *capacity* of the dst slice must match the destination size of
|
||||||
|
@ -42,6 +52,7 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
|
||||||
if cap(dst) < fallback8BitSize && use8BitTables {
|
if cap(dst) < fallback8BitSize && use8BitTables {
|
||||||
return d.decompress4X8bit(dst, src)
|
return d.decompress4X8bit(dst, src)
|
||||||
}
|
}
|
||||||
|
|
||||||
var br [4]bitReaderShifted
|
var br [4]bitReaderShifted
|
||||||
// Decode "jump table"
|
// Decode "jump table"
|
||||||
start := 6
|
start := 6
|
||||||
|
@ -71,70 +82,25 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
|
||||||
const tlMask = tlSize - 1
|
const tlMask = tlSize - 1
|
||||||
single := d.dt.single[:tlSize]
|
single := d.dt.single[:tlSize]
|
||||||
|
|
||||||
// Use temp table to avoid bound checks/append penalty.
|
|
||||||
buf := d.buffer()
|
|
||||||
var off uint8
|
|
||||||
var decoded int
|
var decoded int
|
||||||
|
|
||||||
const debug = false
|
if len(out) > 4*4 && !(br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4) {
|
||||||
|
ctx := decompress4xContext{
|
||||||
// see: bitReaderShifted.peekBitsFast()
|
pbr: &br,
|
||||||
peekBits := uint8((64 - d.actualTableLog) & 63)
|
peekBits: uint8((64 - d.actualTableLog) & 63), // see: bitReaderShifted.peekBitsFast()
|
||||||
|
out: &out[0],
|
||||||
// Decode 2 values from each decoder/loop.
|
dstEvery: dstEvery,
|
||||||
const bufoff = 256
|
tbl: &single[0],
|
||||||
for {
|
limit: &out[dstEvery-4], // Always stop decoding when first buffer gets here to avoid writing OOB on last.
|
||||||
if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
|
|
||||||
break
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if use8BitTables {
|
if use8BitTables {
|
||||||
off = decompress4x_8b_loop_x86(&br[0], &br[1], &br[2], &br[3], peekBits, &buf[0][0], &single[0])
|
decompress4x_8b_main_loop_amd64(&ctx)
|
||||||
} else {
|
} else {
|
||||||
off = decompress4x_main_loop_x86(&br[0], &br[1], &br[2], &br[3], peekBits, &buf[0][0], &single[0])
|
decompress4x_main_loop_amd64(&ctx)
|
||||||
}
|
|
||||||
if debug {
|
|
||||||
fmt.Print("DEBUG: ")
|
|
||||||
fmt.Printf("off=%d,", off)
|
|
||||||
for i := 0; i < 4; i++ {
|
|
||||||
fmt.Printf(" br[%d]={bitsRead=%d, value=%x, off=%d}",
|
|
||||||
i, br[i].bitsRead, br[i].value, br[i].off)
|
|
||||||
}
|
|
||||||
fmt.Println("")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if off != 0 {
|
decoded = ctx.decoded
|
||||||
break
|
out = out[decoded/4:]
|
||||||
}
|
|
||||||
|
|
||||||
if bufoff > dstEvery {
|
|
||||||
d.bufs.Put(buf)
|
|
||||||
return nil, errors.New("corruption detected: stream overrun 1")
|
|
||||||
}
|
|
||||||
copy(out, buf[0][:])
|
|
||||||
copy(out[dstEvery:], buf[1][:])
|
|
||||||
copy(out[dstEvery*2:], buf[2][:])
|
|
||||||
copy(out[dstEvery*3:], buf[3][:])
|
|
||||||
out = out[bufoff:]
|
|
||||||
decoded += bufoff * 4
|
|
||||||
// There must at least be 3 buffers left.
|
|
||||||
if len(out) < dstEvery*3 {
|
|
||||||
d.bufs.Put(buf)
|
|
||||||
return nil, errors.New("corruption detected: stream overrun 2")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if off > 0 {
|
|
||||||
ioff := int(off)
|
|
||||||
if len(out) < dstEvery*3+ioff {
|
|
||||||
d.bufs.Put(buf)
|
|
||||||
return nil, errors.New("corruption detected: stream overrun 3")
|
|
||||||
}
|
|
||||||
copy(out, buf[0][:off])
|
|
||||||
copy(out[dstEvery:], buf[1][:off])
|
|
||||||
copy(out[dstEvery*2:], buf[2][:off])
|
|
||||||
copy(out[dstEvery*3:], buf[3][:off])
|
|
||||||
decoded += int(off) * 4
|
|
||||||
out = out[off:]
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Decode remaining.
|
// Decode remaining.
|
||||||
|
@ -150,7 +116,6 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
|
||||||
for bitsLeft > 0 {
|
for bitsLeft > 0 {
|
||||||
br.fill()
|
br.fill()
|
||||||
if offset >= endsAt {
|
if offset >= endsAt {
|
||||||
d.bufs.Put(buf)
|
|
||||||
return nil, errors.New("corruption detected: stream overrun 4")
|
return nil, errors.New("corruption detected: stream overrun 4")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -164,7 +129,6 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
|
||||||
offset++
|
offset++
|
||||||
}
|
}
|
||||||
if offset != endsAt {
|
if offset != endsAt {
|
||||||
d.bufs.Put(buf)
|
|
||||||
return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt)
|
return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt)
|
||||||
}
|
}
|
||||||
decoded += offset - dstEvery*i
|
decoded += offset - dstEvery*i
|
||||||
|
@ -173,9 +137,86 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
d.bufs.Put(buf)
|
|
||||||
if dstSize != decoded {
|
if dstSize != decoded {
|
||||||
return nil, errors.New("corruption detected: short output block")
|
return nil, errors.New("corruption detected: short output block")
|
||||||
}
|
}
|
||||||
return dst, nil
|
return dst, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// decompress1x_main_loop_amd64 is an x86 assembler implementation
|
||||||
|
// of Decompress1X when tablelog > 8.
|
||||||
|
//go:noescape
|
||||||
|
func decompress1x_main_loop_amd64(ctx *decompress1xContext)
|
||||||
|
|
||||||
|
// decompress1x_main_loop_bmi2 is an x86 with BMI2 assembler implementation
|
||||||
|
// of Decompress1X when tablelog > 8.
|
||||||
|
//go:noescape
|
||||||
|
func decompress1x_main_loop_bmi2(ctx *decompress1xContext)
|
||||||
|
|
||||||
|
type decompress1xContext struct {
|
||||||
|
pbr *bitReaderShifted
|
||||||
|
peekBits uint8
|
||||||
|
out *byte
|
||||||
|
outCap int
|
||||||
|
tbl *dEntrySingle
|
||||||
|
decoded int
|
||||||
|
}
|
||||||
|
|
||||||
|
// Error reported by asm implementations
|
||||||
|
const error_max_decoded_size_exeeded = -1
|
||||||
|
|
||||||
|
// Decompress1X will decompress a 1X encoded stream.
|
||||||
|
// The cap of the output buffer will be the maximum decompressed size.
|
||||||
|
// The length of the supplied input must match the end of a block exactly.
|
||||||
|
func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) {
|
||||||
|
if len(d.dt.single) == 0 {
|
||||||
|
return nil, errors.New("no table loaded")
|
||||||
|
}
|
||||||
|
var br bitReaderShifted
|
||||||
|
err := br.init(src)
|
||||||
|
if err != nil {
|
||||||
|
return dst, err
|
||||||
|
}
|
||||||
|
maxDecodedSize := cap(dst)
|
||||||
|
dst = dst[:maxDecodedSize]
|
||||||
|
|
||||||
|
const tlSize = 1 << tableLogMax
|
||||||
|
const tlMask = tlSize - 1
|
||||||
|
|
||||||
|
if maxDecodedSize >= 4 {
|
||||||
|
ctx := decompress1xContext{
|
||||||
|
pbr: &br,
|
||||||
|
out: &dst[0],
|
||||||
|
outCap: maxDecodedSize,
|
||||||
|
peekBits: uint8((64 - d.actualTableLog) & 63), // see: bitReaderShifted.peekBitsFast()
|
||||||
|
tbl: &d.dt.single[0],
|
||||||
|
}
|
||||||
|
|
||||||
|
if cpuinfo.HasBMI2() {
|
||||||
|
decompress1x_main_loop_bmi2(&ctx)
|
||||||
|
} else {
|
||||||
|
decompress1x_main_loop_amd64(&ctx)
|
||||||
|
}
|
||||||
|
if ctx.decoded == error_max_decoded_size_exeeded {
|
||||||
|
return nil, ErrMaxDecodedSizeExceeded
|
||||||
|
}
|
||||||
|
|
||||||
|
dst = dst[:ctx.decoded]
|
||||||
|
}
|
||||||
|
|
||||||
|
// br < 8, so uint8 is fine
|
||||||
|
bitsLeft := uint8(br.off)*8 + 64 - br.bitsRead
|
||||||
|
for bitsLeft > 0 {
|
||||||
|
br.fill()
|
||||||
|
if len(dst) >= maxDecodedSize {
|
||||||
|
br.close()
|
||||||
|
return nil, ErrMaxDecodedSizeExceeded
|
||||||
|
}
|
||||||
|
v := d.dt.single[br.peekBitsFast(d.actualTableLog)&tlMask]
|
||||||
|
nBits := uint8(v.entry)
|
||||||
|
br.advance(nBits)
|
||||||
|
bitsLeft -= nBits
|
||||||
|
dst = append(dst, uint8(v.entry>>8))
|
||||||
|
}
|
||||||
|
return dst, br.close()
|
||||||
|
}
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,195 +0,0 @@
|
||||||
// +build !appengine
|
|
||||||
// +build gc
|
|
||||||
// +build !noasm
|
|
||||||
|
|
||||||
#include "textflag.h"
|
|
||||||
#include "funcdata.h"
|
|
||||||
#include "go_asm.h"
|
|
||||||
|
|
||||||
#ifdef GOAMD64_v4
|
|
||||||
#ifndef GOAMD64_v3
|
|
||||||
#define GOAMD64_v3
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define bufoff 256 // see decompress.go, we're using [4][256]byte table
|
|
||||||
|
|
||||||
//func decompress4x_main_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
|
|
||||||
// peekBits uint8, buf *byte, tbl *dEntrySingle) uint8
|
|
||||||
TEXT ·decompress4x_main_loop_x86(SB), NOSPLIT, $8
|
|
||||||
#define off R8
|
|
||||||
#define buffer DI
|
|
||||||
#define table SI
|
|
||||||
|
|
||||||
#define br_bits_read R9
|
|
||||||
#define br_value R10
|
|
||||||
#define br_offset R11
|
|
||||||
#define peek_bits R12
|
|
||||||
#define exhausted DX
|
|
||||||
|
|
||||||
#define br0 R13
|
|
||||||
#define br1 R14
|
|
||||||
#define br2 R15
|
|
||||||
#define br3 BP
|
|
||||||
|
|
||||||
MOVQ BP, 0(SP)
|
|
||||||
|
|
||||||
XORQ exhausted, exhausted // exhausted = false
|
|
||||||
XORQ off, off // off = 0
|
|
||||||
|
|
||||||
MOVBQZX peekBits+32(FP), peek_bits
|
|
||||||
MOVQ buf+40(FP), buffer
|
|
||||||
MOVQ tbl+48(FP), table
|
|
||||||
|
|
||||||
MOVQ pbr0+0(FP), br0
|
|
||||||
MOVQ pbr1+8(FP), br1
|
|
||||||
MOVQ pbr2+16(FP), br2
|
|
||||||
MOVQ pbr3+24(FP), br3
|
|
||||||
|
|
||||||
main_loop:
|
|
||||||
{{ define "decode_2_values_x86" }}
|
|
||||||
// const stream = {{ var "id" }}
|
|
||||||
// br{{ var "id"}}.fillFast()
|
|
||||||
MOVBQZX bitReaderShifted_bitsRead(br{{ var "id" }}), br_bits_read
|
|
||||||
MOVQ bitReaderShifted_value(br{{ var "id" }}), br_value
|
|
||||||
MOVQ bitReaderShifted_off(br{{ var "id" }}), br_offset
|
|
||||||
|
|
||||||
// We must have at least 2 * max tablelog left
|
|
||||||
CMPQ br_bits_read, $64-22
|
|
||||||
JBE skip_fill{{ var "id" }}
|
|
||||||
|
|
||||||
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
|
||||||
SUBQ $4, br_offset // b.off -= 4
|
|
||||||
|
|
||||||
// v := b.in[b.off-4 : b.off]
|
|
||||||
// v = v[:4]
|
|
||||||
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
|
||||||
MOVQ bitReaderShifted_in(br{{ var "id" }}), AX
|
|
||||||
|
|
||||||
// b.value |= uint64(low) << (b.bitsRead & 63)
|
|
||||||
#ifdef GOAMD64_v3
|
|
||||||
SHLXQ br_bits_read, 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) << (b.bitsRead & 63)
|
|
||||||
#else
|
|
||||||
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
|
||||||
MOVQ br_bits_read, CX
|
|
||||||
SHLQ CL, AX
|
|
||||||
#endif
|
|
||||||
|
|
||||||
ORQ AX, br_value
|
|
||||||
|
|
||||||
// exhausted = exhausted || (br{{ var "id"}}.off < 4)
|
|
||||||
CMPQ br_offset, $4
|
|
||||||
SETLT DL
|
|
||||||
ORB DL, DH
|
|
||||||
// }
|
|
||||||
skip_fill{{ var "id" }}:
|
|
||||||
|
|
||||||
// val0 := br{{ var "id"}}.peekTopBits(peekBits)
|
|
||||||
#ifdef GOAMD64_v3
|
|
||||||
SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
|
|
||||||
#else
|
|
||||||
MOVQ br_value, AX
|
|
||||||
MOVQ peek_bits, CX
|
|
||||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// v0 := table[val0&mask]
|
|
||||||
MOVW 0(table)(AX*2), AX // AX - v0
|
|
||||||
|
|
||||||
// br{{ var "id"}}.advance(uint8(v0.entry))
|
|
||||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
|
||||||
|
|
||||||
#ifdef GOAMD64_v3
|
|
||||||
MOVBQZX AL, CX
|
|
||||||
SHLXQ AX, br_value, br_value // value <<= n
|
|
||||||
#else
|
|
||||||
MOVBQZX AL, CX
|
|
||||||
SHLQ CL, br_value // value <<= n
|
|
||||||
#endif
|
|
||||||
|
|
||||||
ADDQ CX, br_bits_read // bits_read += n
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef GOAMD64_v3
|
|
||||||
SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
|
|
||||||
#else
|
|
||||||
// val1 := br{{ var "id"}}.peekTopBits(peekBits)
|
|
||||||
MOVQ peek_bits, CX
|
|
||||||
MOVQ br_value, AX
|
|
||||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// v1 := table[val1&mask]
|
|
||||||
MOVW 0(table)(AX*2), AX // AX - v1
|
|
||||||
|
|
||||||
// br{{ var "id"}}.advance(uint8(v1.entry))
|
|
||||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
|
||||||
|
|
||||||
#ifdef GOAMD64_v3
|
|
||||||
MOVBQZX AL, CX
|
|
||||||
SHLXQ AX, br_value, br_value // value <<= n
|
|
||||||
#else
|
|
||||||
MOVBQZX AL, CX
|
|
||||||
SHLQ CL, br_value // value <<= n
|
|
||||||
#endif
|
|
||||||
|
|
||||||
ADDQ CX, br_bits_read // bits_read += n
|
|
||||||
|
|
||||||
|
|
||||||
// these two writes get coalesced
|
|
||||||
// buf[stream][off] = uint8(v0.entry >> 8)
|
|
||||||
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
|
||||||
MOVW BX, {{ var "bufofs" }}(buffer)(off*1)
|
|
||||||
|
|
||||||
// update the bit reader structure
|
|
||||||
MOVB br_bits_read, bitReaderShifted_bitsRead(br{{ var "id" }})
|
|
||||||
MOVQ br_value, bitReaderShifted_value(br{{ var "id" }})
|
|
||||||
MOVQ br_offset, bitReaderShifted_off(br{{ var "id" }})
|
|
||||||
{{ end }}
|
|
||||||
|
|
||||||
{{ set "id" "0" }}
|
|
||||||
{{ set "ofs" "0" }}
|
|
||||||
{{ set "bufofs" "0" }} {{/* id * bufoff */}}
|
|
||||||
{{ template "decode_2_values_x86" . }}
|
|
||||||
|
|
||||||
{{ set "id" "1" }}
|
|
||||||
{{ set "ofs" "8" }}
|
|
||||||
{{ set "bufofs" "256" }}
|
|
||||||
{{ template "decode_2_values_x86" . }}
|
|
||||||
|
|
||||||
{{ set "id" "2" }}
|
|
||||||
{{ set "ofs" "16" }}
|
|
||||||
{{ set "bufofs" "512" }}
|
|
||||||
{{ template "decode_2_values_x86" . }}
|
|
||||||
|
|
||||||
{{ set "id" "3" }}
|
|
||||||
{{ set "ofs" "24" }}
|
|
||||||
{{ set "bufofs" "768" }}
|
|
||||||
{{ template "decode_2_values_x86" . }}
|
|
||||||
|
|
||||||
ADDQ $2, off // off += 2
|
|
||||||
|
|
||||||
TESTB DH, DH // any br[i].ofs < 4?
|
|
||||||
JNZ end
|
|
||||||
|
|
||||||
CMPQ off, $bufoff
|
|
||||||
JL main_loop
|
|
||||||
end:
|
|
||||||
MOVQ 0(SP), BP
|
|
||||||
|
|
||||||
MOVB off, ret+56(FP)
|
|
||||||
RET
|
|
||||||
#undef off
|
|
||||||
#undef buffer
|
|
||||||
#undef table
|
|
||||||
|
|
||||||
#undef br_bits_read
|
|
||||||
#undef br_value
|
|
||||||
#undef br_offset
|
|
||||||
#undef peek_bits
|
|
||||||
#undef exhausted
|
|
||||||
|
|
||||||
#undef br0
|
|
||||||
#undef br1
|
|
||||||
#undef br2
|
|
||||||
#undef br3
|
|
|
@ -191,3 +191,105 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
|
||||||
}
|
}
|
||||||
return dst, nil
|
return dst, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Decompress1X will decompress a 1X encoded stream.
|
||||||
|
// The cap of the output buffer will be the maximum decompressed size.
|
||||||
|
// The length of the supplied input must match the end of a block exactly.
|
||||||
|
func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) {
|
||||||
|
if len(d.dt.single) == 0 {
|
||||||
|
return nil, errors.New("no table loaded")
|
||||||
|
}
|
||||||
|
if use8BitTables && d.actualTableLog <= 8 {
|
||||||
|
return d.decompress1X8Bit(dst, src)
|
||||||
|
}
|
||||||
|
var br bitReaderShifted
|
||||||
|
err := br.init(src)
|
||||||
|
if err != nil {
|
||||||
|
return dst, err
|
||||||
|
}
|
||||||
|
maxDecodedSize := cap(dst)
|
||||||
|
dst = dst[:0]
|
||||||
|
|
||||||
|
// Avoid bounds check by always having full sized table.
|
||||||
|
const tlSize = 1 << tableLogMax
|
||||||
|
const tlMask = tlSize - 1
|
||||||
|
dt := d.dt.single[:tlSize]
|
||||||
|
|
||||||
|
// Use temp table to avoid bound checks/append penalty.
|
||||||
|
bufs := d.buffer()
|
||||||
|
buf := &bufs[0]
|
||||||
|
var off uint8
|
||||||
|
|
||||||
|
for br.off >= 8 {
|
||||||
|
br.fillFast()
|
||||||
|
v := dt[br.peekBitsFast(d.actualTableLog)&tlMask]
|
||||||
|
br.advance(uint8(v.entry))
|
||||||
|
buf[off+0] = uint8(v.entry >> 8)
|
||||||
|
|
||||||
|
v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
|
||||||
|
br.advance(uint8(v.entry))
|
||||||
|
buf[off+1] = uint8(v.entry >> 8)
|
||||||
|
|
||||||
|
// Refill
|
||||||
|
br.fillFast()
|
||||||
|
|
||||||
|
v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
|
||||||
|
br.advance(uint8(v.entry))
|
||||||
|
buf[off+2] = uint8(v.entry >> 8)
|
||||||
|
|
||||||
|
v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
|
||||||
|
br.advance(uint8(v.entry))
|
||||||
|
buf[off+3] = uint8(v.entry >> 8)
|
||||||
|
|
||||||
|
off += 4
|
||||||
|
if off == 0 {
|
||||||
|
if len(dst)+256 > maxDecodedSize {
|
||||||
|
br.close()
|
||||||
|
d.bufs.Put(bufs)
|
||||||
|
return nil, ErrMaxDecodedSizeExceeded
|
||||||
|
}
|
||||||
|
dst = append(dst, buf[:]...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(dst)+int(off) > maxDecodedSize {
|
||||||
|
d.bufs.Put(bufs)
|
||||||
|
br.close()
|
||||||
|
return nil, ErrMaxDecodedSizeExceeded
|
||||||
|
}
|
||||||
|
dst = append(dst, buf[:off]...)
|
||||||
|
|
||||||
|
// br < 8, so uint8 is fine
|
||||||
|
bitsLeft := uint8(br.off)*8 + 64 - br.bitsRead
|
||||||
|
for bitsLeft > 0 {
|
||||||
|
br.fill()
|
||||||
|
if false && br.bitsRead >= 32 {
|
||||||
|
if br.off >= 4 {
|
||||||
|
v := br.in[br.off-4:]
|
||||||
|
v = v[:4]
|
||||||
|
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
||||||
|
br.value = (br.value << 32) | uint64(low)
|
||||||
|
br.bitsRead -= 32
|
||||||
|
br.off -= 4
|
||||||
|
} else {
|
||||||
|
for br.off > 0 {
|
||||||
|
br.value = (br.value << 8) | uint64(br.in[br.off-1])
|
||||||
|
br.bitsRead -= 8
|
||||||
|
br.off--
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(dst) >= maxDecodedSize {
|
||||||
|
d.bufs.Put(bufs)
|
||||||
|
br.close()
|
||||||
|
return nil, ErrMaxDecodedSizeExceeded
|
||||||
|
}
|
||||||
|
v := d.dt.single[br.peekBitsFast(d.actualTableLog)&tlMask]
|
||||||
|
nBits := uint8(v.entry)
|
||||||
|
br.advance(nBits)
|
||||||
|
bitsLeft -= nBits
|
||||||
|
dst = append(dst, uint8(v.entry>>8))
|
||||||
|
}
|
||||||
|
d.bufs.Put(bufs)
|
||||||
|
return dst, br.close()
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,34 @@
|
||||||
|
// Package cpuinfo gives runtime info about the current CPU.
|
||||||
|
//
|
||||||
|
// This is a very limited module meant for use internally
|
||||||
|
// in this project. For a more versatile solution, check
|
||||||
|
// https://github.com/klauspost/cpuid.
|
||||||
|
package cpuinfo
|
||||||
|
|
||||||
|
// CPU feature flags backing the accessors below. Both are false until
// something in this package assigns them.
var (
	hasBMI1 bool
	hasBMI2 bool
)

// HasBMI1 reports the recorded state of the BMI1 extension flag.
func HasBMI1() bool { return hasBMI1 }

// HasBMI2 reports the recorded state of the BMI2 extension flag.
func HasBMI2() bool { return hasBMI2 }

// DisableBMI2 clears the BMI2 flag, for testing purposes.
// The returned function puts back the value the flag had when
// DisableBMI2 was called.
func DisableBMI2() func() {
	previous := hasBMI2
	restore := func() { hasBMI2 = previous }
	hasBMI2 = false
	return restore
}

// HasBMI reports whether both the BMI1 and BMI2 flags are set.
func HasBMI() bool {
	if !HasBMI1() {
		return false
	}
	return HasBMI2()
}
|
11
vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.go
generated
vendored
Normal file
11
vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.go
generated
vendored
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
//go:build amd64 && !appengine && !noasm && gc
|
||||||
|
// +build amd64,!appengine,!noasm,gc
|
||||||
|
|
||||||
|
package cpuinfo
|
||||||
|
|
||||||
|
// x86extensions reports whether the CPU supports the BMI1 and BMI2
// extensions. The declaration has no Go body, so the implementation must be
// supplied outside this file (presumably the package's amd64 assembly).
//
//go:noescape
func x86extensions() (bmi1, bmi2 bool)
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
hasBMI1, hasBMI2 = x86extensions()
|
||||||
|
}
|
36
vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.s
generated
vendored
Normal file
36
vendor/github.com/klauspost/compress/internal/cpuinfo/cpuinfo_amd64.s
generated
vendored
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
// +build !appengine
|
||||||
|
// +build gc
|
||||||
|
// +build !noasm
|
||||||
|
|
||||||
|
#include "textflag.h"
|
||||||
|
#include "funcdata.h"
|
||||||
|
#include "go_asm.h"
|
||||||
|
|
||||||
|
// x86extensions stores two byte-sized results into its return slots:
// bmi1 at offset 0 and bmi2 at offset 1 of the frame pointer.
// Both are written as 0 when CPUID reports a maximum leaf below 7.
TEXT ·x86extensions(SB), NOSPLIT, $0
|
||||||
|
// 1. determine max EAX value
|
||||||
|
XORQ AX, AX // request CPUID leaf 0
|
||||||
|
CPUID
|
||||||
|
|
||||||
|
CMPQ AX, $7
|
||||||
|
JB unsupported // leaf 7 is not available, report both as absent
|
||||||
|
|
||||||
|
// 2. EAX = 7, ECX = 0 --- see Table 3-8 "Information Returned by CPUID Instruction"
|
||||||
|
MOVQ $7, AX
|
||||||
|
MOVQ $0, CX
|
||||||
|
CPUID
|
||||||
|
|
||||||
|
BTQ $3, BX // bit 3 = BMI1
|
||||||
|
SETCS AL // AL = result of the bit test above
|
||||||
|
|
||||||
|
BTQ $8, BX // bit 8 = BMI2
|
||||||
|
SETCS AH // AH = result of the bit test above
|
||||||
|
|
||||||
|
// store both results into the return slots
|
||||||
|
MOVB AL, bmi1+0(FP)
|
||||||
|
MOVB AH, bmi2+1(FP)
|
||||||
|
RET
|
||||||
|
|
||||||
|
// reached when the maximum CPUID leaf is below 7: return false, false
|
||||||
|
unsupported:
|
||||||
|
XORQ AX, AX
|
||||||
|
MOVB AL, bmi1+0(FP)
|
||||||
|
MOVB AL, bmi2+1(FP)
|
||||||
|
RET
|
|
@ -386,47 +386,31 @@ In practice this means that concurrency is often limited to utilizing about 3 co
|
||||||
|
|
||||||
### Benchmarks
|
### Benchmarks
|
||||||
|
|
||||||
These are some examples of performance compared to [datadog cgo library](https://github.com/DataDog/zstd).
|
|
||||||
|
|
||||||
The first two are streaming decodes and the last are smaller inputs.
|
The first two are streaming decodes and the last are smaller inputs.
|
||||||
|
|
||||||
|
Running on AMD Ryzen 9 3950X 16-Core Processor. AMD64 assembly used.
|
||||||
|
|
||||||
```
|
```
|
||||||
BenchmarkDecoderSilesia-8 3 385000067 ns/op 550.51 MB/s 5498 B/op 8 allocs/op
|
BenchmarkDecoderSilesia-32 5 206878840 ns/op 1024.50 MB/s 49808 B/op 43 allocs/op
|
||||||
BenchmarkDecoderSilesiaCgo-8 6 197666567 ns/op 1072.25 MB/s 270672 B/op 8 allocs/op
|
BenchmarkDecoderEnwik9-32 1 1271809000 ns/op 786.28 MB/s 72048 B/op 52 allocs/op
|
||||||
|
|
||||||
BenchmarkDecoderEnwik9-8 1 2027001600 ns/op 493.34 MB/s 10496 B/op 18 allocs/op
|
Concurrent blocks, performance:
|
||||||
BenchmarkDecoderEnwik9Cgo-8 2 979499200 ns/op 1020.93 MB/s 270672 B/op 8 allocs/op
|
|
||||||
|
|
||||||
Concurrent performance:
|
BenchmarkDecoder_DecodeAllParallel/kppkn.gtb.zst-32 67356 17857 ns/op 10321.96 MB/s 22.48 pct 102 B/op 0 allocs/op
|
||||||
|
BenchmarkDecoder_DecodeAllParallel/geo.protodata.zst-32 266656 4421 ns/op 26823.21 MB/s 11.89 pct 19 B/op 0 allocs/op
|
||||||
BenchmarkDecoder_DecodeAllParallel/kppkn.gtb.zst-16 28915 42469 ns/op 4340.07 MB/s 114 B/op 0 allocs/op
|
BenchmarkDecoder_DecodeAllParallel/plrabn12.txt.zst-32 20992 56842 ns/op 8477.17 MB/s 39.90 pct 754 B/op 0 allocs/op
|
||||||
BenchmarkDecoder_DecodeAllParallel/geo.protodata.zst-16 116505 9965 ns/op 11900.16 MB/s 16 B/op 0 allocs/op
|
BenchmarkDecoder_DecodeAllParallel/lcet10.txt.zst-32 27456 43932 ns/op 9714.01 MB/s 33.27 pct 524 B/op 0 allocs/op
|
||||||
BenchmarkDecoder_DecodeAllParallel/plrabn12.txt.zst-16 8952 134272 ns/op 3588.70 MB/s 915 B/op 0 allocs/op
|
BenchmarkDecoder_DecodeAllParallel/asyoulik.txt.zst-32 78432 15047 ns/op 8319.15 MB/s 40.34 pct 66 B/op 0 allocs/op
|
||||||
BenchmarkDecoder_DecodeAllParallel/lcet10.txt.zst-16 11820 102538 ns/op 4161.90 MB/s 594 B/op 0 allocs/op
|
BenchmarkDecoder_DecodeAllParallel/alice29.txt.zst-32 65800 18436 ns/op 8249.63 MB/s 37.75 pct 88 B/op 0 allocs/op
|
||||||
BenchmarkDecoder_DecodeAllParallel/asyoulik.txt.zst-16 34782 34184 ns/op 3661.88 MB/s 60 B/op 0 allocs/op
|
BenchmarkDecoder_DecodeAllParallel/html_x_4.zst-32 102993 11523 ns/op 35546.09 MB/s 3.637 pct 143 B/op 0 allocs/op
|
||||||
BenchmarkDecoder_DecodeAllParallel/alice29.txt.zst-16 27712 43447 ns/op 3500.58 MB/s 99 B/op 0 allocs/op
|
BenchmarkDecoder_DecodeAllParallel/paper-100k.pdf.zst-32 1000000 1070 ns/op 95720.98 MB/s 80.53 pct 3 B/op 0 allocs/op
|
||||||
BenchmarkDecoder_DecodeAllParallel/html_x_4.zst-16 62826 18750 ns/op 21845.10 MB/s 104 B/op 0 allocs/op
|
BenchmarkDecoder_DecodeAllParallel/fireworks.jpeg.zst-32 749802 1752 ns/op 70272.35 MB/s 100.0 pct 5 B/op 0 allocs/op
|
||||||
BenchmarkDecoder_DecodeAllParallel/paper-100k.pdf.zst-16 631545 1794 ns/op 57078.74 MB/s 2 B/op 0 allocs/op
|
BenchmarkDecoder_DecodeAllParallel/urls.10K.zst-32 22640 52934 ns/op 13263.37 MB/s 26.25 pct 1014 B/op 0 allocs/op
|
||||||
BenchmarkDecoder_DecodeAllParallel/fireworks.jpeg.zst-16 1690140 712 ns/op 172938.13 MB/s 1 B/op 0 allocs/op
|
BenchmarkDecoder_DecodeAllParallel/html.zst-32 226412 5232 ns/op 19572.27 MB/s 14.49 pct 20 B/op 0 allocs/op
|
||||||
BenchmarkDecoder_DecodeAllParallel/urls.10K.zst-16 10432 113593 ns/op 6180.73 MB/s 1143 B/op 0 allocs/op
|
BenchmarkDecoder_DecodeAllParallel/comp-data.bin.zst-32 923041 1276 ns/op 3194.71 MB/s 31.26 pct 0 B/op 0 allocs/op
|
||||||
BenchmarkDecoder_DecodeAllParallel/html.zst-16 113206 10671 ns/op 9596.27 MB/s 15 B/op 0 allocs/op
|
|
||||||
BenchmarkDecoder_DecodeAllParallel/comp-data.bin.zst-16 1530615 779 ns/op 5229.49 MB/s 0 B/op 0 allocs/op
|
|
||||||
|
|
||||||
BenchmarkDecoder_DecodeAllParallelCgo/kppkn.gtb.zst-16 65217 16192 ns/op 11383.34 MB/s 46 B/op 0 allocs/op
|
|
||||||
BenchmarkDecoder_DecodeAllParallelCgo/geo.protodata.zst-16 292671 4039 ns/op 29363.19 MB/s 6 B/op 0 allocs/op
|
|
||||||
BenchmarkDecoder_DecodeAllParallelCgo/plrabn12.txt.zst-16 26314 46021 ns/op 10470.43 MB/s 293 B/op 0 allocs/op
|
|
||||||
BenchmarkDecoder_DecodeAllParallelCgo/lcet10.txt.zst-16 33897 34900 ns/op 12227.96 MB/s 205 B/op 0 allocs/op
|
|
||||||
BenchmarkDecoder_DecodeAllParallelCgo/asyoulik.txt.zst-16 104348 11433 ns/op 10949.01 MB/s 20 B/op 0 allocs/op
|
|
||||||
BenchmarkDecoder_DecodeAllParallelCgo/alice29.txt.zst-16 75949 15510 ns/op 9805.60 MB/s 32 B/op 0 allocs/op
|
|
||||||
BenchmarkDecoder_DecodeAllParallelCgo/html_x_4.zst-16 173910 6756 ns/op 60624.29 MB/s 37 B/op 0 allocs/op
|
|
||||||
BenchmarkDecoder_DecodeAllParallelCgo/paper-100k.pdf.zst-16 923076 1339 ns/op 76474.87 MB/s 1 B/op 0 allocs/op
|
|
||||||
BenchmarkDecoder_DecodeAllParallelCgo/fireworks.jpeg.zst-16 922920 1351 ns/op 91102.57 MB/s 2 B/op 0 allocs/op
|
|
||||||
BenchmarkDecoder_DecodeAllParallelCgo/urls.10K.zst-16 27649 43618 ns/op 16096.19 MB/s 407 B/op 0 allocs/op
|
|
||||||
BenchmarkDecoder_DecodeAllParallelCgo/html.zst-16 279073 4160 ns/op 24614.18 MB/s 6 B/op 0 allocs/op
|
|
||||||
BenchmarkDecoder_DecodeAllParallelCgo/comp-data.bin.zst-16 749938 1579 ns/op 2581.71 MB/s 0 B/op 0 allocs/op
|
|
||||||
```
|
```
|
||||||
|
|
||||||
This reflects the performance around May 2020, but this may be out of date.
|
This reflects the performance around May 2022, but this may be out of date.
|
||||||
|
|
||||||
## Zstd inside ZIP files
|
## Zstd inside ZIP files
|
||||||
|
|
||||||
|
|
|
@ -63,13 +63,6 @@ func (b *bitReader) get32BitsFast(n uint8) uint32 {
|
||||||
return v
|
return v
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b *bitReader) get16BitsFast(n uint8) uint16 {
|
|
||||||
const regMask = 64 - 1
|
|
||||||
v := uint16((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask))
|
|
||||||
b.bitsRead += n
|
|
||||||
return v
|
|
||||||
}
|
|
||||||
|
|
||||||
// fillFast() will make sure at least 32 bits are available.
|
// fillFast() will make sure at least 32 bits are available.
|
||||||
// There must be at least 4 bytes available.
|
// There must be at least 4 bytes available.
|
||||||
func (b *bitReader) fillFast() {
|
func (b *bitReader) fillFast() {
|
||||||
|
|
|
@ -5,8 +5,6 @@
|
||||||
|
|
||||||
package zstd
|
package zstd
|
||||||
|
|
||||||
import "fmt"
|
|
||||||
|
|
||||||
// bitWriter will write bits.
|
// bitWriter will write bits.
|
||||||
// First bit will be LSB of the first byte of output.
|
// First bit will be LSB of the first byte of output.
|
||||||
type bitWriter struct {
|
type bitWriter struct {
|
||||||
|
@ -73,80 +71,6 @@ func (b *bitWriter) addBits16Clean(value uint16, bits uint8) {
|
||||||
b.nBits += bits
|
b.nBits += bits
|
||||||
}
|
}
|
||||||
|
|
||||||
// flush will flush all pending full bytes.
|
|
||||||
// There will be at least 56 bits available for writing when this has been called.
|
|
||||||
// Using flush32 is faster, but leaves less space for writing.
|
|
||||||
func (b *bitWriter) flush() {
|
|
||||||
v := b.nBits >> 3
|
|
||||||
switch v {
|
|
||||||
case 0:
|
|
||||||
case 1:
|
|
||||||
b.out = append(b.out,
|
|
||||||
byte(b.bitContainer),
|
|
||||||
)
|
|
||||||
case 2:
|
|
||||||
b.out = append(b.out,
|
|
||||||
byte(b.bitContainer),
|
|
||||||
byte(b.bitContainer>>8),
|
|
||||||
)
|
|
||||||
case 3:
|
|
||||||
b.out = append(b.out,
|
|
||||||
byte(b.bitContainer),
|
|
||||||
byte(b.bitContainer>>8),
|
|
||||||
byte(b.bitContainer>>16),
|
|
||||||
)
|
|
||||||
case 4:
|
|
||||||
b.out = append(b.out,
|
|
||||||
byte(b.bitContainer),
|
|
||||||
byte(b.bitContainer>>8),
|
|
||||||
byte(b.bitContainer>>16),
|
|
||||||
byte(b.bitContainer>>24),
|
|
||||||
)
|
|
||||||
case 5:
|
|
||||||
b.out = append(b.out,
|
|
||||||
byte(b.bitContainer),
|
|
||||||
byte(b.bitContainer>>8),
|
|
||||||
byte(b.bitContainer>>16),
|
|
||||||
byte(b.bitContainer>>24),
|
|
||||||
byte(b.bitContainer>>32),
|
|
||||||
)
|
|
||||||
case 6:
|
|
||||||
b.out = append(b.out,
|
|
||||||
byte(b.bitContainer),
|
|
||||||
byte(b.bitContainer>>8),
|
|
||||||
byte(b.bitContainer>>16),
|
|
||||||
byte(b.bitContainer>>24),
|
|
||||||
byte(b.bitContainer>>32),
|
|
||||||
byte(b.bitContainer>>40),
|
|
||||||
)
|
|
||||||
case 7:
|
|
||||||
b.out = append(b.out,
|
|
||||||
byte(b.bitContainer),
|
|
||||||
byte(b.bitContainer>>8),
|
|
||||||
byte(b.bitContainer>>16),
|
|
||||||
byte(b.bitContainer>>24),
|
|
||||||
byte(b.bitContainer>>32),
|
|
||||||
byte(b.bitContainer>>40),
|
|
||||||
byte(b.bitContainer>>48),
|
|
||||||
)
|
|
||||||
case 8:
|
|
||||||
b.out = append(b.out,
|
|
||||||
byte(b.bitContainer),
|
|
||||||
byte(b.bitContainer>>8),
|
|
||||||
byte(b.bitContainer>>16),
|
|
||||||
byte(b.bitContainer>>24),
|
|
||||||
byte(b.bitContainer>>32),
|
|
||||||
byte(b.bitContainer>>40),
|
|
||||||
byte(b.bitContainer>>48),
|
|
||||||
byte(b.bitContainer>>56),
|
|
||||||
)
|
|
||||||
default:
|
|
||||||
panic(fmt.Errorf("bits (%d) > 64", b.nBits))
|
|
||||||
}
|
|
||||||
b.bitContainer >>= v << 3
|
|
||||||
b.nBits &= 7
|
|
||||||
}
|
|
||||||
|
|
||||||
// flush32 will flush out, so there are at least 32 bits available for writing.
|
// flush32 will flush out, so there are at least 32 bits available for writing.
|
||||||
func (b *bitWriter) flush32() {
|
func (b *bitWriter) flush32() {
|
||||||
if b.nBits < 32 {
|
if b.nBits < 32 {
|
||||||
|
|
|
@ -5,9 +5,14 @@
|
||||||
package zstd
|
package zstd
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/binary"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
"io/ioutil"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
"github.com/klauspost/compress/huff0"
|
"github.com/klauspost/compress/huff0"
|
||||||
|
@ -38,14 +43,14 @@ const (
|
||||||
// maxCompressedBlockSize is the biggest allowed compressed block size (128KB)
|
// maxCompressedBlockSize is the biggest allowed compressed block size (128KB)
|
||||||
maxCompressedBlockSize = 128 << 10
|
maxCompressedBlockSize = 128 << 10
|
||||||
|
|
||||||
|
compressedBlockOverAlloc = 16
|
||||||
|
maxCompressedBlockSizeAlloc = 128<<10 + compressedBlockOverAlloc
|
||||||
|
|
||||||
// Maximum possible block size (all Raw+Uncompressed).
|
// Maximum possible block size (all Raw+Uncompressed).
|
||||||
maxBlockSize = (1 << 21) - 1
|
maxBlockSize = (1 << 21) - 1
|
||||||
|
|
||||||
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#literals_section_header
|
maxMatchLen = 131074
|
||||||
maxCompressedLiteralSize = 1 << 18
|
maxSequences = 0x7f00 + 0xffff
|
||||||
maxRLELiteralSize = 1 << 20
|
|
||||||
maxMatchLen = 131074
|
|
||||||
maxSequences = 0x7f00 + 0xffff
|
|
||||||
|
|
||||||
// We support slightly less than the reference decoder to be able to
|
// We support slightly less than the reference decoder to be able to
|
||||||
// use ints on 32 bit archs.
|
// use ints on 32 bit archs.
|
||||||
|
@ -97,7 +102,6 @@ type blockDec struct {
|
||||||
|
|
||||||
// Block is RLE, this is the size.
|
// Block is RLE, this is the size.
|
||||||
RLESize uint32
|
RLESize uint32
|
||||||
tmp [4]byte
|
|
||||||
|
|
||||||
Type blockType
|
Type blockType
|
||||||
|
|
||||||
|
@ -136,7 +140,7 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
|
||||||
b.Type = blockType((bh >> 1) & 3)
|
b.Type = blockType((bh >> 1) & 3)
|
||||||
// find size.
|
// find size.
|
||||||
cSize := int(bh >> 3)
|
cSize := int(bh >> 3)
|
||||||
maxSize := maxBlockSize
|
maxSize := maxCompressedBlockSizeAlloc
|
||||||
switch b.Type {
|
switch b.Type {
|
||||||
case blockTypeReserved:
|
case blockTypeReserved:
|
||||||
return ErrReservedBlockType
|
return ErrReservedBlockType
|
||||||
|
@ -157,9 +161,9 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
|
||||||
println("Data size on stream:", cSize)
|
println("Data size on stream:", cSize)
|
||||||
}
|
}
|
||||||
b.RLESize = 0
|
b.RLESize = 0
|
||||||
maxSize = maxCompressedBlockSize
|
maxSize = maxCompressedBlockSizeAlloc
|
||||||
if windowSize < maxCompressedBlockSize && b.lowMem {
|
if windowSize < maxCompressedBlockSize && b.lowMem {
|
||||||
maxSize = int(windowSize)
|
maxSize = int(windowSize) + compressedBlockOverAlloc
|
||||||
}
|
}
|
||||||
if cSize > maxCompressedBlockSize || uint64(cSize) > b.WindowSize {
|
if cSize > maxCompressedBlockSize || uint64(cSize) > b.WindowSize {
|
||||||
if debugDecoder {
|
if debugDecoder {
|
||||||
|
@ -190,9 +194,9 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
|
||||||
// Read block data.
|
// Read block data.
|
||||||
if cap(b.dataStorage) < cSize {
|
if cap(b.dataStorage) < cSize {
|
||||||
if b.lowMem || cSize > maxCompressedBlockSize {
|
if b.lowMem || cSize > maxCompressedBlockSize {
|
||||||
b.dataStorage = make([]byte, 0, cSize)
|
b.dataStorage = make([]byte, 0, cSize+compressedBlockOverAlloc)
|
||||||
} else {
|
} else {
|
||||||
b.dataStorage = make([]byte, 0, maxCompressedBlockSize)
|
b.dataStorage = make([]byte, 0, maxCompressedBlockSizeAlloc)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if cap(b.dst) <= maxSize {
|
if cap(b.dst) <= maxSize {
|
||||||
|
@ -360,14 +364,9 @@ func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err
|
||||||
}
|
}
|
||||||
if cap(b.literalBuf) < litRegenSize {
|
if cap(b.literalBuf) < litRegenSize {
|
||||||
if b.lowMem {
|
if b.lowMem {
|
||||||
b.literalBuf = make([]byte, litRegenSize)
|
b.literalBuf = make([]byte, litRegenSize, litRegenSize+compressedBlockOverAlloc)
|
||||||
} else {
|
} else {
|
||||||
if litRegenSize > maxCompressedLiteralSize {
|
b.literalBuf = make([]byte, litRegenSize, maxCompressedBlockSize+compressedBlockOverAlloc)
|
||||||
// Exceptional
|
|
||||||
b.literalBuf = make([]byte, litRegenSize)
|
|
||||||
} else {
|
|
||||||
b.literalBuf = make([]byte, litRegenSize, maxCompressedLiteralSize)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
literals = b.literalBuf[:litRegenSize]
|
literals = b.literalBuf[:litRegenSize]
|
||||||
|
@ -397,14 +396,14 @@ func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err
|
||||||
// Ensure we have space to store it.
|
// Ensure we have space to store it.
|
||||||
if cap(b.literalBuf) < litRegenSize {
|
if cap(b.literalBuf) < litRegenSize {
|
||||||
if b.lowMem {
|
if b.lowMem {
|
||||||
b.literalBuf = make([]byte, 0, litRegenSize)
|
b.literalBuf = make([]byte, 0, litRegenSize+compressedBlockOverAlloc)
|
||||||
} else {
|
} else {
|
||||||
b.literalBuf = make([]byte, 0, maxCompressedLiteralSize)
|
b.literalBuf = make([]byte, 0, maxCompressedBlockSize+compressedBlockOverAlloc)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
var err error
|
var err error
|
||||||
// Use our out buffer.
|
// Use our out buffer.
|
||||||
huff.MaxDecodedSize = maxCompressedBlockSize
|
huff.MaxDecodedSize = litRegenSize
|
||||||
if fourStreams {
|
if fourStreams {
|
||||||
literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals)
|
literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals)
|
||||||
} else {
|
} else {
|
||||||
|
@ -429,9 +428,9 @@ func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err
|
||||||
// Ensure we have space to store it.
|
// Ensure we have space to store it.
|
||||||
if cap(b.literalBuf) < litRegenSize {
|
if cap(b.literalBuf) < litRegenSize {
|
||||||
if b.lowMem {
|
if b.lowMem {
|
||||||
b.literalBuf = make([]byte, 0, litRegenSize)
|
b.literalBuf = make([]byte, 0, litRegenSize+compressedBlockOverAlloc)
|
||||||
} else {
|
} else {
|
||||||
b.literalBuf = make([]byte, 0, maxCompressedBlockSize)
|
b.literalBuf = make([]byte, 0, maxCompressedBlockSize+compressedBlockOverAlloc)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
huff := hist.huffTree
|
huff := hist.huffTree
|
||||||
|
@ -448,7 +447,7 @@ func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err
|
||||||
return in, err
|
return in, err
|
||||||
}
|
}
|
||||||
hist.huffTree = huff
|
hist.huffTree = huff
|
||||||
huff.MaxDecodedSize = maxCompressedBlockSize
|
huff.MaxDecodedSize = litRegenSize
|
||||||
// Use our out buffer.
|
// Use our out buffer.
|
||||||
if fourStreams {
|
if fourStreams {
|
||||||
literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals)
|
literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals)
|
||||||
|
@ -463,6 +462,8 @@ func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err
|
||||||
if len(literals) != litRegenSize {
|
if len(literals) != litRegenSize {
|
||||||
return in, fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals))
|
return in, fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals))
|
||||||
}
|
}
|
||||||
|
// Re-cap to get extra size.
|
||||||
|
literals = b.literalBuf[:len(literals)]
|
||||||
if debugDecoder {
|
if debugDecoder {
|
||||||
printf("Decompressed %d literals into %d bytes\n", litCompSize, litRegenSize)
|
printf("Decompressed %d literals into %d bytes\n", litCompSize, litRegenSize)
|
||||||
}
|
}
|
||||||
|
@ -486,10 +487,15 @@ func (b *blockDec) decodeCompressed(hist *history) error {
|
||||||
b.dst = append(b.dst, hist.decoders.literals...)
|
b.dst = append(b.dst, hist.decoders.literals...)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
err = hist.decoders.decodeSync(hist)
|
before := len(hist.decoders.out)
|
||||||
|
err = hist.decoders.decodeSync(hist.b[hist.ignoreBuffer:])
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
if hist.decoders.maxSyncLen > 0 {
|
||||||
|
hist.decoders.maxSyncLen += uint64(before)
|
||||||
|
hist.decoders.maxSyncLen -= uint64(len(hist.decoders.out))
|
||||||
|
}
|
||||||
b.dst = hist.decoders.out
|
b.dst = hist.decoders.out
|
||||||
hist.recentOffsets = hist.decoders.prevOffset
|
hist.recentOffsets = hist.decoders.prevOffset
|
||||||
return nil
|
return nil
|
||||||
|
@ -632,6 +638,22 @@ func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) {
|
||||||
println("initializing sequences:", err)
|
println("initializing sequences:", err)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
// Extract blocks...
|
||||||
|
if false && hist.dict == nil {
|
||||||
|
fatalErr := func(err error) {
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fn := fmt.Sprintf("n-%d-lits-%d-prev-%d-%d-%d-win-%d.blk", hist.decoders.nSeqs, len(hist.decoders.literals), hist.recentOffsets[0], hist.recentOffsets[1], hist.recentOffsets[2], hist.windowSize)
|
||||||
|
var buf bytes.Buffer
|
||||||
|
fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.litLengths.fse))
|
||||||
|
fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.matchLengths.fse))
|
||||||
|
fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.offsets.fse))
|
||||||
|
buf.Write(in)
|
||||||
|
ioutil.WriteFile(filepath.Join("testdata", "seqs", fn), buf.Bytes(), os.ModePerm)
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -650,6 +672,7 @@ func (b *blockDec) decodeSequences(hist *history) error {
|
||||||
}
|
}
|
||||||
hist.decoders.windowSize = hist.windowSize
|
hist.decoders.windowSize = hist.windowSize
|
||||||
hist.decoders.prevOffset = hist.recentOffsets
|
hist.decoders.prevOffset = hist.recentOffsets
|
||||||
|
|
||||||
err := hist.decoders.decode(b.sequence)
|
err := hist.decoders.decode(b.sequence)
|
||||||
hist.recentOffsets = hist.decoders.prevOffset
|
hist.recentOffsets = hist.decoders.prevOffset
|
||||||
return err
|
return err
|
||||||
|
|
|
@ -23,7 +23,7 @@ type byteBuffer interface {
|
||||||
readByte() (byte, error)
|
readByte() (byte, error)
|
||||||
|
|
||||||
// Skip n bytes.
|
// Skip n bytes.
|
||||||
skipN(n int) error
|
skipN(n int64) error
|
||||||
}
|
}
|
||||||
|
|
||||||
// in-memory buffer
|
// in-memory buffer
|
||||||
|
@ -52,10 +52,6 @@ func (b *byteBuf) readBig(n int, dst []byte) ([]byte, error) {
|
||||||
return r, nil
|
return r, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b *byteBuf) remain() []byte {
|
|
||||||
return *b
|
|
||||||
}
|
|
||||||
|
|
||||||
func (b *byteBuf) readByte() (byte, error) {
|
func (b *byteBuf) readByte() (byte, error) {
|
||||||
bb := *b
|
bb := *b
|
||||||
if len(bb) < 1 {
|
if len(bb) < 1 {
|
||||||
|
@ -66,9 +62,12 @@ func (b *byteBuf) readByte() (byte, error) {
|
||||||
return r, nil
|
return r, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b *byteBuf) skipN(n int) error {
|
func (b *byteBuf) skipN(n int64) error {
|
||||||
bb := *b
|
bb := *b
|
||||||
if len(bb) < n {
|
if n < 0 {
|
||||||
|
return fmt.Errorf("negative skip (%d) requested", n)
|
||||||
|
}
|
||||||
|
if int64(len(bb)) < n {
|
||||||
return io.ErrUnexpectedEOF
|
return io.ErrUnexpectedEOF
|
||||||
}
|
}
|
||||||
*b = bb[n:]
|
*b = bb[n:]
|
||||||
|
@ -124,9 +123,9 @@ func (r *readerWrapper) readByte() (byte, error) {
|
||||||
return r.tmp[0], nil
|
return r.tmp[0], nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *readerWrapper) skipN(n int) error {
|
func (r *readerWrapper) skipN(n int64) error {
|
||||||
n2, err := io.CopyN(ioutil.Discard, r.r, int64(n))
|
n2, err := io.CopyN(ioutil.Discard, r.r, n)
|
||||||
if n2 != int64(n) {
|
if n2 != n {
|
||||||
err = io.ErrUnexpectedEOF
|
err = io.ErrUnexpectedEOF
|
||||||
}
|
}
|
||||||
return err
|
return err
|
||||||
|
|
|
@ -13,12 +13,6 @@ type byteReader struct {
|
||||||
off int
|
off int
|
||||||
}
|
}
|
||||||
|
|
||||||
// init will initialize the reader and set the input.
|
|
||||||
func (b *byteReader) init(in []byte) {
|
|
||||||
b.b = in
|
|
||||||
b.off = 0
|
|
||||||
}
|
|
||||||
|
|
||||||
// advance the stream b n bytes.
|
// advance the stream b n bytes.
|
||||||
func (b *byteReader) advance(n uint) {
|
func (b *byteReader) advance(n uint) {
|
||||||
b.off += int(n)
|
b.off += int(n)
|
||||||
|
|
|
@ -347,18 +347,23 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
|
||||||
}
|
}
|
||||||
frame.history.setDict(&dict)
|
frame.history.setDict(&dict)
|
||||||
}
|
}
|
||||||
|
if frame.WindowSize > d.o.maxWindowSize {
|
||||||
if frame.FrameContentSize != fcsUnknown && frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)) {
|
if debugDecoder {
|
||||||
return dst, ErrDecoderSizeExceeded
|
println("window size exceeded:", frame.WindowSize, ">", d.o.maxWindowSize)
|
||||||
|
}
|
||||||
|
return dst, ErrWindowSizeExceeded
|
||||||
}
|
}
|
||||||
if frame.FrameContentSize < 1<<30 {
|
if frame.FrameContentSize != fcsUnknown {
|
||||||
// Never preallocate more than 1 GB up front.
|
if frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)) {
|
||||||
|
return dst, ErrDecoderSizeExceeded
|
||||||
|
}
|
||||||
if cap(dst)-len(dst) < int(frame.FrameContentSize) {
|
if cap(dst)-len(dst) < int(frame.FrameContentSize) {
|
||||||
dst2 := make([]byte, len(dst), len(dst)+int(frame.FrameContentSize))
|
dst2 := make([]byte, len(dst), len(dst)+int(frame.FrameContentSize)+compressedBlockOverAlloc)
|
||||||
copy(dst2, dst)
|
copy(dst2, dst)
|
||||||
dst = dst2
|
dst = dst2
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if cap(dst) == 0 {
|
if cap(dst) == 0 {
|
||||||
// Allocate len(input) * 2 by default if nothing is provided
|
// Allocate len(input) * 2 by default if nothing is provided
|
||||||
// and we didn't get frame content size.
|
// and we didn't get frame content size.
|
||||||
|
@ -437,7 +442,7 @@ func (d *Decoder) nextBlock(blocking bool) (ok bool) {
|
||||||
println("got", len(d.current.b), "bytes, error:", d.current.err, "data crc:", tmp)
|
println("got", len(d.current.b), "bytes, error:", d.current.err, "data crc:", tmp)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(next.b) > 0 {
|
if !d.o.ignoreChecksum && len(next.b) > 0 {
|
||||||
n, err := d.current.crc.Write(next.b)
|
n, err := d.current.crc.Write(next.b)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
if n != len(next.b) {
|
if n != len(next.b) {
|
||||||
|
@ -449,7 +454,7 @@ func (d *Decoder) nextBlock(blocking bool) (ok bool) {
|
||||||
got := d.current.crc.Sum64()
|
got := d.current.crc.Sum64()
|
||||||
var tmp [4]byte
|
var tmp [4]byte
|
||||||
binary.LittleEndian.PutUint32(tmp[:], uint32(got))
|
binary.LittleEndian.PutUint32(tmp[:], uint32(got))
|
||||||
if !bytes.Equal(tmp[:], next.d.checkCRC) && !ignoreCRC {
|
if !d.o.ignoreChecksum && !bytes.Equal(tmp[:], next.d.checkCRC) {
|
||||||
if debugDecoder {
|
if debugDecoder {
|
||||||
println("CRC Check Failed:", tmp[:], " (got) !=", next.d.checkCRC, "(on stream)")
|
println("CRC Check Failed:", tmp[:], " (got) !=", next.d.checkCRC, "(on stream)")
|
||||||
}
|
}
|
||||||
|
@ -533,9 +538,15 @@ func (d *Decoder) nextBlockSync() (ok bool) {
|
||||||
|
|
||||||
// Update/Check CRC
|
// Update/Check CRC
|
||||||
if d.frame.HasCheckSum {
|
if d.frame.HasCheckSum {
|
||||||
d.frame.crc.Write(d.current.b)
|
if !d.o.ignoreChecksum {
|
||||||
|
d.frame.crc.Write(d.current.b)
|
||||||
|
}
|
||||||
if d.current.d.Last {
|
if d.current.d.Last {
|
||||||
d.current.err = d.frame.checkCRC()
|
if !d.o.ignoreChecksum {
|
||||||
|
d.current.err = d.frame.checkCRC()
|
||||||
|
} else {
|
||||||
|
d.current.err = d.frame.consumeCRC()
|
||||||
|
}
|
||||||
if d.current.err != nil {
|
if d.current.err != nil {
|
||||||
println("CRC error:", d.current.err)
|
println("CRC error:", d.current.err)
|
||||||
return false
|
return false
|
||||||
|
@ -629,60 +640,18 @@ func (d *Decoder) startSyncDecoder(r io.Reader) error {
|
||||||
|
|
||||||
// Create Decoder:
|
// Create Decoder:
|
||||||
// ASYNC:
|
// ASYNC:
|
||||||
// Spawn 4 go routines.
|
// Spawn 3 go routines.
|
||||||
// 0: Read frames and decode blocks.
|
// 0: Read frames and decode block literals.
|
||||||
// 1: Decode block and literals. Receives hufftree and seqdecs, returns seqdecs and huff tree.
|
// 1: Decode sequences.
|
||||||
// 2: Wait for recentOffsets if needed. Decode sequences, send recentOffsets.
|
// 2: Execute sequences, send to output.
|
||||||
// 3: Wait for stream history, execute sequences, send stream history.
|
|
||||||
func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output chan decodeOutput) {
|
func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output chan decodeOutput) {
|
||||||
defer d.streamWg.Done()
|
defer d.streamWg.Done()
|
||||||
br := readerWrapper{r: r}
|
br := readerWrapper{r: r}
|
||||||
|
|
||||||
var seqPrepare = make(chan *blockDec, d.o.concurrent)
|
|
||||||
var seqDecode = make(chan *blockDec, d.o.concurrent)
|
var seqDecode = make(chan *blockDec, d.o.concurrent)
|
||||||
var seqExecute = make(chan *blockDec, d.o.concurrent)
|
var seqExecute = make(chan *blockDec, d.o.concurrent)
|
||||||
|
|
||||||
// Async 1: Prepare blocks...
|
// Async 1: Decode sequences...
|
||||||
go func() {
|
|
||||||
var hist history
|
|
||||||
var hasErr bool
|
|
||||||
for block := range seqPrepare {
|
|
||||||
if hasErr {
|
|
||||||
if block != nil {
|
|
||||||
seqDecode <- block
|
|
||||||
}
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if block.async.newHist != nil {
|
|
||||||
if debugDecoder {
|
|
||||||
println("Async 1: new history")
|
|
||||||
}
|
|
||||||
hist.reset()
|
|
||||||
if block.async.newHist.dict != nil {
|
|
||||||
hist.setDict(block.async.newHist.dict)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if block.err != nil || block.Type != blockTypeCompressed {
|
|
||||||
hasErr = block.err != nil
|
|
||||||
seqDecode <- block
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
remain, err := block.decodeLiterals(block.data, &hist)
|
|
||||||
block.err = err
|
|
||||||
hasErr = block.err != nil
|
|
||||||
if err == nil {
|
|
||||||
block.async.literals = hist.decoders.literals
|
|
||||||
block.async.seqData = remain
|
|
||||||
} else if debugDecoder {
|
|
||||||
println("decodeLiterals error:", err)
|
|
||||||
}
|
|
||||||
seqDecode <- block
|
|
||||||
}
|
|
||||||
close(seqDecode)
|
|
||||||
}()
|
|
||||||
|
|
||||||
// Async 2: Decode sequences...
|
|
||||||
go func() {
|
go func() {
|
||||||
var hist history
|
var hist history
|
||||||
var hasErr bool
|
var hasErr bool
|
||||||
|
@ -696,7 +665,7 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
|
||||||
}
|
}
|
||||||
if block.async.newHist != nil {
|
if block.async.newHist != nil {
|
||||||
if debugDecoder {
|
if debugDecoder {
|
||||||
println("Async 2: new history, recent:", block.async.newHist.recentOffsets)
|
println("Async 1: new history, recent:", block.async.newHist.recentOffsets)
|
||||||
}
|
}
|
||||||
hist.decoders = block.async.newHist.decoders
|
hist.decoders = block.async.newHist.decoders
|
||||||
hist.recentOffsets = block.async.newHist.recentOffsets
|
hist.recentOffsets = block.async.newHist.recentOffsets
|
||||||
|
@ -750,7 +719,7 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
|
||||||
}
|
}
|
||||||
if block.async.newHist != nil {
|
if block.async.newHist != nil {
|
||||||
if debugDecoder {
|
if debugDecoder {
|
||||||
println("Async 3: new history")
|
println("Async 2: new history")
|
||||||
}
|
}
|
||||||
hist.windowSize = block.async.newHist.windowSize
|
hist.windowSize = block.async.newHist.windowSize
|
||||||
hist.allocFrameBuffer = block.async.newHist.allocFrameBuffer
|
hist.allocFrameBuffer = block.async.newHist.allocFrameBuffer
|
||||||
|
@ -837,6 +806,33 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
|
||||||
|
|
||||||
decodeStream:
|
decodeStream:
|
||||||
for {
|
for {
|
||||||
|
var hist history
|
||||||
|
var hasErr bool
|
||||||
|
|
||||||
|
decodeBlock := func(block *blockDec) {
|
||||||
|
if hasErr {
|
||||||
|
if block != nil {
|
||||||
|
seqDecode <- block
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if block.err != nil || block.Type != blockTypeCompressed {
|
||||||
|
hasErr = block.err != nil
|
||||||
|
seqDecode <- block
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
remain, err := block.decodeLiterals(block.data, &hist)
|
||||||
|
block.err = err
|
||||||
|
hasErr = block.err != nil
|
||||||
|
if err == nil {
|
||||||
|
block.async.literals = hist.decoders.literals
|
||||||
|
block.async.seqData = remain
|
||||||
|
} else if debugDecoder {
|
||||||
|
println("decodeLiterals error:", err)
|
||||||
|
}
|
||||||
|
seqDecode <- block
|
||||||
|
}
|
||||||
frame := d.frame
|
frame := d.frame
|
||||||
if debugDecoder {
|
if debugDecoder {
|
||||||
println("New frame...")
|
println("New frame...")
|
||||||
|
@ -863,7 +859,7 @@ decodeStream:
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
case dec := <-d.decoders:
|
case dec := <-d.decoders:
|
||||||
dec.sendErr(err)
|
dec.sendErr(err)
|
||||||
seqPrepare <- dec
|
decodeBlock(dec)
|
||||||
}
|
}
|
||||||
break decodeStream
|
break decodeStream
|
||||||
}
|
}
|
||||||
|
@ -883,6 +879,10 @@ decodeStream:
|
||||||
if debugDecoder {
|
if debugDecoder {
|
||||||
println("Alloc History:", h.allocFrameBuffer)
|
println("Alloc History:", h.allocFrameBuffer)
|
||||||
}
|
}
|
||||||
|
hist.reset()
|
||||||
|
if h.dict != nil {
|
||||||
|
hist.setDict(h.dict)
|
||||||
|
}
|
||||||
dec.async.newHist = &h
|
dec.async.newHist = &h
|
||||||
dec.async.fcs = frame.FrameContentSize
|
dec.async.fcs = frame.FrameContentSize
|
||||||
historySent = true
|
historySent = true
|
||||||
|
@ -909,7 +909,7 @@ decodeStream:
|
||||||
}
|
}
|
||||||
err = dec.err
|
err = dec.err
|
||||||
last := dec.Last
|
last := dec.Last
|
||||||
seqPrepare <- dec
|
decodeBlock(dec)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
break decodeStream
|
break decodeStream
|
||||||
}
|
}
|
||||||
|
@ -918,7 +918,7 @@ decodeStream:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
close(seqPrepare)
|
close(seqDecode)
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
d.frame.history.b = frameHistCache
|
d.frame.history.b = frameHistCache
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,6 +19,7 @@ type decoderOptions struct {
|
||||||
maxDecodedSize uint64
|
maxDecodedSize uint64
|
||||||
maxWindowSize uint64
|
maxWindowSize uint64
|
||||||
dicts []dict
|
dicts []dict
|
||||||
|
ignoreChecksum bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o *decoderOptions) setDefault() {
|
func (o *decoderOptions) setDefault() {
|
||||||
|
@ -31,7 +32,7 @@ func (o *decoderOptions) setDefault() {
|
||||||
if o.concurrent > 4 {
|
if o.concurrent > 4 {
|
||||||
o.concurrent = 4
|
o.concurrent = 4
|
||||||
}
|
}
|
||||||
o.maxDecodedSize = 1 << 63
|
o.maxDecodedSize = 64 << 30
|
||||||
}
|
}
|
||||||
|
|
||||||
// WithDecoderLowmem will set whether to use a lower amount of memory,
|
// WithDecoderLowmem will set whether to use a lower amount of memory,
|
||||||
|
@ -66,7 +67,7 @@ func WithDecoderConcurrency(n int) DOption {
|
||||||
// WithDecoderMaxMemory allows to set a maximum decoded size for in-memory
|
// WithDecoderMaxMemory allows to set a maximum decoded size for in-memory
|
||||||
// non-streaming operations or maximum window size for streaming operations.
|
// non-streaming operations or maximum window size for streaming operations.
|
||||||
// This can be used to control memory usage of potentially hostile content.
|
// This can be used to control memory usage of potentially hostile content.
|
||||||
// Maximum and default is 1 << 63 bytes.
|
// Maximum is 1 << 63 bytes. Default is 64GiB.
|
||||||
func WithDecoderMaxMemory(n uint64) DOption {
|
func WithDecoderMaxMemory(n uint64) DOption {
|
||||||
return func(o *decoderOptions) error {
|
return func(o *decoderOptions) error {
|
||||||
if n == 0 {
|
if n == 0 {
|
||||||
|
@ -112,3 +113,11 @@ func WithDecoderMaxWindow(size uint64) DOption {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IgnoreChecksum allows to forcibly ignore checksum checking.
|
||||||
|
func IgnoreChecksum(b bool) DOption {
|
||||||
|
return func(o *decoderOptions) error {
|
||||||
|
o.ignoreChecksum = b
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -156,8 +156,8 @@ encodeLoop:
|
||||||
panic("offset0 was 0")
|
panic("offset0 was 0")
|
||||||
}
|
}
|
||||||
|
|
||||||
nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
|
|
||||||
nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
|
nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
|
||||||
|
nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
|
||||||
candidateL := e.longTable[nextHashL]
|
candidateL := e.longTable[nextHashL]
|
||||||
candidateS := e.table[nextHashS]
|
candidateS := e.table[nextHashS]
|
||||||
|
|
||||||
|
@ -518,8 +518,8 @@ encodeLoop:
|
||||||
}
|
}
|
||||||
|
|
||||||
// Store this, since we have it.
|
// Store this, since we have it.
|
||||||
nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
|
|
||||||
nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
|
nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
|
||||||
|
nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
|
||||||
|
|
||||||
// We have at least 4 byte match.
|
// We have at least 4 byte match.
|
||||||
// No need to check backwards. We come straight from a match
|
// No need to check backwards. We come straight from a match
|
||||||
|
@ -674,8 +674,8 @@ encodeLoop:
|
||||||
panic("offset0 was 0")
|
panic("offset0 was 0")
|
||||||
}
|
}
|
||||||
|
|
||||||
nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
|
|
||||||
nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
|
nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
|
||||||
|
nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
|
||||||
candidateL := e.longTable[nextHashL]
|
candidateL := e.longTable[nextHashL]
|
||||||
candidateS := e.table[nextHashS]
|
candidateS := e.table[nextHashS]
|
||||||
|
|
||||||
|
@ -1047,8 +1047,8 @@ encodeLoop:
|
||||||
}
|
}
|
||||||
|
|
||||||
// Store this, since we have it.
|
// Store this, since we have it.
|
||||||
nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
|
|
||||||
nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
|
nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
|
||||||
|
nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
|
||||||
|
|
||||||
// We have at least 4 byte match.
|
// We have at least 4 byte match.
|
||||||
// No need to check backwards. We come straight from a match
|
// No need to check backwards. We come straight from a match
|
||||||
|
|
|
@ -127,8 +127,8 @@ encodeLoop:
|
||||||
panic("offset0 was 0")
|
panic("offset0 was 0")
|
||||||
}
|
}
|
||||||
|
|
||||||
nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
|
|
||||||
nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
|
nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
|
||||||
|
nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
|
||||||
candidateL := e.longTable[nextHashL]
|
candidateL := e.longTable[nextHashL]
|
||||||
candidateS := e.table[nextHashS]
|
candidateS := e.table[nextHashS]
|
||||||
|
|
||||||
|
@ -439,8 +439,8 @@ encodeLoop:
|
||||||
var t int32
|
var t int32
|
||||||
for {
|
for {
|
||||||
|
|
||||||
nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
|
|
||||||
nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
|
nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
|
||||||
|
nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
|
||||||
candidateL := e.longTable[nextHashL]
|
candidateL := e.longTable[nextHashL]
|
||||||
candidateS := e.table[nextHashS]
|
candidateS := e.table[nextHashS]
|
||||||
|
|
||||||
|
@ -785,8 +785,8 @@ encodeLoop:
|
||||||
panic("offset0 was 0")
|
panic("offset0 was 0")
|
||||||
}
|
}
|
||||||
|
|
||||||
nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
|
|
||||||
nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
|
nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
|
||||||
|
nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
|
||||||
candidateL := e.longTable[nextHashL]
|
candidateL := e.longTable[nextHashL]
|
||||||
candidateS := e.table[nextHashS]
|
candidateS := e.table[nextHashS]
|
||||||
|
|
||||||
|
@ -969,7 +969,7 @@ encodeLoop:
|
||||||
te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
|
te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
|
||||||
te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
|
te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
|
||||||
longHash1 := hashLen(cv0, dFastLongTableBits, dFastLongLen)
|
longHash1 := hashLen(cv0, dFastLongTableBits, dFastLongLen)
|
||||||
longHash2 := hashLen(cv0, dFastLongTableBits, dFastLongLen)
|
longHash2 := hashLen(cv1, dFastLongTableBits, dFastLongLen)
|
||||||
e.longTable[longHash1] = te0
|
e.longTable[longHash1] = te0
|
||||||
e.longTable[longHash2] = te1
|
e.longTable[longHash2] = te1
|
||||||
e.markLongShardDirty(longHash1)
|
e.markLongShardDirty(longHash1)
|
||||||
|
@ -1002,8 +1002,8 @@ encodeLoop:
|
||||||
}
|
}
|
||||||
|
|
||||||
// Store this, since we have it.
|
// Store this, since we have it.
|
||||||
nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
|
|
||||||
nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
|
nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
|
||||||
|
nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
|
||||||
|
|
||||||
// We have at least 4 byte match.
|
// We have at least 4 byte match.
|
||||||
// No need to check backwards. We come straight from a match
|
// No need to check backwards. We come straight from a match
|
||||||
|
|
|
@ -528,8 +528,8 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
|
||||||
// If a non-single block is needed the encoder will reset again.
|
// If a non-single block is needed the encoder will reset again.
|
||||||
e.encoders <- enc
|
e.encoders <- enc
|
||||||
}()
|
}()
|
||||||
// Use single segments when above minimum window and below 1MB.
|
// Use single segments when above minimum window and below window size.
|
||||||
single := len(src) < 1<<20 && len(src) > MinWindowSize
|
single := len(src) <= e.o.windowSize && len(src) > MinWindowSize
|
||||||
if e.o.single != nil {
|
if e.o.single != nil {
|
||||||
single = *e.o.single
|
single = *e.o.single
|
||||||
}
|
}
|
||||||
|
@ -551,7 +551,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we can do everything in one block, prefer that.
|
// If we can do everything in one block, prefer that.
|
||||||
if len(src) <= maxCompressedBlockSize {
|
if len(src) <= e.o.blockSize {
|
||||||
enc.Reset(e.o.dict, true)
|
enc.Reset(e.o.dict, true)
|
||||||
// Slightly faster with no history and everything in one block.
|
// Slightly faster with no history and everything in one block.
|
||||||
if e.o.crc {
|
if e.o.crc {
|
||||||
|
|
|
@ -283,7 +283,7 @@ func WithNoEntropyCompression(b bool) EOption {
|
||||||
// a decoder is allowed to reject a compressed frame which requests a memory size beyond decoder's authorized range.
|
// a decoder is allowed to reject a compressed frame which requests a memory size beyond decoder's authorized range.
|
||||||
// For broader compatibility, decoders are recommended to support memory sizes of at least 8 MB.
|
// For broader compatibility, decoders are recommended to support memory sizes of at least 8 MB.
|
||||||
// This is only a recommendation, each decoder is free to support higher or lower limits, depending on local limitations.
|
// This is only a recommendation, each decoder is free to support higher or lower limits, depending on local limitations.
|
||||||
// If this is not specified, block encodes will automatically choose this based on the input size.
|
// If this is not specified, block encodes will automatically choose this based on the input size and the window size.
|
||||||
// This setting has no effect on streamed encodes.
|
// This setting has no effect on streamed encodes.
|
||||||
func WithSingleSegment(b bool) EOption {
|
func WithSingleSegment(b bool) EOption {
|
||||||
return func(o *encoderOptions) error {
|
return func(o *encoderOptions) error {
|
||||||
|
|
|
@ -106,7 +106,7 @@ func (d *frameDec) reset(br byteBuffer) error {
|
||||||
}
|
}
|
||||||
n := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
|
n := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
|
||||||
println("Skipping frame with", n, "bytes.")
|
println("Skipping frame with", n, "bytes.")
|
||||||
err = br.skipN(int(n))
|
err = br.skipN(int64(n))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if debugDecoder {
|
if debugDecoder {
|
||||||
println("Reading discarded frame", err)
|
println("Reading discarded frame", err)
|
||||||
|
@ -231,20 +231,27 @@ func (d *frameDec) reset(br byteBuffer) error {
|
||||||
d.crc.Reset()
|
d.crc.Reset()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if d.WindowSize > d.o.maxWindowSize {
|
||||||
|
if debugDecoder {
|
||||||
|
printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize)
|
||||||
|
}
|
||||||
|
return ErrWindowSizeExceeded
|
||||||
|
}
|
||||||
|
|
||||||
if d.WindowSize == 0 && d.SingleSegment {
|
if d.WindowSize == 0 && d.SingleSegment {
|
||||||
// We may not need window in this case.
|
// We may not need window in this case.
|
||||||
d.WindowSize = d.FrameContentSize
|
d.WindowSize = d.FrameContentSize
|
||||||
if d.WindowSize < MinWindowSize {
|
if d.WindowSize < MinWindowSize {
|
||||||
d.WindowSize = MinWindowSize
|
d.WindowSize = MinWindowSize
|
||||||
}
|
}
|
||||||
|
if d.WindowSize > d.o.maxDecodedSize {
|
||||||
|
if debugDecoder {
|
||||||
|
printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize)
|
||||||
|
}
|
||||||
|
return ErrDecoderSizeExceeded
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if d.WindowSize > uint64(d.o.maxWindowSize) {
|
|
||||||
if debugDecoder {
|
|
||||||
printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize)
|
|
||||||
}
|
|
||||||
return ErrWindowSizeExceeded
|
|
||||||
}
|
|
||||||
// The minimum Window_Size is 1 KB.
|
// The minimum Window_Size is 1 KB.
|
||||||
if d.WindowSize < MinWindowSize {
|
if d.WindowSize < MinWindowSize {
|
||||||
if debugDecoder {
|
if debugDecoder {
|
||||||
|
@ -253,10 +260,11 @@ func (d *frameDec) reset(br byteBuffer) error {
|
||||||
return ErrWindowSizeTooSmall
|
return ErrWindowSizeTooSmall
|
||||||
}
|
}
|
||||||
d.history.windowSize = int(d.WindowSize)
|
d.history.windowSize = int(d.WindowSize)
|
||||||
if d.o.lowMem && d.history.windowSize < maxBlockSize {
|
if !d.o.lowMem || d.history.windowSize < maxBlockSize {
|
||||||
|
// Alloc 2x window size if not low-mem, or very small window size.
|
||||||
d.history.allocFrameBuffer = d.history.windowSize * 2
|
d.history.allocFrameBuffer = d.history.windowSize * 2
|
||||||
// TODO: Maybe use FrameContent size
|
|
||||||
} else {
|
} else {
|
||||||
|
// Alloc with one additional block
|
||||||
d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize
|
d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -290,13 +298,6 @@ func (d *frameDec) checkCRC() error {
|
||||||
if !d.HasCheckSum {
|
if !d.HasCheckSum {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
var tmp [4]byte
|
|
||||||
got := d.crc.Sum64()
|
|
||||||
// Flip to match file order.
|
|
||||||
tmp[0] = byte(got >> 0)
|
|
||||||
tmp[1] = byte(got >> 8)
|
|
||||||
tmp[2] = byte(got >> 16)
|
|
||||||
tmp[3] = byte(got >> 24)
|
|
||||||
|
|
||||||
// We can overwrite upper tmp now
|
// We can overwrite upper tmp now
|
||||||
want, err := d.rawInput.readSmall(4)
|
want, err := d.rawInput.readSmall(4)
|
||||||
|
@ -305,7 +306,19 @@ func (d *frameDec) checkCRC() error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if !bytes.Equal(tmp[:], want) && !ignoreCRC {
|
if d.o.ignoreChecksum {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var tmp [4]byte
|
||||||
|
got := d.crc.Sum64()
|
||||||
|
// Flip to match file order.
|
||||||
|
tmp[0] = byte(got >> 0)
|
||||||
|
tmp[1] = byte(got >> 8)
|
||||||
|
tmp[2] = byte(got >> 16)
|
||||||
|
tmp[3] = byte(got >> 24)
|
||||||
|
|
||||||
|
if !bytes.Equal(tmp[:], want) {
|
||||||
if debugDecoder {
|
if debugDecoder {
|
||||||
println("CRC Check Failed:", tmp[:], "!=", want)
|
println("CRC Check Failed:", tmp[:], "!=", want)
|
||||||
}
|
}
|
||||||
|
@ -317,6 +330,19 @@ func (d *frameDec) checkCRC() error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// consumeCRC reads the checksum data if the frame has one.
|
||||||
|
func (d *frameDec) consumeCRC() error {
|
||||||
|
if d.HasCheckSum {
|
||||||
|
_, err := d.rawInput.readSmall(4)
|
||||||
|
if err != nil {
|
||||||
|
println("CRC missing?", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// runDecoder will create a sync decoder that will decode a block of data.
|
// runDecoder will create a sync decoder that will decode a block of data.
|
||||||
func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
|
func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
|
||||||
saved := d.history.b
|
saved := d.history.b
|
||||||
|
@ -326,6 +352,19 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
|
||||||
d.history.ignoreBuffer = len(dst)
|
d.history.ignoreBuffer = len(dst)
|
||||||
// Store input length, so we only check new data.
|
// Store input length, so we only check new data.
|
||||||
crcStart := len(dst)
|
crcStart := len(dst)
|
||||||
|
d.history.decoders.maxSyncLen = 0
|
||||||
|
if d.FrameContentSize != fcsUnknown {
|
||||||
|
d.history.decoders.maxSyncLen = d.FrameContentSize + uint64(len(dst))
|
||||||
|
if d.history.decoders.maxSyncLen > d.o.maxDecodedSize {
|
||||||
|
return dst, ErrDecoderSizeExceeded
|
||||||
|
}
|
||||||
|
if uint64(cap(dst)) < d.history.decoders.maxSyncLen {
|
||||||
|
// Alloc for output
|
||||||
|
dst2 := make([]byte, len(dst), d.history.decoders.maxSyncLen+compressedBlockOverAlloc)
|
||||||
|
copy(dst2, dst)
|
||||||
|
dst = dst2
|
||||||
|
}
|
||||||
|
}
|
||||||
var err error
|
var err error
|
||||||
for {
|
for {
|
||||||
err = dec.reset(d.rawInput, d.WindowSize)
|
err = dec.reset(d.rawInput, d.WindowSize)
|
||||||
|
@ -360,13 +399,17 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
|
||||||
if d.FrameContentSize != fcsUnknown && uint64(len(d.history.b)-crcStart) != d.FrameContentSize {
|
if d.FrameContentSize != fcsUnknown && uint64(len(d.history.b)-crcStart) != d.FrameContentSize {
|
||||||
err = ErrFrameSizeMismatch
|
err = ErrFrameSizeMismatch
|
||||||
} else if d.HasCheckSum {
|
} else if d.HasCheckSum {
|
||||||
var n int
|
if d.o.ignoreChecksum {
|
||||||
n, err = d.crc.Write(dst[crcStart:])
|
err = d.consumeCRC()
|
||||||
if err == nil {
|
} else {
|
||||||
if n != len(dst)-crcStart {
|
var n int
|
||||||
err = io.ErrShortWrite
|
n, err = d.crc.Write(dst[crcStart:])
|
||||||
} else {
|
if err == nil {
|
||||||
err = d.checkCRC()
|
if n != len(dst)-crcStart {
|
||||||
|
err = io.ErrShortWrite
|
||||||
|
} else {
|
||||||
|
err = d.checkCRC()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,8 +5,10 @@
|
||||||
package zstd
|
package zstd
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"encoding/binary"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
@ -178,10 +180,32 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error {
|
||||||
return fmt.Errorf("corruption detected (total %d != %d)", gotTotal, 1<<s.actualTableLog)
|
return fmt.Errorf("corruption detected (total %d != %d)", gotTotal, 1<<s.actualTableLog)
|
||||||
}
|
}
|
||||||
b.advance((bitCount + 7) >> 3)
|
b.advance((bitCount + 7) >> 3)
|
||||||
// println(s.norm[:s.symbolLen], s.symbolLen)
|
|
||||||
return s.buildDtable()
|
return s.buildDtable()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *fseDecoder) mustReadFrom(r io.Reader) {
|
||||||
|
fatalErr := func(err error) {
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// dt [maxTablesize]decSymbol // Decompression table.
|
||||||
|
// symbolLen uint16 // Length of active part of the symbol table.
|
||||||
|
// actualTableLog uint8 // Selected tablelog.
|
||||||
|
// maxBits uint8 // Maximum number of additional bits
|
||||||
|
// // used for table creation to avoid allocations.
|
||||||
|
// stateTable [256]uint16
|
||||||
|
// norm [maxSymbolValue + 1]int16
|
||||||
|
// preDefined bool
|
||||||
|
fatalErr(binary.Read(r, binary.LittleEndian, &s.dt))
|
||||||
|
fatalErr(binary.Read(r, binary.LittleEndian, &s.symbolLen))
|
||||||
|
fatalErr(binary.Read(r, binary.LittleEndian, &s.actualTableLog))
|
||||||
|
fatalErr(binary.Read(r, binary.LittleEndian, &s.maxBits))
|
||||||
|
fatalErr(binary.Read(r, binary.LittleEndian, &s.stateTable))
|
||||||
|
fatalErr(binary.Read(r, binary.LittleEndian, &s.norm))
|
||||||
|
fatalErr(binary.Read(r, binary.LittleEndian, &s.preDefined))
|
||||||
|
}
|
||||||
|
|
||||||
// decSymbol contains information about a state entry,
|
// decSymbol contains information about a state entry,
|
||||||
// Including the state offset base, the output symbol and
|
// Including the state offset base, the output symbol and
|
||||||
// the number of bits to read for the low part of the destination state.
|
// the number of bits to read for the low part of the destination state.
|
||||||
|
@ -204,18 +228,10 @@ func (d decSymbol) newState() uint16 {
|
||||||
return uint16(d >> 16)
|
return uint16(d >> 16)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (d decSymbol) baseline() uint32 {
|
|
||||||
return uint32(d >> 32)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (d decSymbol) baselineInt() int {
|
func (d decSymbol) baselineInt() int {
|
||||||
return int(d >> 32)
|
return int(d >> 32)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (d *decSymbol) set(nbits, addBits uint8, newState uint16, baseline uint32) {
|
|
||||||
*d = decSymbol(nbits) | (decSymbol(addBits) << 8) | (decSymbol(newState) << 16) | (decSymbol(baseline) << 32)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (d *decSymbol) setNBits(nBits uint8) {
|
func (d *decSymbol) setNBits(nBits uint8) {
|
||||||
const mask = 0xffffffffffffff00
|
const mask = 0xffffffffffffff00
|
||||||
*d = (*d & mask) | decSymbol(nBits)
|
*d = (*d & mask) | decSymbol(nBits)
|
||||||
|
@ -231,11 +247,6 @@ func (d *decSymbol) setNewState(state uint16) {
|
||||||
*d = (*d & mask) | decSymbol(state)<<16
|
*d = (*d & mask) | decSymbol(state)<<16
|
||||||
}
|
}
|
||||||
|
|
||||||
func (d *decSymbol) setBaseline(baseline uint32) {
|
|
||||||
const mask = 0xffffffff
|
|
||||||
*d = (*d & mask) | decSymbol(baseline)<<32
|
|
||||||
}
|
|
||||||
|
|
||||||
func (d *decSymbol) setExt(addBits uint8, baseline uint32) {
|
func (d *decSymbol) setExt(addBits uint8, baseline uint32) {
|
||||||
const mask = 0xffff00ff
|
const mask = 0xffff00ff
|
||||||
*d = (*d & mask) | (decSymbol(addBits) << 8) | (decSymbol(baseline) << 32)
|
*d = (*d & mask) | (decSymbol(addBits) << 8) | (decSymbol(baseline) << 32)
|
||||||
|
@ -257,68 +268,6 @@ func (s *fseDecoder) setRLE(symbol decSymbol) {
|
||||||
s.dt[0] = symbol
|
s.dt[0] = symbol
|
||||||
}
|
}
|
||||||
|
|
||||||
// buildDtable will build the decoding table.
|
|
||||||
func (s *fseDecoder) buildDtable() error {
|
|
||||||
tableSize := uint32(1 << s.actualTableLog)
|
|
||||||
highThreshold := tableSize - 1
|
|
||||||
symbolNext := s.stateTable[:256]
|
|
||||||
|
|
||||||
// Init, lay down lowprob symbols
|
|
||||||
{
|
|
||||||
for i, v := range s.norm[:s.symbolLen] {
|
|
||||||
if v == -1 {
|
|
||||||
s.dt[highThreshold].setAddBits(uint8(i))
|
|
||||||
highThreshold--
|
|
||||||
symbolNext[i] = 1
|
|
||||||
} else {
|
|
||||||
symbolNext[i] = uint16(v)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Spread symbols
|
|
||||||
{
|
|
||||||
tableMask := tableSize - 1
|
|
||||||
step := tableStep(tableSize)
|
|
||||||
position := uint32(0)
|
|
||||||
for ss, v := range s.norm[:s.symbolLen] {
|
|
||||||
for i := 0; i < int(v); i++ {
|
|
||||||
s.dt[position].setAddBits(uint8(ss))
|
|
||||||
position = (position + step) & tableMask
|
|
||||||
for position > highThreshold {
|
|
||||||
// lowprob area
|
|
||||||
position = (position + step) & tableMask
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if position != 0 {
|
|
||||||
// position must reach all cells once, otherwise normalizedCounter is incorrect
|
|
||||||
return errors.New("corrupted input (position != 0)")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Build Decoding table
|
|
||||||
{
|
|
||||||
tableSize := uint16(1 << s.actualTableLog)
|
|
||||||
for u, v := range s.dt[:tableSize] {
|
|
||||||
symbol := v.addBits()
|
|
||||||
nextState := symbolNext[symbol]
|
|
||||||
symbolNext[symbol] = nextState + 1
|
|
||||||
nBits := s.actualTableLog - byte(highBits(uint32(nextState)))
|
|
||||||
s.dt[u&maxTableMask].setNBits(nBits)
|
|
||||||
newState := (nextState << nBits) - tableSize
|
|
||||||
if newState > tableSize {
|
|
||||||
return fmt.Errorf("newState (%d) outside table size (%d)", newState, tableSize)
|
|
||||||
}
|
|
||||||
if newState == uint16(u) && nBits == 0 {
|
|
||||||
// Seems weird that this is possible with nbits > 0.
|
|
||||||
return fmt.Errorf("newState (%d) == oldState (%d) and no bits", newState, u)
|
|
||||||
}
|
|
||||||
s.dt[u&maxTableMask].setNewState(newState)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// transform will transform the decoder table into a table usable for
|
// transform will transform the decoder table into a table usable for
|
||||||
// decoding without having to apply the transformation while decoding.
|
// decoding without having to apply the transformation while decoding.
|
||||||
// The state will contain the base value and the number of bits to read.
|
// The state will contain the base value and the number of bits to read.
|
||||||
|
@ -352,34 +301,7 @@ func (s *fseState) init(br *bitReader, tableLog uint8, dt []decSymbol) {
|
||||||
s.state = dt[br.getBits(tableLog)]
|
s.state = dt[br.getBits(tableLog)]
|
||||||
}
|
}
|
||||||
|
|
||||||
// next returns the current symbol and sets the next state.
|
|
||||||
// At least tablelog bits must be available in the bit reader.
|
|
||||||
func (s *fseState) next(br *bitReader) {
|
|
||||||
lowBits := uint16(br.getBits(s.state.nbBits()))
|
|
||||||
s.state = s.dt[s.state.newState()+lowBits]
|
|
||||||
}
|
|
||||||
|
|
||||||
// finished returns true if all bits have been read from the bitstream
|
|
||||||
// and the next state would require reading bits from the input.
|
|
||||||
func (s *fseState) finished(br *bitReader) bool {
|
|
||||||
return br.finished() && s.state.nbBits() > 0
|
|
||||||
}
|
|
||||||
|
|
||||||
// final returns the current state symbol without decoding the next.
|
|
||||||
func (s *fseState) final() (int, uint8) {
|
|
||||||
return s.state.baselineInt(), s.state.addBits()
|
|
||||||
}
|
|
||||||
|
|
||||||
// final returns the current state symbol without decoding the next.
|
// final returns the current state symbol without decoding the next.
|
||||||
func (s decSymbol) final() (int, uint8) {
|
func (s decSymbol) final() (int, uint8) {
|
||||||
return s.baselineInt(), s.addBits()
|
return s.baselineInt(), s.addBits()
|
||||||
}
|
}
|
||||||
|
|
||||||
// nextFast returns the next symbol and sets the next state.
|
|
||||||
// This can only be used if no symbols are 0 bits.
|
|
||||||
// At least tablelog bits must be available in the bit reader.
|
|
||||||
func (s *fseState) nextFast(br *bitReader) (uint32, uint8) {
|
|
||||||
lowBits := br.get16BitsFast(s.state.nbBits())
|
|
||||||
s.state = s.dt[s.state.newState()+lowBits]
|
|
||||||
return s.state.baseline(), s.state.addBits()
|
|
||||||
}
|
|
||||||
|
|
|
@ -0,0 +1,64 @@
|
||||||
|
//go:build amd64 && !appengine && !noasm && gc
|
||||||
|
// +build amd64,!appengine,!noasm,gc
|
||||||
|
|
||||||
|
package zstd
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
)
|
||||||
|
|
||||||
|
type buildDtableAsmContext struct {
|
||||||
|
// inputs
|
||||||
|
stateTable *uint16
|
||||||
|
norm *int16
|
||||||
|
dt *uint64
|
||||||
|
|
||||||
|
// outputs --- set by the procedure in the case of error;
|
||||||
|
// for interpretation please see the error handling part below
|
||||||
|
errParam1 uint64
|
||||||
|
errParam2 uint64
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildDtable_asm is an x86 assembly implementation of fseDecoder.buildDtable.
|
||||||
|
// Function returns non-zero exit code on error.
|
||||||
|
// go:noescape
|
||||||
|
func buildDtable_asm(s *fseDecoder, ctx *buildDtableAsmContext) int
|
||||||
|
|
||||||
|
// please keep in sync with _generate/gen_fse.go
|
||||||
|
const (
|
||||||
|
errorCorruptedNormalizedCounter = 1
|
||||||
|
errorNewStateTooBig = 2
|
||||||
|
errorNewStateNoBits = 3
|
||||||
|
)
|
||||||
|
|
||||||
|
// buildDtable will build the decoding table.
|
||||||
|
func (s *fseDecoder) buildDtable() error {
|
||||||
|
ctx := buildDtableAsmContext{
|
||||||
|
stateTable: &s.stateTable[0],
|
||||||
|
norm: &s.norm[0],
|
||||||
|
dt: (*uint64)(&s.dt[0]),
|
||||||
|
}
|
||||||
|
code := buildDtable_asm(s, &ctx)
|
||||||
|
|
||||||
|
if code != 0 {
|
||||||
|
switch code {
|
||||||
|
case errorCorruptedNormalizedCounter:
|
||||||
|
position := ctx.errParam1
|
||||||
|
return fmt.Errorf("corrupted input (position=%d, expected 0)", position)
|
||||||
|
|
||||||
|
case errorNewStateTooBig:
|
||||||
|
newState := decSymbol(ctx.errParam1)
|
||||||
|
size := ctx.errParam2
|
||||||
|
return fmt.Errorf("newState (%d) outside table size (%d)", newState, size)
|
||||||
|
|
||||||
|
case errorNewStateNoBits:
|
||||||
|
newState := decSymbol(ctx.errParam1)
|
||||||
|
oldState := decSymbol(ctx.errParam2)
|
||||||
|
return fmt.Errorf("newState (%d) == oldState (%d) and no bits", newState, oldState)
|
||||||
|
|
||||||
|
default:
|
||||||
|
return fmt.Errorf("buildDtable_asm returned unhandled nonzero code = %d", code)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
|
@ -0,0 +1,127 @@
|
||||||
|
// Code generated by command: go run gen_fse.go -out ../fse_decoder_amd64.s -pkg=zstd. DO NOT EDIT.
|
||||||
|
|
||||||
|
//go:build !appengine && !noasm && gc && !noasm
|
||||||
|
// +build !appengine,!noasm,gc,!noasm
|
||||||
|
|
||||||
|
// func buildDtable_asm(s *fseDecoder, ctx *buildDtableAsmContext) int
|
||||||
|
TEXT ·buildDtable_asm(SB), $0-24
|
||||||
|
MOVQ ctx+8(FP), CX
|
||||||
|
MOVQ s+0(FP), DI
|
||||||
|
|
||||||
|
// Load values
|
||||||
|
MOVBQZX 4098(DI), DX
|
||||||
|
XORQ AX, AX
|
||||||
|
BTSQ DX, AX
|
||||||
|
MOVQ (CX), BX
|
||||||
|
MOVQ 16(CX), SI
|
||||||
|
LEAQ -1(AX), R8
|
||||||
|
MOVQ 8(CX), CX
|
||||||
|
MOVWQZX 4096(DI), DI
|
||||||
|
|
||||||
|
// End load values
|
||||||
|
// Init, lay down lowprob symbols
|
||||||
|
XORQ R9, R9
|
||||||
|
JMP init_main_loop_condition
|
||||||
|
|
||||||
|
init_main_loop:
|
||||||
|
MOVWQSX (CX)(R9*2), R10
|
||||||
|
CMPW R10, $-1
|
||||||
|
JNE do_not_update_high_threshold
|
||||||
|
MOVB R9, 1(SI)(R8*8)
|
||||||
|
DECQ R8
|
||||||
|
MOVQ $0x0000000000000001, R10
|
||||||
|
|
||||||
|
do_not_update_high_threshold:
|
||||||
|
MOVW R10, (BX)(R9*2)
|
||||||
|
INCQ R9
|
||||||
|
|
||||||
|
init_main_loop_condition:
|
||||||
|
CMPQ R9, DI
|
||||||
|
JL init_main_loop
|
||||||
|
|
||||||
|
// Spread symbols
|
||||||
|
// Calculate table step
|
||||||
|
MOVQ AX, R9
|
||||||
|
SHRQ $0x01, R9
|
||||||
|
MOVQ AX, R10
|
||||||
|
SHRQ $0x03, R10
|
||||||
|
LEAQ 3(R9)(R10*1), R9
|
||||||
|
|
||||||
|
// Fill add bits values
|
||||||
|
LEAQ -1(AX), R10
|
||||||
|
XORQ R11, R11
|
||||||
|
XORQ R12, R12
|
||||||
|
JMP spread_main_loop_condition
|
||||||
|
|
||||||
|
spread_main_loop:
|
||||||
|
XORQ R13, R13
|
||||||
|
MOVWQSX (CX)(R12*2), R14
|
||||||
|
JMP spread_inner_loop_condition
|
||||||
|
|
||||||
|
spread_inner_loop:
|
||||||
|
MOVB R12, 1(SI)(R11*8)
|
||||||
|
|
||||||
|
adjust_position:
|
||||||
|
ADDQ R9, R11
|
||||||
|
ANDQ R10, R11
|
||||||
|
CMPQ R11, R8
|
||||||
|
JG adjust_position
|
||||||
|
INCQ R13
|
||||||
|
|
||||||
|
spread_inner_loop_condition:
|
||||||
|
CMPQ R13, R14
|
||||||
|
JL spread_inner_loop
|
||||||
|
INCQ R12
|
||||||
|
|
||||||
|
spread_main_loop_condition:
|
||||||
|
CMPQ R12, DI
|
||||||
|
JL spread_main_loop
|
||||||
|
TESTQ R11, R11
|
||||||
|
JZ spread_check_ok
|
||||||
|
MOVQ ctx+8(FP), AX
|
||||||
|
MOVQ R11, 24(AX)
|
||||||
|
MOVQ $+1, ret+16(FP)
|
||||||
|
RET
|
||||||
|
|
||||||
|
spread_check_ok:
|
||||||
|
// Build Decoding table
|
||||||
|
XORQ DI, DI
|
||||||
|
|
||||||
|
build_table_main_table:
|
||||||
|
MOVBQZX 1(SI)(DI*8), CX
|
||||||
|
MOVWQZX (BX)(CX*2), R8
|
||||||
|
LEAQ 1(R8), R9
|
||||||
|
MOVW R9, (BX)(CX*2)
|
||||||
|
MOVQ R8, R9
|
||||||
|
BSRQ R9, R9
|
||||||
|
MOVQ DX, CX
|
||||||
|
SUBQ R9, CX
|
||||||
|
SHLQ CL, R8
|
||||||
|
SUBQ AX, R8
|
||||||
|
MOVB CL, (SI)(DI*8)
|
||||||
|
MOVW R8, 2(SI)(DI*8)
|
||||||
|
CMPQ R8, AX
|
||||||
|
JLE build_table_check1_ok
|
||||||
|
MOVQ ctx+8(FP), CX
|
||||||
|
MOVQ R8, 24(CX)
|
||||||
|
MOVQ AX, 32(CX)
|
||||||
|
MOVQ $+2, ret+16(FP)
|
||||||
|
RET
|
||||||
|
|
||||||
|
build_table_check1_ok:
|
||||||
|
TESTB CL, CL
|
||||||
|
JNZ build_table_check2_ok
|
||||||
|
CMPW R8, DI
|
||||||
|
JNE build_table_check2_ok
|
||||||
|
MOVQ ctx+8(FP), AX
|
||||||
|
MOVQ R8, 24(AX)
|
||||||
|
MOVQ DI, 32(AX)
|
||||||
|
MOVQ $+3, ret+16(FP)
|
||||||
|
RET
|
||||||
|
|
||||||
|
build_table_check2_ok:
|
||||||
|
INCQ DI
|
||||||
|
CMPQ DI, AX
|
||||||
|
JL build_table_main_table
|
||||||
|
MOVQ $+0, ret+16(FP)
|
||||||
|
RET
|
|
@ -0,0 +1,72 @@
|
||||||
|
//go:build !amd64 || appengine || !gc || noasm
|
||||||
|
// +build !amd64 appengine !gc noasm
|
||||||
|
|
||||||
|
package zstd
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
)
|
||||||
|
|
||||||
|
// buildDtable will build the decoding table.
|
||||||
|
func (s *fseDecoder) buildDtable() error {
|
||||||
|
tableSize := uint32(1 << s.actualTableLog)
|
||||||
|
highThreshold := tableSize - 1
|
||||||
|
symbolNext := s.stateTable[:256]
|
||||||
|
|
||||||
|
// Init, lay down lowprob symbols
|
||||||
|
{
|
||||||
|
for i, v := range s.norm[:s.symbolLen] {
|
||||||
|
if v == -1 {
|
||||||
|
s.dt[highThreshold].setAddBits(uint8(i))
|
||||||
|
highThreshold--
|
||||||
|
symbolNext[i] = 1
|
||||||
|
} else {
|
||||||
|
symbolNext[i] = uint16(v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Spread symbols
|
||||||
|
{
|
||||||
|
tableMask := tableSize - 1
|
||||||
|
step := tableStep(tableSize)
|
||||||
|
position := uint32(0)
|
||||||
|
for ss, v := range s.norm[:s.symbolLen] {
|
||||||
|
for i := 0; i < int(v); i++ {
|
||||||
|
s.dt[position].setAddBits(uint8(ss))
|
||||||
|
position = (position + step) & tableMask
|
||||||
|
for position > highThreshold {
|
||||||
|
// lowprob area
|
||||||
|
position = (position + step) & tableMask
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if position != 0 {
|
||||||
|
// position must reach all cells once, otherwise normalizedCounter is incorrect
|
||||||
|
return errors.New("corrupted input (position != 0)")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build Decoding table
|
||||||
|
{
|
||||||
|
tableSize := uint16(1 << s.actualTableLog)
|
||||||
|
for u, v := range s.dt[:tableSize] {
|
||||||
|
symbol := v.addBits()
|
||||||
|
nextState := symbolNext[symbol]
|
||||||
|
symbolNext[symbol] = nextState + 1
|
||||||
|
nBits := s.actualTableLog - byte(highBits(uint32(nextState)))
|
||||||
|
s.dt[u&maxTableMask].setNBits(nBits)
|
||||||
|
newState := (nextState << nBits) - tableSize
|
||||||
|
if newState > tableSize {
|
||||||
|
return fmt.Errorf("newState (%d) outside table size (%d)", newState, tableSize)
|
||||||
|
}
|
||||||
|
if newState == uint16(u) && nBits == 0 {
|
||||||
|
// Seems weird that this is possible with nbits > 0.
|
||||||
|
return fmt.Errorf("newState (%d) == oldState (%d) and no bits", newState, u)
|
||||||
|
}
|
||||||
|
s.dt[u&maxTableMask].setNewState(newState)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
|
@ -76,21 +76,6 @@ func (s *fseEncoder) HistogramFinished(maxSymbol uint8, maxCount int) {
|
||||||
s.clearCount = maxCount != 0
|
s.clearCount = maxCount != 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// prepare will prepare and allocate scratch tables used for both compression and decompression.
|
|
||||||
func (s *fseEncoder) prepare() (*fseEncoder, error) {
|
|
||||||
if s == nil {
|
|
||||||
s = &fseEncoder{}
|
|
||||||
}
|
|
||||||
s.useRLE = false
|
|
||||||
if s.clearCount && s.maxCount == 0 {
|
|
||||||
for i := range s.count {
|
|
||||||
s.count[i] = 0
|
|
||||||
}
|
|
||||||
s.clearCount = false
|
|
||||||
}
|
|
||||||
return s, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// allocCtable will allocate tables needed for compression.
|
// allocCtable will allocate tables needed for compression.
|
||||||
// If existing tables a re big enough, they are simply re-used.
|
// If existing tables a re big enough, they are simply re-used.
|
||||||
func (s *fseEncoder) allocCtable() {
|
func (s *fseEncoder) allocCtable() {
|
||||||
|
@ -709,14 +694,6 @@ func (c *cState) init(bw *bitWriter, ct *cTable, first symbolTransform) {
|
||||||
c.state = c.stateTable[lu]
|
c.state = c.stateTable[lu]
|
||||||
}
|
}
|
||||||
|
|
||||||
// encode the output symbol provided and write it to the bitstream.
|
|
||||||
func (c *cState) encode(symbolTT symbolTransform) {
|
|
||||||
nbBitsOut := (uint32(c.state) + symbolTT.deltaNbBits) >> 16
|
|
||||||
dstState := int32(c.state>>(nbBitsOut&15)) + int32(symbolTT.deltaFindState)
|
|
||||||
c.bw.addBits16NC(c.state, uint8(nbBitsOut))
|
|
||||||
c.state = c.stateTable[dstState]
|
|
||||||
}
|
|
||||||
|
|
||||||
// flush will write the tablelog to the output and flush the remaining full bytes.
|
// flush will write the tablelog to the output and flush the remaining full bytes.
|
||||||
func (c *cState) flush(tableLog uint8) {
|
func (c *cState) flush(tableLog uint8) {
|
||||||
c.bw.flush32()
|
c.bw.flush32()
|
||||||
|
|
|
@ -1,11 +0,0 @@
|
||||||
//go:build ignorecrc
|
|
||||||
// +build ignorecrc
|
|
||||||
|
|
||||||
// Copyright 2019+ Klaus Post. All rights reserved.
|
|
||||||
// License information can be found in the LICENSE file.
|
|
||||||
// Based on work by Yann Collet, released under BSD License.
|
|
||||||
|
|
||||||
package zstd
|
|
||||||
|
|
||||||
// ignoreCRC can be used for fuzz testing to ignore CRC values...
|
|
||||||
const ignoreCRC = true
|
|
|
@ -1,11 +0,0 @@
|
||||||
//go:build !ignorecrc
|
|
||||||
// +build !ignorecrc
|
|
||||||
|
|
||||||
// Copyright 2019+ Klaus Post. All rights reserved.
|
|
||||||
// License information can be found in the LICENSE file.
|
|
||||||
// Based on work by Yann Collet, released under BSD License.
|
|
||||||
|
|
||||||
package zstd
|
|
||||||
|
|
||||||
// ignoreCRC can be used for fuzz testing to ignore CRC values...
|
|
||||||
const ignoreCRC = false
|
|
|
@ -33,9 +33,3 @@ func hashLen(u uint64, length, mls uint8) uint32 {
|
||||||
return (uint32(u) * prime4bytes) >> (32 - length)
|
return (uint32(u) * prime4bytes) >> (32 - length)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// hash3 returns the hash of the lower 3 bytes of u to fit in a hash table with h bits.
|
|
||||||
// Preferably h should be a constant and should always be <32.
|
|
||||||
func hash3(u uint32, h uint8) uint32 {
|
|
||||||
return ((u << (32 - 24)) * prime3bytes) >> ((32 - h) & 31)
|
|
||||||
}
|
|
||||||
|
|
|
@ -73,6 +73,7 @@ type sequenceDecs struct {
|
||||||
seqSize int
|
seqSize int
|
||||||
windowSize int
|
windowSize int
|
||||||
maxBits uint8
|
maxBits uint8
|
||||||
|
maxSyncLen uint64
|
||||||
}
|
}
|
||||||
|
|
||||||
// initialize all 3 decoders from the stream input.
|
// initialize all 3 decoders from the stream input.
|
||||||
|
@ -98,153 +99,13 @@ func (s *sequenceDecs) initialize(br *bitReader, hist *history, out []byte) erro
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// decode sequences from the stream with the provided history.
|
|
||||||
func (s *sequenceDecs) decode(seqs []seqVals) error {
|
|
||||||
br := s.br
|
|
||||||
|
|
||||||
// Grab full sizes tables, to avoid bounds checks.
|
|
||||||
llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize]
|
|
||||||
llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
|
|
||||||
s.seqSize = 0
|
|
||||||
litRemain := len(s.literals)
|
|
||||||
maxBlockSize := maxCompressedBlockSize
|
|
||||||
if s.windowSize < maxBlockSize {
|
|
||||||
maxBlockSize = s.windowSize
|
|
||||||
}
|
|
||||||
for i := range seqs {
|
|
||||||
var ll, mo, ml int
|
|
||||||
if br.off > 4+((maxOffsetBits+16+16)>>3) {
|
|
||||||
// inlined function:
|
|
||||||
// ll, mo, ml = s.nextFast(br, llState, mlState, ofState)
|
|
||||||
|
|
||||||
// Final will not read from stream.
|
|
||||||
var llB, mlB, moB uint8
|
|
||||||
ll, llB = llState.final()
|
|
||||||
ml, mlB = mlState.final()
|
|
||||||
mo, moB = ofState.final()
|
|
||||||
|
|
||||||
// extra bits are stored in reverse order.
|
|
||||||
br.fillFast()
|
|
||||||
mo += br.getBits(moB)
|
|
||||||
if s.maxBits > 32 {
|
|
||||||
br.fillFast()
|
|
||||||
}
|
|
||||||
ml += br.getBits(mlB)
|
|
||||||
ll += br.getBits(llB)
|
|
||||||
|
|
||||||
if moB > 1 {
|
|
||||||
s.prevOffset[2] = s.prevOffset[1]
|
|
||||||
s.prevOffset[1] = s.prevOffset[0]
|
|
||||||
s.prevOffset[0] = mo
|
|
||||||
} else {
|
|
||||||
// mo = s.adjustOffset(mo, ll, moB)
|
|
||||||
// Inlined for rather big speedup
|
|
||||||
if ll == 0 {
|
|
||||||
// There is an exception though, when current sequence's literals_length = 0.
|
|
||||||
// In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2,
|
|
||||||
// an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte.
|
|
||||||
mo++
|
|
||||||
}
|
|
||||||
|
|
||||||
if mo == 0 {
|
|
||||||
mo = s.prevOffset[0]
|
|
||||||
} else {
|
|
||||||
var temp int
|
|
||||||
if mo == 3 {
|
|
||||||
temp = s.prevOffset[0] - 1
|
|
||||||
} else {
|
|
||||||
temp = s.prevOffset[mo]
|
|
||||||
}
|
|
||||||
|
|
||||||
if temp == 0 {
|
|
||||||
// 0 is not valid; input is corrupted; force offset to 1
|
|
||||||
println("WARNING: temp was 0")
|
|
||||||
temp = 1
|
|
||||||
}
|
|
||||||
|
|
||||||
if mo != 1 {
|
|
||||||
s.prevOffset[2] = s.prevOffset[1]
|
|
||||||
}
|
|
||||||
s.prevOffset[1] = s.prevOffset[0]
|
|
||||||
s.prevOffset[0] = temp
|
|
||||||
mo = temp
|
|
||||||
}
|
|
||||||
}
|
|
||||||
br.fillFast()
|
|
||||||
} else {
|
|
||||||
if br.overread() {
|
|
||||||
if debugDecoder {
|
|
||||||
printf("reading sequence %d, exceeded available data\n", i)
|
|
||||||
}
|
|
||||||
return io.ErrUnexpectedEOF
|
|
||||||
}
|
|
||||||
ll, mo, ml = s.next(br, llState, mlState, ofState)
|
|
||||||
br.fill()
|
|
||||||
}
|
|
||||||
|
|
||||||
if debugSequences {
|
|
||||||
println("Seq", i, "Litlen:", ll, "mo:", mo, "(abs) ml:", ml)
|
|
||||||
}
|
|
||||||
// Evaluate.
|
|
||||||
// We might be doing this async, so do it early.
|
|
||||||
if mo == 0 && ml > 0 {
|
|
||||||
return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
|
|
||||||
}
|
|
||||||
if ml > maxMatchLen {
|
|
||||||
return fmt.Errorf("match len (%d) bigger than max allowed length", ml)
|
|
||||||
}
|
|
||||||
s.seqSize += ll + ml
|
|
||||||
if s.seqSize > maxBlockSize {
|
|
||||||
return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
|
|
||||||
}
|
|
||||||
litRemain -= ll
|
|
||||||
if litRemain < 0 {
|
|
||||||
return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, litRemain+ll)
|
|
||||||
}
|
|
||||||
seqs[i] = seqVals{
|
|
||||||
ll: ll,
|
|
||||||
ml: ml,
|
|
||||||
mo: mo,
|
|
||||||
}
|
|
||||||
if i == len(seqs)-1 {
|
|
||||||
// This is the last sequence, so we shouldn't update state.
|
|
||||||
break
|
|
||||||
}
|
|
||||||
|
|
||||||
// Manually inlined, ~ 5-20% faster
|
|
||||||
// Update all 3 states at once. Approx 20% faster.
|
|
||||||
nBits := llState.nbBits() + mlState.nbBits() + ofState.nbBits()
|
|
||||||
if nBits == 0 {
|
|
||||||
llState = llTable[llState.newState()&maxTableMask]
|
|
||||||
mlState = mlTable[mlState.newState()&maxTableMask]
|
|
||||||
ofState = ofTable[ofState.newState()&maxTableMask]
|
|
||||||
} else {
|
|
||||||
bits := br.get32BitsFast(nBits)
|
|
||||||
lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
|
|
||||||
llState = llTable[(llState.newState()+lowBits)&maxTableMask]
|
|
||||||
|
|
||||||
lowBits = uint16(bits >> (ofState.nbBits() & 31))
|
|
||||||
lowBits &= bitMask[mlState.nbBits()&15]
|
|
||||||
mlState = mlTable[(mlState.newState()+lowBits)&maxTableMask]
|
|
||||||
|
|
||||||
lowBits = uint16(bits) & bitMask[ofState.nbBits()&15]
|
|
||||||
ofState = ofTable[(ofState.newState()+lowBits)&maxTableMask]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
s.seqSize += litRemain
|
|
||||||
if s.seqSize > maxBlockSize {
|
|
||||||
return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
|
|
||||||
}
|
|
||||||
err := br.close()
|
|
||||||
if err != nil {
|
|
||||||
printf("Closing sequences: %v, %+v\n", err, *br)
|
|
||||||
}
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// execute will execute the decoded sequence with the provided history.
|
// execute will execute the decoded sequence with the provided history.
|
||||||
// The sequence must be evaluated before being sent.
|
// The sequence must be evaluated before being sent.
|
||||||
func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error {
|
func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error {
|
||||||
|
if len(s.dict) == 0 {
|
||||||
|
return s.executeSimple(seqs, hist)
|
||||||
|
}
|
||||||
|
|
||||||
// Ensure we have enough output size...
|
// Ensure we have enough output size...
|
||||||
if len(s.out)+s.seqSize > cap(s.out) {
|
if len(s.out)+s.seqSize > cap(s.out) {
|
||||||
addBytes := s.seqSize + len(s.out)
|
addBytes := s.seqSize + len(s.out)
|
||||||
|
@ -327,6 +188,7 @@ func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add final literals
|
// Add final literals
|
||||||
copy(out[t:], s.literals)
|
copy(out[t:], s.literals)
|
||||||
if debugDecoder {
|
if debugDecoder {
|
||||||
|
@ -341,14 +203,18 @@ func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
// decode sequences from the stream with the provided history.
|
// decode sequences from the stream with the provided history.
|
||||||
func (s *sequenceDecs) decodeSync(history *history) error {
|
func (s *sequenceDecs) decodeSync(hist []byte) error {
|
||||||
|
supported, err := s.decodeSyncSimple(hist)
|
||||||
|
if supported {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
br := s.br
|
br := s.br
|
||||||
seqs := s.nSeqs
|
seqs := s.nSeqs
|
||||||
startSize := len(s.out)
|
startSize := len(s.out)
|
||||||
// Grab full sizes tables, to avoid bounds checks.
|
// Grab full sizes tables, to avoid bounds checks.
|
||||||
llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize]
|
llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize]
|
||||||
llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
|
llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
|
||||||
hist := history.b[history.ignoreBuffer:]
|
|
||||||
out := s.out
|
out := s.out
|
||||||
maxBlockSize := maxCompressedBlockSize
|
maxBlockSize := maxCompressedBlockSize
|
||||||
if s.windowSize < maxBlockSize {
|
if s.windowSize < maxBlockSize {
|
||||||
|
@ -433,7 +299,7 @@ func (s *sequenceDecs) decodeSync(history *history) error {
|
||||||
}
|
}
|
||||||
size := ll + ml + len(out)
|
size := ll + ml + len(out)
|
||||||
if size-startSize > maxBlockSize {
|
if size-startSize > maxBlockSize {
|
||||||
return fmt.Errorf("output (%d) bigger than max block size (%d)", size, maxBlockSize)
|
return fmt.Errorf("output (%d) bigger than max block size (%d)", size-startSize, maxBlockSize)
|
||||||
}
|
}
|
||||||
if size > cap(out) {
|
if size > cap(out) {
|
||||||
// Not enough size, which can happen under high volume block streaming conditions
|
// Not enough size, which can happen under high volume block streaming conditions
|
||||||
|
@ -463,13 +329,13 @@ func (s *sequenceDecs) decodeSync(history *history) error {
|
||||||
|
|
||||||
if mo > len(out)+len(hist) || mo > s.windowSize {
|
if mo > len(out)+len(hist) || mo > s.windowSize {
|
||||||
if len(s.dict) == 0 {
|
if len(s.dict) == 0 {
|
||||||
return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist))
|
return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist)-startSize)
|
||||||
}
|
}
|
||||||
|
|
||||||
// we may be in dictionary.
|
// we may be in dictionary.
|
||||||
dictO := len(s.dict) - (mo - (len(out) + len(hist)))
|
dictO := len(s.dict) - (mo - (len(out) + len(hist)))
|
||||||
if dictO < 0 || dictO >= len(s.dict) {
|
if dictO < 0 || dictO >= len(s.dict) {
|
||||||
return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist))
|
return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist)-startSize)
|
||||||
}
|
}
|
||||||
end := dictO + ml
|
end := dictO + ml
|
||||||
if end > len(s.dict) {
|
if end > len(s.dict) {
|
||||||
|
@ -530,6 +396,7 @@ func (s *sequenceDecs) decodeSync(history *history) error {
|
||||||
ofState = ofTable[ofState.newState()&maxTableMask]
|
ofState = ofTable[ofState.newState()&maxTableMask]
|
||||||
} else {
|
} else {
|
||||||
bits := br.get32BitsFast(nBits)
|
bits := br.get32BitsFast(nBits)
|
||||||
|
|
||||||
lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
|
lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
|
||||||
llState = llTable[(llState.newState()+lowBits)&maxTableMask]
|
llState = llTable[(llState.newState()+lowBits)&maxTableMask]
|
||||||
|
|
||||||
|
@ -543,8 +410,8 @@ func (s *sequenceDecs) decodeSync(history *history) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if space for literals
|
// Check if space for literals
|
||||||
if len(s.literals)+len(s.out)-startSize > maxBlockSize {
|
if size := len(s.literals) + len(s.out) - startSize; size > maxBlockSize {
|
||||||
return fmt.Errorf("output (%d) bigger than max block size (%d)", len(s.out), maxBlockSize)
|
return fmt.Errorf("output (%d) bigger than max block size (%d)", size, maxBlockSize)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add final literals
|
// Add final literals
|
||||||
|
@ -552,16 +419,6 @@ func (s *sequenceDecs) decodeSync(history *history) error {
|
||||||
return br.close()
|
return br.close()
|
||||||
}
|
}
|
||||||
|
|
||||||
// update states, at least 27 bits must be available.
|
|
||||||
func (s *sequenceDecs) update(br *bitReader) {
|
|
||||||
// Max 8 bits
|
|
||||||
s.litLengths.state.next(br)
|
|
||||||
// Max 9 bits
|
|
||||||
s.matchLengths.state.next(br)
|
|
||||||
// Max 8 bits
|
|
||||||
s.offsets.state.next(br)
|
|
||||||
}
|
|
||||||
|
|
||||||
var bitMask [16]uint16
|
var bitMask [16]uint16
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
|
@ -570,87 +427,6 @@ func init() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// update states, at least 27 bits must be available.
|
|
||||||
func (s *sequenceDecs) updateAlt(br *bitReader) {
|
|
||||||
// Update all 3 states at once. Approx 20% faster.
|
|
||||||
a, b, c := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
|
|
||||||
|
|
||||||
nBits := a.nbBits() + b.nbBits() + c.nbBits()
|
|
||||||
if nBits == 0 {
|
|
||||||
s.litLengths.state.state = s.litLengths.state.dt[a.newState()]
|
|
||||||
s.matchLengths.state.state = s.matchLengths.state.dt[b.newState()]
|
|
||||||
s.offsets.state.state = s.offsets.state.dt[c.newState()]
|
|
||||||
return
|
|
||||||
}
|
|
||||||
bits := br.get32BitsFast(nBits)
|
|
||||||
lowBits := uint16(bits >> ((c.nbBits() + b.nbBits()) & 31))
|
|
||||||
s.litLengths.state.state = s.litLengths.state.dt[a.newState()+lowBits]
|
|
||||||
|
|
||||||
lowBits = uint16(bits >> (c.nbBits() & 31))
|
|
||||||
lowBits &= bitMask[b.nbBits()&15]
|
|
||||||
s.matchLengths.state.state = s.matchLengths.state.dt[b.newState()+lowBits]
|
|
||||||
|
|
||||||
lowBits = uint16(bits) & bitMask[c.nbBits()&15]
|
|
||||||
s.offsets.state.state = s.offsets.state.dt[c.newState()+lowBits]
|
|
||||||
}
|
|
||||||
|
|
||||||
// nextFast will return new states when there are at least 4 unused bytes left on the stream when done.
|
|
||||||
func (s *sequenceDecs) nextFast(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) {
|
|
||||||
// Final will not read from stream.
|
|
||||||
ll, llB := llState.final()
|
|
||||||
ml, mlB := mlState.final()
|
|
||||||
mo, moB := ofState.final()
|
|
||||||
|
|
||||||
// extra bits are stored in reverse order.
|
|
||||||
br.fillFast()
|
|
||||||
mo += br.getBits(moB)
|
|
||||||
if s.maxBits > 32 {
|
|
||||||
br.fillFast()
|
|
||||||
}
|
|
||||||
ml += br.getBits(mlB)
|
|
||||||
ll += br.getBits(llB)
|
|
||||||
|
|
||||||
if moB > 1 {
|
|
||||||
s.prevOffset[2] = s.prevOffset[1]
|
|
||||||
s.prevOffset[1] = s.prevOffset[0]
|
|
||||||
s.prevOffset[0] = mo
|
|
||||||
return
|
|
||||||
}
|
|
||||||
// mo = s.adjustOffset(mo, ll, moB)
|
|
||||||
// Inlined for rather big speedup
|
|
||||||
if ll == 0 {
|
|
||||||
// There is an exception though, when current sequence's literals_length = 0.
|
|
||||||
// In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2,
|
|
||||||
// an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte.
|
|
||||||
mo++
|
|
||||||
}
|
|
||||||
|
|
||||||
if mo == 0 {
|
|
||||||
mo = s.prevOffset[0]
|
|
||||||
return
|
|
||||||
}
|
|
||||||
var temp int
|
|
||||||
if mo == 3 {
|
|
||||||
temp = s.prevOffset[0] - 1
|
|
||||||
} else {
|
|
||||||
temp = s.prevOffset[mo]
|
|
||||||
}
|
|
||||||
|
|
||||||
if temp == 0 {
|
|
||||||
// 0 is not valid; input is corrupted; force offset to 1
|
|
||||||
println("temp was 0")
|
|
||||||
temp = 1
|
|
||||||
}
|
|
||||||
|
|
||||||
if mo != 1 {
|
|
||||||
s.prevOffset[2] = s.prevOffset[1]
|
|
||||||
}
|
|
||||||
s.prevOffset[1] = s.prevOffset[0]
|
|
||||||
s.prevOffset[0] = temp
|
|
||||||
mo = temp
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *sequenceDecs) next(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) {
|
func (s *sequenceDecs) next(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) {
|
||||||
// Final will not read from stream.
|
// Final will not read from stream.
|
||||||
ll, llB := llState.final()
|
ll, llB := llState.final()
|
||||||
|
|
|
@ -0,0 +1,368 @@
|
||||||
|
//go:build amd64 && !appengine && !noasm && gc
|
||||||
|
// +build amd64,!appengine,!noasm,gc
|
||||||
|
|
||||||
|
package zstd
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/klauspost/compress/internal/cpuinfo"
|
||||||
|
)
|
||||||
|
|
||||||
|
type decodeSyncAsmContext struct {
|
||||||
|
llTable []decSymbol
|
||||||
|
mlTable []decSymbol
|
||||||
|
ofTable []decSymbol
|
||||||
|
llState uint64
|
||||||
|
mlState uint64
|
||||||
|
ofState uint64
|
||||||
|
iteration int
|
||||||
|
litRemain int
|
||||||
|
out []byte
|
||||||
|
outPosition int
|
||||||
|
literals []byte
|
||||||
|
litPosition int
|
||||||
|
history []byte
|
||||||
|
windowSize int
|
||||||
|
ll int // set on error (not for all errors, please refer to _generate/gen.go)
|
||||||
|
ml int // set on error (not for all errors, please refer to _generate/gen.go)
|
||||||
|
mo int // set on error (not for all errors, please refer to _generate/gen.go)
|
||||||
|
}
|
||||||
|
|
||||||
|
// sequenceDecs_decodeSync_amd64 implements the main loop of sequenceDecs.decodeSync in x86 asm.
|
||||||
|
//
|
||||||
|
// Please refer to seqdec_generic.go for the reference implementation.
|
||||||
|
//go:noescape
|
||||||
|
func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
|
||||||
|
|
||||||
|
// sequenceDecs_decodeSync_bmi2 implements the main loop of sequenceDecs.decodeSync in x86 asm with BMI2 extensions.
|
||||||
|
//go:noescape
|
||||||
|
func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
|
||||||
|
|
||||||
|
// sequenceDecs_decodeSync_safe_amd64 does the same as above, but does not write more than output buffer.
|
||||||
|
//go:noescape
|
||||||
|
func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
|
||||||
|
|
||||||
|
// sequenceDecs_decodeSync_safe_bmi2 does the same as above, but does not write more than output buffer.
|
||||||
|
//go:noescape
|
||||||
|
func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
|
||||||
|
|
||||||
|
// decode sequences from the stream with the provided history but without a dictionary.
|
||||||
|
func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
|
||||||
|
if len(s.dict) > 0 {
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
|
if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSize {
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// FIXME: Using unsafe memory copies leads to rare, random crashes
|
||||||
|
// with fuzz testing. It is therefore disabled for now.
|
||||||
|
const useSafe = true
|
||||||
|
/*
|
||||||
|
useSafe := false
|
||||||
|
if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSizeAlloc {
|
||||||
|
useSafe = true
|
||||||
|
}
|
||||||
|
if s.maxSyncLen > 0 && cap(s.out)-len(s.out)-compressedBlockOverAlloc < int(s.maxSyncLen) {
|
||||||
|
useSafe = true
|
||||||
|
}
|
||||||
|
if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc {
|
||||||
|
useSafe = true
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
br := s.br
|
||||||
|
|
||||||
|
maxBlockSize := maxCompressedBlockSize
|
||||||
|
if s.windowSize < maxBlockSize {
|
||||||
|
maxBlockSize = s.windowSize
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx := decodeSyncAsmContext{
|
||||||
|
llTable: s.litLengths.fse.dt[:maxTablesize],
|
||||||
|
mlTable: s.matchLengths.fse.dt[:maxTablesize],
|
||||||
|
ofTable: s.offsets.fse.dt[:maxTablesize],
|
||||||
|
llState: uint64(s.litLengths.state.state),
|
||||||
|
mlState: uint64(s.matchLengths.state.state),
|
||||||
|
ofState: uint64(s.offsets.state.state),
|
||||||
|
iteration: s.nSeqs - 1,
|
||||||
|
litRemain: len(s.literals),
|
||||||
|
out: s.out,
|
||||||
|
outPosition: len(s.out),
|
||||||
|
literals: s.literals,
|
||||||
|
windowSize: s.windowSize,
|
||||||
|
history: hist,
|
||||||
|
}
|
||||||
|
|
||||||
|
s.seqSize = 0
|
||||||
|
startSize := len(s.out)
|
||||||
|
|
||||||
|
var errCode int
|
||||||
|
if cpuinfo.HasBMI2() {
|
||||||
|
if useSafe {
|
||||||
|
errCode = sequenceDecs_decodeSync_safe_bmi2(s, br, &ctx)
|
||||||
|
} else {
|
||||||
|
errCode = sequenceDecs_decodeSync_bmi2(s, br, &ctx)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if useSafe {
|
||||||
|
errCode = sequenceDecs_decodeSync_safe_amd64(s, br, &ctx)
|
||||||
|
} else {
|
||||||
|
errCode = sequenceDecs_decodeSync_amd64(s, br, &ctx)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
switch errCode {
|
||||||
|
case noError:
|
||||||
|
break
|
||||||
|
|
||||||
|
case errorMatchLenOfsMismatch:
|
||||||
|
return true, fmt.Errorf("zero matchoff and matchlen (%d) > 0", ctx.ml)
|
||||||
|
|
||||||
|
case errorMatchLenTooBig:
|
||||||
|
return true, fmt.Errorf("match len (%d) bigger than max allowed length", ctx.ml)
|
||||||
|
|
||||||
|
case errorMatchOffTooBig:
|
||||||
|
return true, fmt.Errorf("match offset (%d) bigger than current history (%d)",
|
||||||
|
ctx.mo, ctx.outPosition+len(hist)-startSize)
|
||||||
|
|
||||||
|
case errorNotEnoughLiterals:
|
||||||
|
return true, fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available",
|
||||||
|
ctx.ll, ctx.litRemain+ctx.ll)
|
||||||
|
|
||||||
|
case errorNotEnoughSpace:
|
||||||
|
size := ctx.outPosition + ctx.ll + ctx.ml
|
||||||
|
if debugDecoder {
|
||||||
|
println("msl:", s.maxSyncLen, "cap", cap(s.out), "bef:", startSize, "sz:", size-startSize, "mbs:", maxBlockSize, "outsz:", cap(s.out)-startSize)
|
||||||
|
}
|
||||||
|
return true, fmt.Errorf("output (%d) bigger than max block size (%d)", size-startSize, maxBlockSize)
|
||||||
|
|
||||||
|
default:
|
||||||
|
return true, fmt.Errorf("sequenceDecs_decode returned erronous code %d", errCode)
|
||||||
|
}
|
||||||
|
|
||||||
|
s.seqSize += ctx.litRemain
|
||||||
|
if s.seqSize > maxBlockSize {
|
||||||
|
return true, fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
|
||||||
|
}
|
||||||
|
err := br.close()
|
||||||
|
if err != nil {
|
||||||
|
printf("Closing sequences: %v, %+v\n", err, *br)
|
||||||
|
return true, err
|
||||||
|
}
|
||||||
|
|
||||||
|
s.literals = s.literals[ctx.litPosition:]
|
||||||
|
t := ctx.outPosition
|
||||||
|
s.out = s.out[:t]
|
||||||
|
|
||||||
|
// Add final literals
|
||||||
|
s.out = append(s.out, s.literals...)
|
||||||
|
if debugDecoder {
|
||||||
|
t += len(s.literals)
|
||||||
|
if t != len(s.out) {
|
||||||
|
panic(fmt.Errorf("length mismatch, want %d, got %d", len(s.out), t))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
type decodeAsmContext struct {
|
||||||
|
llTable []decSymbol
|
||||||
|
mlTable []decSymbol
|
||||||
|
ofTable []decSymbol
|
||||||
|
llState uint64
|
||||||
|
mlState uint64
|
||||||
|
ofState uint64
|
||||||
|
iteration int
|
||||||
|
seqs []seqVals
|
||||||
|
litRemain int
|
||||||
|
}
|
||||||
|
|
||||||
|
// Result codes returned by the sequence decoding assembly routines.
const (
	// noError is returned when no error was detected.
	noError = 0

	// errorMatchLenOfsMismatch is reported when mo == 0 && ml > 0.
	errorMatchLenOfsMismatch = 1

	// errorMatchLenTooBig is reported when ml > maxMatchLen.
	errorMatchLenTooBig = 2

	// errorMatchOffTooBig is reported when mo > available history or mo > s.windowSize.
	errorMatchOffTooBig = 3

	// errorNotEnoughLiterals is reported when the sum of literal lengths exceeds the literal buffer size.
	errorNotEnoughLiterals = 4

	// errorNotEnoughSpace is reported when the capacity of `out` is too small.
	errorNotEnoughSpace = 5
)
|
||||||
|
|
||||||
|
// sequenceDecs_decode_amd64 implements the main loop of sequenceDecs.decode in x86 asm.
|
||||||
|
//
|
||||||
|
// Please refer to seqdec_generic.go for the reference implementation.
|
||||||
|
//go:noescape
|
||||||
|
func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
|
||||||
|
|
||||||
|
// sequenceDecs_decode_56_amd64 implements the main loop of sequenceDecs.decode in x86 asm; decode selects it when maxBits plus the three actual table logs is at most 56.
|
||||||
|
//
|
||||||
|
// Please refer to seqdec_generic.go for the reference implementation.
|
||||||
|
//go:noescape
|
||||||
|
func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
|
||||||
|
|
||||||
|
// sequenceDecs_decode_bmi2 implements the main loop of sequenceDecs.decode in x86 asm with BMI2 extensions.
|
||||||
|
//go:noescape
|
||||||
|
func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
|
||||||
|
|
||||||
|
// sequenceDecs_decode_56_bmi2 implements the main loop of sequenceDecs.decode in x86 asm with BMI2 extensions; decode selects it when maxBits plus the three actual table logs is at most 56.
|
||||||
|
//go:noescape
|
||||||
|
func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
|
||||||
|
|
||||||
|
// decode sequences from the stream without the provided history.
|
||||||
|
func (s *sequenceDecs) decode(seqs []seqVals) error {
|
||||||
|
br := s.br
|
||||||
|
|
||||||
|
maxBlockSize := maxCompressedBlockSize
|
||||||
|
if s.windowSize < maxBlockSize {
|
||||||
|
maxBlockSize = s.windowSize
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx := decodeAsmContext{
|
||||||
|
llTable: s.litLengths.fse.dt[:maxTablesize],
|
||||||
|
mlTable: s.matchLengths.fse.dt[:maxTablesize],
|
||||||
|
ofTable: s.offsets.fse.dt[:maxTablesize],
|
||||||
|
llState: uint64(s.litLengths.state.state),
|
||||||
|
mlState: uint64(s.matchLengths.state.state),
|
||||||
|
ofState: uint64(s.offsets.state.state),
|
||||||
|
seqs: seqs,
|
||||||
|
iteration: len(seqs) - 1,
|
||||||
|
litRemain: len(s.literals),
|
||||||
|
}
|
||||||
|
|
||||||
|
s.seqSize = 0
|
||||||
|
lte56bits := s.maxBits+s.offsets.fse.actualTableLog+s.matchLengths.fse.actualTableLog+s.litLengths.fse.actualTableLog <= 56
|
||||||
|
var errCode int
|
||||||
|
if cpuinfo.HasBMI2() {
|
||||||
|
if lte56bits {
|
||||||
|
errCode = sequenceDecs_decode_56_bmi2(s, br, &ctx)
|
||||||
|
} else {
|
||||||
|
errCode = sequenceDecs_decode_bmi2(s, br, &ctx)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if lte56bits {
|
||||||
|
errCode = sequenceDecs_decode_56_amd64(s, br, &ctx)
|
||||||
|
} else {
|
||||||
|
errCode = sequenceDecs_decode_amd64(s, br, &ctx)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if errCode != 0 {
|
||||||
|
i := len(seqs) - ctx.iteration - 1
|
||||||
|
switch errCode {
|
||||||
|
case errorMatchLenOfsMismatch:
|
||||||
|
ml := ctx.seqs[i].ml
|
||||||
|
return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
|
||||||
|
|
||||||
|
case errorMatchLenTooBig:
|
||||||
|
ml := ctx.seqs[i].ml
|
||||||
|
return fmt.Errorf("match len (%d) bigger than max allowed length", ml)
|
||||||
|
|
||||||
|
case errorNotEnoughLiterals:
|
||||||
|
ll := ctx.seqs[i].ll
|
||||||
|
return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, ctx.litRemain+ll)
|
||||||
|
}
|
||||||
|
|
||||||
|
return fmt.Errorf("sequenceDecs_decode_amd64 returned erronous code %d", errCode)
|
||||||
|
}
|
||||||
|
|
||||||
|
if ctx.litRemain < 0 {
|
||||||
|
return fmt.Errorf("literal count is too big: total available %d, total requested %d",
|
||||||
|
len(s.literals), len(s.literals)-ctx.litRemain)
|
||||||
|
}
|
||||||
|
|
||||||
|
s.seqSize += ctx.litRemain
|
||||||
|
if s.seqSize > maxBlockSize {
|
||||||
|
return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
|
||||||
|
}
|
||||||
|
err := br.close()
|
||||||
|
if err != nil {
|
||||||
|
printf("Closing sequences: %v, %+v\n", err, *br)
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
type executeAsmContext struct {
|
||||||
|
seqs []seqVals
|
||||||
|
seqIndex int
|
||||||
|
out []byte
|
||||||
|
history []byte
|
||||||
|
literals []byte
|
||||||
|
outPosition int
|
||||||
|
litPosition int
|
||||||
|
windowSize int
|
||||||
|
}
|
||||||
|
|
||||||
|
// sequenceDecs_executeSimple_amd64 implements the main loop of sequenceDecs.executeSimple in x86 asm.
|
||||||
|
//
|
||||||
|
// Returns false if a match offset is too big.
|
||||||
|
//
|
||||||
|
// Please refer to seqdec_generic.go for the reference implementation.
|
||||||
|
//go:noescape
|
||||||
|
func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool
|
||||||
|
|
||||||
|
// Same as above, but with safe memcopies
|
||||||
|
//go:noescape
|
||||||
|
func sequenceDecs_executeSimple_safe_amd64(ctx *executeAsmContext) bool
|
||||||
|
|
||||||
|
// executeSimple handles cases when dictionary is not used.
|
||||||
|
func (s *sequenceDecs) executeSimple(seqs []seqVals, hist []byte) error {
|
||||||
|
// Ensure we have enough output size...
|
||||||
|
if len(s.out)+s.seqSize+compressedBlockOverAlloc > cap(s.out) {
|
||||||
|
addBytes := s.seqSize + len(s.out) + compressedBlockOverAlloc
|
||||||
|
s.out = append(s.out, make([]byte, addBytes)...)
|
||||||
|
s.out = s.out[:len(s.out)-addBytes]
|
||||||
|
}
|
||||||
|
|
||||||
|
if debugDecoder {
|
||||||
|
printf("Execute %d seqs with literals: %d into %d bytes\n", len(seqs), len(s.literals), s.seqSize)
|
||||||
|
}
|
||||||
|
|
||||||
|
var t = len(s.out)
|
||||||
|
out := s.out[:t+s.seqSize]
|
||||||
|
|
||||||
|
ctx := executeAsmContext{
|
||||||
|
seqs: seqs,
|
||||||
|
seqIndex: 0,
|
||||||
|
out: out,
|
||||||
|
history: hist,
|
||||||
|
outPosition: t,
|
||||||
|
litPosition: 0,
|
||||||
|
literals: s.literals,
|
||||||
|
windowSize: s.windowSize,
|
||||||
|
}
|
||||||
|
var ok bool
|
||||||
|
if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc {
|
||||||
|
ok = sequenceDecs_executeSimple_safe_amd64(&ctx)
|
||||||
|
} else {
|
||||||
|
ok = sequenceDecs_executeSimple_amd64(&ctx)
|
||||||
|
}
|
||||||
|
if !ok {
|
||||||
|
return fmt.Errorf("match offset (%d) bigger than current history (%d)",
|
||||||
|
seqs[ctx.seqIndex].mo, ctx.outPosition+len(hist))
|
||||||
|
}
|
||||||
|
s.literals = s.literals[ctx.litPosition:]
|
||||||
|
t = ctx.outPosition
|
||||||
|
|
||||||
|
// Add final literals
|
||||||
|
copy(out[t:], s.literals)
|
||||||
|
if debugDecoder {
|
||||||
|
t += len(s.literals)
|
||||||
|
if t != len(out) {
|
||||||
|
panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
s.out = out
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,237 @@
|
||||||
|
//go:build !amd64 || appengine || !gc || noasm
|
||||||
|
// +build !amd64 appengine !gc noasm
|
||||||
|
|
||||||
|
package zstd
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
)
|
||||||
|
|
||||||
|
// decode sequences from the stream with the provided history but without dictionary.
|
||||||
|
func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// decode sequences from the stream without the provided history.
|
||||||
|
func (s *sequenceDecs) decode(seqs []seqVals) error {
|
||||||
|
br := s.br
|
||||||
|
|
||||||
|
// Grab full sizes tables, to avoid bounds checks.
|
||||||
|
llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize]
|
||||||
|
llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
|
||||||
|
s.seqSize = 0
|
||||||
|
litRemain := len(s.literals)
|
||||||
|
|
||||||
|
maxBlockSize := maxCompressedBlockSize
|
||||||
|
if s.windowSize < maxBlockSize {
|
||||||
|
maxBlockSize = s.windowSize
|
||||||
|
}
|
||||||
|
for i := range seqs {
|
||||||
|
var ll, mo, ml int
|
||||||
|
if br.off > 4+((maxOffsetBits+16+16)>>3) {
|
||||||
|
// inlined function:
|
||||||
|
// ll, mo, ml = s.nextFast(br, llState, mlState, ofState)
|
||||||
|
|
||||||
|
// Final will not read from stream.
|
||||||
|
var llB, mlB, moB uint8
|
||||||
|
ll, llB = llState.final()
|
||||||
|
ml, mlB = mlState.final()
|
||||||
|
mo, moB = ofState.final()
|
||||||
|
|
||||||
|
// extra bits are stored in reverse order.
|
||||||
|
br.fillFast()
|
||||||
|
mo += br.getBits(moB)
|
||||||
|
if s.maxBits > 32 {
|
||||||
|
br.fillFast()
|
||||||
|
}
|
||||||
|
ml += br.getBits(mlB)
|
||||||
|
ll += br.getBits(llB)
|
||||||
|
|
||||||
|
if moB > 1 {
|
||||||
|
s.prevOffset[2] = s.prevOffset[1]
|
||||||
|
s.prevOffset[1] = s.prevOffset[0]
|
||||||
|
s.prevOffset[0] = mo
|
||||||
|
} else {
|
||||||
|
// mo = s.adjustOffset(mo, ll, moB)
|
||||||
|
// Inlined for rather big speedup
|
||||||
|
if ll == 0 {
|
||||||
|
// There is an exception though, when current sequence's literals_length = 0.
|
||||||
|
// In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2,
|
||||||
|
// an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte.
|
||||||
|
mo++
|
||||||
|
}
|
||||||
|
|
||||||
|
if mo == 0 {
|
||||||
|
mo = s.prevOffset[0]
|
||||||
|
} else {
|
||||||
|
var temp int
|
||||||
|
if mo == 3 {
|
||||||
|
temp = s.prevOffset[0] - 1
|
||||||
|
} else {
|
||||||
|
temp = s.prevOffset[mo]
|
||||||
|
}
|
||||||
|
|
||||||
|
if temp == 0 {
|
||||||
|
// 0 is not valid; input is corrupted; force offset to 1
|
||||||
|
println("WARNING: temp was 0")
|
||||||
|
temp = 1
|
||||||
|
}
|
||||||
|
|
||||||
|
if mo != 1 {
|
||||||
|
s.prevOffset[2] = s.prevOffset[1]
|
||||||
|
}
|
||||||
|
s.prevOffset[1] = s.prevOffset[0]
|
||||||
|
s.prevOffset[0] = temp
|
||||||
|
mo = temp
|
||||||
|
}
|
||||||
|
}
|
||||||
|
br.fillFast()
|
||||||
|
} else {
|
||||||
|
if br.overread() {
|
||||||
|
if debugDecoder {
|
||||||
|
printf("reading sequence %d, exceeded available data\n", i)
|
||||||
|
}
|
||||||
|
return io.ErrUnexpectedEOF
|
||||||
|
}
|
||||||
|
ll, mo, ml = s.next(br, llState, mlState, ofState)
|
||||||
|
br.fill()
|
||||||
|
}
|
||||||
|
|
||||||
|
if debugSequences {
|
||||||
|
println("Seq", i, "Litlen:", ll, "mo:", mo, "(abs) ml:", ml)
|
||||||
|
}
|
||||||
|
// Evaluate.
|
||||||
|
// We might be doing this async, so do it early.
|
||||||
|
if mo == 0 && ml > 0 {
|
||||||
|
return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
|
||||||
|
}
|
||||||
|
if ml > maxMatchLen {
|
||||||
|
return fmt.Errorf("match len (%d) bigger than max allowed length", ml)
|
||||||
|
}
|
||||||
|
s.seqSize += ll + ml
|
||||||
|
if s.seqSize > maxBlockSize {
|
||||||
|
return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
|
||||||
|
}
|
||||||
|
litRemain -= ll
|
||||||
|
if litRemain < 0 {
|
||||||
|
return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, litRemain+ll)
|
||||||
|
}
|
||||||
|
seqs[i] = seqVals{
|
||||||
|
ll: ll,
|
||||||
|
ml: ml,
|
||||||
|
mo: mo,
|
||||||
|
}
|
||||||
|
if i == len(seqs)-1 {
|
||||||
|
// This is the last sequence, so we shouldn't update state.
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
// Manually inlined, ~ 5-20% faster
|
||||||
|
// Update all 3 states at once. Approx 20% faster.
|
||||||
|
nBits := llState.nbBits() + mlState.nbBits() + ofState.nbBits()
|
||||||
|
if nBits == 0 {
|
||||||
|
llState = llTable[llState.newState()&maxTableMask]
|
||||||
|
mlState = mlTable[mlState.newState()&maxTableMask]
|
||||||
|
ofState = ofTable[ofState.newState()&maxTableMask]
|
||||||
|
} else {
|
||||||
|
bits := br.get32BitsFast(nBits)
|
||||||
|
lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
|
||||||
|
llState = llTable[(llState.newState()+lowBits)&maxTableMask]
|
||||||
|
|
||||||
|
lowBits = uint16(bits >> (ofState.nbBits() & 31))
|
||||||
|
lowBits &= bitMask[mlState.nbBits()&15]
|
||||||
|
mlState = mlTable[(mlState.newState()+lowBits)&maxTableMask]
|
||||||
|
|
||||||
|
lowBits = uint16(bits) & bitMask[ofState.nbBits()&15]
|
||||||
|
ofState = ofTable[(ofState.newState()+lowBits)&maxTableMask]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
s.seqSize += litRemain
|
||||||
|
if s.seqSize > maxBlockSize {
|
||||||
|
return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
|
||||||
|
}
|
||||||
|
err := br.close()
|
||||||
|
if err != nil {
|
||||||
|
printf("Closing sequences: %v, %+v\n", err, *br)
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// executeSimple handles cases when a dictionary is not used.
|
||||||
|
func (s *sequenceDecs) executeSimple(seqs []seqVals, hist []byte) error {
|
||||||
|
// Ensure we have enough output size...
|
||||||
|
if len(s.out)+s.seqSize > cap(s.out) {
|
||||||
|
addBytes := s.seqSize + len(s.out)
|
||||||
|
s.out = append(s.out, make([]byte, addBytes)...)
|
||||||
|
s.out = s.out[:len(s.out)-addBytes]
|
||||||
|
}
|
||||||
|
|
||||||
|
if debugDecoder {
|
||||||
|
printf("Execute %d seqs with literals: %d into %d bytes\n", len(seqs), len(s.literals), s.seqSize)
|
||||||
|
}
|
||||||
|
|
||||||
|
var t = len(s.out)
|
||||||
|
out := s.out[:t+s.seqSize]
|
||||||
|
|
||||||
|
for _, seq := range seqs {
|
||||||
|
// Add literals
|
||||||
|
copy(out[t:], s.literals[:seq.ll])
|
||||||
|
t += seq.ll
|
||||||
|
s.literals = s.literals[seq.ll:]
|
||||||
|
|
||||||
|
// Malformed input
|
||||||
|
if seq.mo > t+len(hist) || seq.mo > s.windowSize {
|
||||||
|
return fmt.Errorf("match offset (%d) bigger than current history (%d)", seq.mo, t+len(hist))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy from history.
|
||||||
|
if v := seq.mo - t; v > 0 {
|
||||||
|
// v is the start position in history from end.
|
||||||
|
start := len(hist) - v
|
||||||
|
if seq.ml > v {
|
||||||
|
// Some goes into the current block.
|
||||||
|
// Copy remainder of history
|
||||||
|
copy(out[t:], hist[start:])
|
||||||
|
t += v
|
||||||
|
seq.ml -= v
|
||||||
|
} else {
|
||||||
|
copy(out[t:], hist[start:start+seq.ml])
|
||||||
|
t += seq.ml
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// We must be in the current buffer now
|
||||||
|
if seq.ml > 0 {
|
||||||
|
start := t - seq.mo
|
||||||
|
if seq.ml <= t-start {
|
||||||
|
// No overlap
|
||||||
|
copy(out[t:], out[start:start+seq.ml])
|
||||||
|
t += seq.ml
|
||||||
|
} else {
|
||||||
|
// Overlapping copy
|
||||||
|
// Extend destination slice and copy one byte at the time.
|
||||||
|
src := out[start : start+seq.ml]
|
||||||
|
dst := out[t:]
|
||||||
|
dst = dst[:len(src)]
|
||||||
|
t += len(src)
|
||||||
|
// Destination is the space we just added.
|
||||||
|
for i := range src {
|
||||||
|
dst[i] = src[i]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Add final literals
|
||||||
|
copy(out[t:], s.literals)
|
||||||
|
if debugDecoder {
|
||||||
|
t += len(s.literals)
|
||||||
|
if t != len(out) {
|
||||||
|
panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
s.out = out
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
|
@ -18,26 +18,44 @@ const ZipMethodWinZip = 93
|
||||||
// See https://pkware.cachefly.net/webdocs/APPNOTE/APPNOTE-6.3.9.TXT
|
// See https://pkware.cachefly.net/webdocs/APPNOTE/APPNOTE-6.3.9.TXT
|
||||||
const ZipMethodPKWare = 20
|
const ZipMethodPKWare = 20
|
||||||
|
|
||||||
var zipReaderPool sync.Pool
|
// zipReaderPool is the default reader pool.
|
||||||
|
var zipReaderPool = sync.Pool{New: func() interface{} {
|
||||||
|
z, err := NewReader(nil, WithDecoderLowmem(true), WithDecoderMaxWindow(128<<20), WithDecoderConcurrency(1))
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
return z
|
||||||
|
}}
|
||||||
|
|
||||||
// newZipReader creates a pooled zip decompressor.
|
// newZipReader creates a pooled zip decompressor.
|
||||||
func newZipReader(r io.Reader) io.ReadCloser {
|
func newZipReader(opts ...DOption) func(r io.Reader) io.ReadCloser {
|
||||||
dec, ok := zipReaderPool.Get().(*Decoder)
|
pool := &zipReaderPool
|
||||||
if ok {
|
if len(opts) > 0 {
|
||||||
dec.Reset(r)
|
opts = append([]DOption{WithDecoderLowmem(true), WithDecoderMaxWindow(128 << 20)}, opts...)
|
||||||
} else {
|
// Force concurrency 1
|
||||||
d, err := NewReader(r, WithDecoderConcurrency(1), WithDecoderLowmem(true))
|
opts = append(opts, WithDecoderConcurrency(1))
|
||||||
if err != nil {
|
// Create our own pool
|
||||||
panic(err)
|
pool = &sync.Pool{}
|
||||||
}
|
}
|
||||||
dec = d
|
return func(r io.Reader) io.ReadCloser {
|
||||||
|
dec, ok := pool.Get().(*Decoder)
|
||||||
|
if ok {
|
||||||
|
dec.Reset(r)
|
||||||
|
} else {
|
||||||
|
d, err := NewReader(r, opts...)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
dec = d
|
||||||
|
}
|
||||||
|
return &pooledZipReader{dec: dec, pool: pool}
|
||||||
}
|
}
|
||||||
return &pooledZipReader{dec: dec}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type pooledZipReader struct {
|
type pooledZipReader struct {
|
||||||
mu sync.Mutex // guards Close and Read
|
mu sync.Mutex // guards Close and Read
|
||||||
dec *Decoder
|
pool *sync.Pool
|
||||||
|
dec *Decoder
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *pooledZipReader) Read(p []byte) (n int, err error) {
|
func (r *pooledZipReader) Read(p []byte) (n int, err error) {
|
||||||
|
@ -48,8 +66,8 @@ func (r *pooledZipReader) Read(p []byte) (n int, err error) {
|
||||||
}
|
}
|
||||||
dec, err := r.dec.Read(p)
|
dec, err := r.dec.Read(p)
|
||||||
if err == io.EOF {
|
if err == io.EOF {
|
||||||
err = r.dec.Reset(nil)
|
r.dec.Reset(nil)
|
||||||
zipReaderPool.Put(r.dec)
|
r.pool.Put(r.dec)
|
||||||
r.dec = nil
|
r.dec = nil
|
||||||
}
|
}
|
||||||
return dec, err
|
return dec, err
|
||||||
|
@ -61,7 +79,7 @@ func (r *pooledZipReader) Close() error {
|
||||||
var err error
|
var err error
|
||||||
if r.dec != nil {
|
if r.dec != nil {
|
||||||
err = r.dec.Reset(nil)
|
err = r.dec.Reset(nil)
|
||||||
zipReaderPool.Put(r.dec)
|
r.pool.Put(r.dec)
|
||||||
r.dec = nil
|
r.dec = nil
|
||||||
}
|
}
|
||||||
return err
|
return err
|
||||||
|
@ -115,6 +133,9 @@ func ZipCompressor(opts ...EOption) func(w io.Writer) (io.WriteCloser, error) {
|
||||||
|
|
||||||
// ZipDecompressor returns a decompressor that can be registered with zip libraries.
|
// ZipDecompressor returns a decompressor that can be registered with zip libraries.
|
||||||
// See ZipCompressor for example.
|
// See ZipCompressor for example.
|
||||||
func ZipDecompressor() func(r io.Reader) io.ReadCloser {
|
// Options can be specified. WithDecoderConcurrency(1) is forced,
|
||||||
return newZipReader
|
// and by default a 128MB maximum decompression window is specified.
|
||||||
|
// The window size can be overridden if required.
|
||||||
|
func ZipDecompressor(opts ...DOption) func(r io.Reader) io.ReadCloser {
|
||||||
|
return newZipReader(opts...)
|
||||||
}
|
}
|
||||||
|
|
|
@ -110,17 +110,6 @@ func printf(format string, a ...interface{}) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// matchLenFast does matching, but will not match the last up to 7 bytes.
|
|
||||||
func matchLenFast(a, b []byte) int {
|
|
||||||
endI := len(a) & (math.MaxInt32 - 7)
|
|
||||||
for i := 0; i < endI; i += 8 {
|
|
||||||
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
|
|
||||||
return i + bits.TrailingZeros64(diff)>>3
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return endI
|
|
||||||
}
|
|
||||||
|
|
||||||
// matchLen returns the maximum length.
|
// matchLen returns the maximum length.
|
||||||
// a must be the shortest of the two.
|
// a must be the shortest of the two.
|
||||||
// The function also returns whether all bytes matched.
|
// The function also returns whether all bytes matched.
|
||||||
|
|
|
@ -133,11 +133,12 @@ github.com/imdario/mergo
|
||||||
# github.com/inconshreveable/mousetrap v1.0.0
|
# github.com/inconshreveable/mousetrap v1.0.0
|
||||||
## explicit
|
## explicit
|
||||||
github.com/inconshreveable/mousetrap
|
github.com/inconshreveable/mousetrap
|
||||||
# github.com/klauspost/compress v1.15.1
|
# github.com/klauspost/compress v1.15.9
|
||||||
## explicit; go 1.15
|
## explicit; go 1.16
|
||||||
github.com/klauspost/compress
|
github.com/klauspost/compress
|
||||||
github.com/klauspost/compress/fse
|
github.com/klauspost/compress/fse
|
||||||
github.com/klauspost/compress/huff0
|
github.com/klauspost/compress/huff0
|
||||||
|
github.com/klauspost/compress/internal/cpuinfo
|
||||||
github.com/klauspost/compress/internal/snapref
|
github.com/klauspost/compress/internal/snapref
|
||||||
github.com/klauspost/compress/zstd
|
github.com/klauspost/compress/zstd
|
||||||
github.com/klauspost/compress/zstd/internal/xxhash
|
github.com/klauspost/compress/zstd/internal/xxhash
|
||||||
|
|
Loading…
Reference in New Issue