mirror of https://github.com/docker/cli.git
vendor: github.com/klauspost/compress v1.15.1
full diff: https://github.com/klauspost/compress/compare/v1.15.0...v1.15.1 Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
This commit is contained in:
parent
acf6aee911
commit
b3f3beb739
|
@ -53,7 +53,7 @@ require (
|
||||||
github.com/golang/protobuf v1.5.2 // indirect
|
github.com/golang/protobuf v1.5.2 // indirect
|
||||||
github.com/gorilla/mux v1.8.0 // indirect; updated to v1.8.0 to get rid of old compatibility for "context"
|
github.com/gorilla/mux v1.8.0 // indirect; updated to v1.8.0 to get rid of old compatibility for "context"
|
||||||
github.com/inconshreveable/mousetrap v1.0.0 // indirect
|
github.com/inconshreveable/mousetrap v1.0.0 // indirect
|
||||||
github.com/klauspost/compress v1.15.0 // indirect
|
github.com/klauspost/compress v1.15.1 // indirect
|
||||||
github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect
|
github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect
|
||||||
github.com/miekg/pkcs11 v1.1.1 // indirect
|
github.com/miekg/pkcs11 v1.1.1 // indirect
|
||||||
github.com/moby/sys/symlink v0.2.0 // indirect
|
github.com/moby/sys/symlink v0.2.0 // indirect
|
||||||
|
|
|
@ -249,8 +249,8 @@ github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8
|
||||||
github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00=
|
github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00=
|
||||||
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
|
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
|
||||||
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
|
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
|
||||||
github.com/klauspost/compress v1.15.0 h1:xqfchp4whNFxn5A4XFyyYtitiWI8Hy5EW59jEwcyL6U=
|
github.com/klauspost/compress v1.15.1 h1:y9FcTHGyrebwfP0ZZqFiaxTaiDnUrGkJkI+f583BL1A=
|
||||||
github.com/klauspost/compress v1.15.0/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
|
github.com/klauspost/compress v1.15.1/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
|
||||||
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
||||||
github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
||||||
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
|
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
|
||||||
|
|
|
@ -17,6 +17,23 @@ This package provides various compression algorithms.
|
||||||
|
|
||||||
# changelog
|
# changelog
|
||||||
|
|
||||||
|
* Mar 3, 2022 (v1.15.0)
|
||||||
|
* zstd: Refactor decoder by @klauspost in [#498](https://github.com/klauspost/compress/pull/498)
|
||||||
|
* zstd: Add stream encoding without goroutines by @klauspost in [#505](https://github.com/klauspost/compress/pull/505)
|
||||||
|
* huff0: Prevent single blocks exceeding 16 bits by @klauspost in [#507](https://github.com/klauspost/compress/pull/507)
|
||||||
|
* flate: Inline literal emission by @klauspost in [#509](https://github.com/klauspost/compress/pull/509)
|
||||||
|
* gzhttp: Add zstd to transport by @klauspost in [#400](https://github.com/klauspost/compress/pull/400)
|
||||||
|
* gzhttp: Make content-type optional by @klauspost in [#510](https://github.com/klauspost/compress/pull/510)
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary>See Details</summary>
|
||||||
|
Both compression and decompression now support "synchronous" stream operations. This means that whenever "concurrency" is set to 1, they will operate without spawning goroutines.
|
||||||
|
|
||||||
|
Asynchronous stream decompression is now faster, since the goroutine allocation splits the workload much more effectively. Typical streams will now use 2 cores fully for decompression. When a stream has finished decoding, no goroutines will be left over, so decoders can now safely be pooled and still be garbage collected.
|
||||||
|
|
||||||
|
While the release has been extensively tested, it is recommended to test when upgrading.
|
||||||
|
</details>
|
||||||
|
|
||||||
* Feb 22, 2022 (v1.14.4)
|
* Feb 22, 2022 (v1.14.4)
|
||||||
* flate: Fix rare huffman only (-2) corruption. [#503](https://github.com/klauspost/compress/pull/503)
|
* flate: Fix rare huffman only (-2) corruption. [#503](https://github.com/klauspost/compress/pull/503)
|
||||||
* zip: Update deprecated CreateHeaderRaw to correctly call CreateRaw by @saracen in [#502](https://github.com/klauspost/compress/pull/502)
|
* zip: Update deprecated CreateHeaderRaw to correctly call CreateRaw by @saracen in [#502](https://github.com/klauspost/compress/pull/502)
|
||||||
|
|
|
@ -0,0 +1,5 @@
|
||||||
|
package huff0
|
||||||
|
|
||||||
|
//go:generate go run generate.go
|
||||||
|
//go:generate asmfmt -w decompress_amd64.s
|
||||||
|
//go:generate asmfmt -w decompress_8b_amd64.s
|
|
@ -165,6 +165,11 @@ func (b *bitReaderShifted) peekBitsFast(n uint8) uint16 {
|
||||||
return uint16(b.value >> ((64 - n) & 63))
|
return uint16(b.value >> ((64 - n) & 63))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// peekTopBits returns the low 16 bits of b.value shifted right by n.
// For n < 64 this is equivalent to peekBitsFast(64 - n); the caller passes the
// pre-computed shift amount instead of the number of bits to peek.
// It does not modify the reader.
|
||||||
|
func (b *bitReaderShifted) peekTopBits(n uint8) uint16 {
|
||||||
|
return uint16(b.value >> n)
|
||||||
|
}
|
||||||
|
|
||||||
func (b *bitReaderShifted) advance(n uint8) {
|
func (b *bitReaderShifted) advance(n uint8) {
|
||||||
b.bitsRead += n
|
b.bitsRead += n
|
||||||
b.value <<= n & 63
|
b.value <<= n & 63
|
||||||
|
|
|
@ -725,189 +725,6 @@ func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) {
|
||||||
return dst, br.close()
|
return dst, br.close()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Decompress4X will decompress a 4X encoded stream.
|
|
||||||
// The length of the supplied input must match the end of a block exactly.
|
|
||||||
// The *capacity* of the dst slice must match the destination size of
|
|
||||||
// the uncompressed data exactly.
|
|
||||||
func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
|
|
||||||
if len(d.dt.single) == 0 {
|
|
||||||
return nil, errors.New("no table loaded")
|
|
||||||
}
|
|
||||||
if len(src) < 6+(4*1) {
|
|
||||||
return nil, errors.New("input too small")
|
|
||||||
}
|
|
||||||
if use8BitTables && d.actualTableLog <= 8 {
|
|
||||||
return d.decompress4X8bit(dst, src)
|
|
||||||
}
|
|
||||||
|
|
||||||
var br [4]bitReaderShifted
|
|
||||||
// Decode "jump table"
|
|
||||||
start := 6
|
|
||||||
for i := 0; i < 3; i++ {
|
|
||||||
length := int(src[i*2]) | (int(src[i*2+1]) << 8)
|
|
||||||
if start+length >= len(src) {
|
|
||||||
return nil, errors.New("truncated input (or invalid offset)")
|
|
||||||
}
|
|
||||||
err := br[i].init(src[start : start+length])
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
start += length
|
|
||||||
}
|
|
||||||
err := br[3].init(src[start:])
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// destination, offset to match first output
|
|
||||||
dstSize := cap(dst)
|
|
||||||
dst = dst[:dstSize]
|
|
||||||
out := dst
|
|
||||||
dstEvery := (dstSize + 3) / 4
|
|
||||||
|
|
||||||
const tlSize = 1 << tableLogMax
|
|
||||||
const tlMask = tlSize - 1
|
|
||||||
single := d.dt.single[:tlSize]
|
|
||||||
|
|
||||||
// Use temp table to avoid bound checks/append penalty.
|
|
||||||
buf := d.buffer()
|
|
||||||
var off uint8
|
|
||||||
var decoded int
|
|
||||||
|
|
||||||
// Decode 2 values from each decoder/loop.
|
|
||||||
const bufoff = 256
|
|
||||||
for {
|
|
||||||
if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
const stream = 0
|
|
||||||
const stream2 = 1
|
|
||||||
br[stream].fillFast()
|
|
||||||
br[stream2].fillFast()
|
|
||||||
|
|
||||||
val := br[stream].peekBitsFast(d.actualTableLog)
|
|
||||||
val2 := br[stream2].peekBitsFast(d.actualTableLog)
|
|
||||||
v := single[val&tlMask]
|
|
||||||
v2 := single[val2&tlMask]
|
|
||||||
br[stream].advance(uint8(v.entry))
|
|
||||||
br[stream2].advance(uint8(v2.entry))
|
|
||||||
buf[stream][off] = uint8(v.entry >> 8)
|
|
||||||
buf[stream2][off] = uint8(v2.entry >> 8)
|
|
||||||
|
|
||||||
val = br[stream].peekBitsFast(d.actualTableLog)
|
|
||||||
val2 = br[stream2].peekBitsFast(d.actualTableLog)
|
|
||||||
v = single[val&tlMask]
|
|
||||||
v2 = single[val2&tlMask]
|
|
||||||
br[stream].advance(uint8(v.entry))
|
|
||||||
br[stream2].advance(uint8(v2.entry))
|
|
||||||
buf[stream][off+1] = uint8(v.entry >> 8)
|
|
||||||
buf[stream2][off+1] = uint8(v2.entry >> 8)
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
const stream = 2
|
|
||||||
const stream2 = 3
|
|
||||||
br[stream].fillFast()
|
|
||||||
br[stream2].fillFast()
|
|
||||||
|
|
||||||
val := br[stream].peekBitsFast(d.actualTableLog)
|
|
||||||
val2 := br[stream2].peekBitsFast(d.actualTableLog)
|
|
||||||
v := single[val&tlMask]
|
|
||||||
v2 := single[val2&tlMask]
|
|
||||||
br[stream].advance(uint8(v.entry))
|
|
||||||
br[stream2].advance(uint8(v2.entry))
|
|
||||||
buf[stream][off] = uint8(v.entry >> 8)
|
|
||||||
buf[stream2][off] = uint8(v2.entry >> 8)
|
|
||||||
|
|
||||||
val = br[stream].peekBitsFast(d.actualTableLog)
|
|
||||||
val2 = br[stream2].peekBitsFast(d.actualTableLog)
|
|
||||||
v = single[val&tlMask]
|
|
||||||
v2 = single[val2&tlMask]
|
|
||||||
br[stream].advance(uint8(v.entry))
|
|
||||||
br[stream2].advance(uint8(v2.entry))
|
|
||||||
buf[stream][off+1] = uint8(v.entry >> 8)
|
|
||||||
buf[stream2][off+1] = uint8(v2.entry >> 8)
|
|
||||||
}
|
|
||||||
|
|
||||||
off += 2
|
|
||||||
|
|
||||||
if off == 0 {
|
|
||||||
if bufoff > dstEvery {
|
|
||||||
d.bufs.Put(buf)
|
|
||||||
return nil, errors.New("corruption detected: stream overrun 1")
|
|
||||||
}
|
|
||||||
copy(out, buf[0][:])
|
|
||||||
copy(out[dstEvery:], buf[1][:])
|
|
||||||
copy(out[dstEvery*2:], buf[2][:])
|
|
||||||
copy(out[dstEvery*3:], buf[3][:])
|
|
||||||
out = out[bufoff:]
|
|
||||||
decoded += bufoff * 4
|
|
||||||
// There must at least be 3 buffers left.
|
|
||||||
if len(out) < dstEvery*3 {
|
|
||||||
d.bufs.Put(buf)
|
|
||||||
return nil, errors.New("corruption detected: stream overrun 2")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if off > 0 {
|
|
||||||
ioff := int(off)
|
|
||||||
if len(out) < dstEvery*3+ioff {
|
|
||||||
d.bufs.Put(buf)
|
|
||||||
return nil, errors.New("corruption detected: stream overrun 3")
|
|
||||||
}
|
|
||||||
copy(out, buf[0][:off])
|
|
||||||
copy(out[dstEvery:], buf[1][:off])
|
|
||||||
copy(out[dstEvery*2:], buf[2][:off])
|
|
||||||
copy(out[dstEvery*3:], buf[3][:off])
|
|
||||||
decoded += int(off) * 4
|
|
||||||
out = out[off:]
|
|
||||||
}
|
|
||||||
|
|
||||||
// Decode remaining.
|
|
||||||
remainBytes := dstEvery - (decoded / 4)
|
|
||||||
for i := range br {
|
|
||||||
offset := dstEvery * i
|
|
||||||
endsAt := offset + remainBytes
|
|
||||||
if endsAt > len(out) {
|
|
||||||
endsAt = len(out)
|
|
||||||
}
|
|
||||||
br := &br[i]
|
|
||||||
bitsLeft := br.remaining()
|
|
||||||
for bitsLeft > 0 {
|
|
||||||
br.fill()
|
|
||||||
if offset >= endsAt {
|
|
||||||
d.bufs.Put(buf)
|
|
||||||
return nil, errors.New("corruption detected: stream overrun 4")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read value and increment offset.
|
|
||||||
val := br.peekBitsFast(d.actualTableLog)
|
|
||||||
v := single[val&tlMask].entry
|
|
||||||
nBits := uint8(v)
|
|
||||||
br.advance(nBits)
|
|
||||||
bitsLeft -= uint(nBits)
|
|
||||||
out[offset] = uint8(v >> 8)
|
|
||||||
offset++
|
|
||||||
}
|
|
||||||
if offset != endsAt {
|
|
||||||
d.bufs.Put(buf)
|
|
||||||
return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt)
|
|
||||||
}
|
|
||||||
decoded += offset - dstEvery*i
|
|
||||||
err = br.close()
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
d.bufs.Put(buf)
|
|
||||||
if dstSize != decoded {
|
|
||||||
return nil, errors.New("corruption detected: short output block")
|
|
||||||
}
|
|
||||||
return dst, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Decompress4X will decompress a 4X encoded stream.
|
// Decompress4X will decompress a 4X encoded stream.
|
||||||
// The length of the supplied input must match the end of a block exactly.
|
// The length of the supplied input must match the end of a block exactly.
|
||||||
// The *capacity* of the dst slice must match the destination size of
|
// The *capacity* of the dst slice must match the destination size of
|
||||||
|
|
|
@ -0,0 +1,488 @@
|
||||||
|
// +build !appengine
|
||||||
|
// +build gc
|
||||||
|
// +build !noasm
|
||||||
|
|
||||||
|
#include "textflag.h"
|
||||||
|
#include "funcdata.h"
|
||||||
|
#include "go_asm.h"
|
||||||
|
|
||||||
|
#define bufoff 256 // see decompress.go, we're using [4][256]byte table
|
||||||
|
|
||||||
|
// func decompress4x_8b_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
|
||||||
|
// peekBits uint8, buf *byte, tbl *dEntrySingle) uint8
|
||||||
|
TEXT ·decompress4x_8b_loop_x86(SB), NOSPLIT, $8
|
||||||
|
#define off R8
|
||||||
|
#define buffer DI
|
||||||
|
#define table SI
|
||||||
|
|
||||||
|
#define br_bits_read R9
|
||||||
|
#define br_value R10
|
||||||
|
#define br_offset R11
|
||||||
|
#define peek_bits R12
|
||||||
|
#define exhausted DX
|
||||||
|
|
||||||
|
#define br0 R13
|
||||||
|
#define br1 R14
|
||||||
|
#define br2 R15
|
||||||
|
#define br3 BP
|
||||||
|
|
||||||
|
MOVQ BP, 0(SP)
|
||||||
|
|
||||||
|
XORQ exhausted, exhausted // exhausted = false
|
||||||
|
XORQ off, off // off = 0
|
||||||
|
|
||||||
|
MOVBQZX peekBits+32(FP), peek_bits
|
||||||
|
MOVQ buf+40(FP), buffer
|
||||||
|
MOVQ tbl+48(FP), table
|
||||||
|
|
||||||
|
MOVQ pbr0+0(FP), br0
|
||||||
|
MOVQ pbr1+8(FP), br1
|
||||||
|
MOVQ pbr2+16(FP), br2
|
||||||
|
MOVQ pbr3+24(FP), br3
|
||||||
|
|
||||||
|
main_loop:
|
||||||
|
|
||||||
|
// const stream = 0
|
||||||
|
// br0.fillFast()
|
||||||
|
MOVBQZX bitReaderShifted_bitsRead(br0), br_bits_read
|
||||||
|
MOVQ bitReaderShifted_value(br0), br_value
|
||||||
|
MOVQ bitReaderShifted_off(br0), br_offset
|
||||||
|
|
||||||
|
// if b.bitsRead >= 32 {
|
||||||
|
CMPQ br_bits_read, $32
|
||||||
|
JB skip_fill0
|
||||||
|
|
||||||
|
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
||||||
|
SUBQ $4, br_offset // b.off -= 4
|
||||||
|
|
||||||
|
// v := b.in[b.off-4 : b.off]
|
||||||
|
// v = v[:4]
|
||||||
|
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
||||||
|
MOVQ bitReaderShifted_in(br0), AX
|
||||||
|
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
||||||
|
|
||||||
|
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||||
|
MOVQ br_bits_read, CX
|
||||||
|
SHLQ CL, AX
|
||||||
|
ORQ AX, br_value
|
||||||
|
|
||||||
|
// exhausted = exhausted || (br0.off < 4)
|
||||||
|
CMPQ br_offset, $4
|
||||||
|
SETLT DL
|
||||||
|
ORB DL, DH
|
||||||
|
|
||||||
|
// }
|
||||||
|
skip_fill0:
|
||||||
|
|
||||||
|
// val0 := br0.peekTopBits(peekBits)
|
||||||
|
MOVQ br_value, AX
|
||||||
|
MOVQ peek_bits, CX
|
||||||
|
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
// v0 := table[val0&mask]
|
||||||
|
MOVW 0(table)(AX*2), AX // AX - v0
|
||||||
|
|
||||||
|
// br0.advance(uint8(v0.entry))
|
||||||
|
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLQ CL, br_value // value <<= n
|
||||||
|
ADDQ CX, br_bits_read // bits_read += n
|
||||||
|
|
||||||
|
// val1 := br0.peekTopBits(peekBits)
|
||||||
|
MOVQ peek_bits, CX
|
||||||
|
MOVQ br_value, AX
|
||||||
|
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
// v1 := table[val1&mask]
|
||||||
|
MOVW 0(table)(AX*2), AX // AX - v1
|
||||||
|
|
||||||
|
// br0.advance(uint8(v1.entry))
|
||||||
|
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLQ CX, br_value // value <<= n
|
||||||
|
ADDQ CX, br_bits_read // bits_read += n
|
||||||
|
|
||||||
|
// these two writes get coalesced
|
||||||
|
// buf[stream][off] = uint8(v0.entry >> 8)
|
||||||
|
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
||||||
|
MOVW BX, 0(buffer)(off*1)
|
||||||
|
|
||||||
|
// SECOND PART:
|
||||||
|
// val2 := br0.peekTopBits(peekBits)
|
||||||
|
MOVQ br_value, AX
|
||||||
|
MOVQ peek_bits, CX
|
||||||
|
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
// v2 := table[val0&mask]
|
||||||
|
MOVW 0(table)(AX*2), AX // AX - v0
|
||||||
|
|
||||||
|
// br0.advance(uint8(v0.entry))
|
||||||
|
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLQ CL, br_value // value <<= n
|
||||||
|
ADDQ CX, br_bits_read // bits_read += n
|
||||||
|
|
||||||
|
// val3 := br0.peekTopBits(peekBits)
|
||||||
|
MOVQ peek_bits, CX
|
||||||
|
MOVQ br_value, AX
|
||||||
|
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
// v3 := table[val1&mask]
|
||||||
|
MOVW 0(table)(AX*2), AX // AX - v1
|
||||||
|
|
||||||
|
// br0.advance(uint8(v1.entry))
|
||||||
|
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLQ CX, br_value // value <<= n
|
||||||
|
ADDQ CX, br_bits_read // bits_read += n
|
||||||
|
|
||||||
|
// these two writes get coalesced
|
||||||
|
// buf[stream][off+2] = uint8(v2.entry >> 8)
|
||||||
|
// buf[stream][off+3] = uint8(v3.entry >> 8)
|
||||||
|
MOVW BX, 0+2(buffer)(off*1)
|
||||||
|
|
||||||
|
// update the bit reader structure
|
||||||
|
MOVB br_bits_read, bitReaderShifted_bitsRead(br0)
|
||||||
|
MOVQ br_value, bitReaderShifted_value(br0)
|
||||||
|
MOVQ br_offset, bitReaderShifted_off(br0)
|
||||||
|
|
||||||
|
// const stream = 1
|
||||||
|
// br1.fillFast()
|
||||||
|
MOVBQZX bitReaderShifted_bitsRead(br1), br_bits_read
|
||||||
|
MOVQ bitReaderShifted_value(br1), br_value
|
||||||
|
MOVQ bitReaderShifted_off(br1), br_offset
|
||||||
|
|
||||||
|
// if b.bitsRead >= 32 {
|
||||||
|
CMPQ br_bits_read, $32
|
||||||
|
JB skip_fill1
|
||||||
|
|
||||||
|
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
||||||
|
SUBQ $4, br_offset // b.off -= 4
|
||||||
|
|
||||||
|
// v := b.in[b.off-4 : b.off]
|
||||||
|
// v = v[:4]
|
||||||
|
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
||||||
|
MOVQ bitReaderShifted_in(br1), AX
|
||||||
|
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
||||||
|
|
||||||
|
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||||
|
MOVQ br_bits_read, CX
|
||||||
|
SHLQ CL, AX
|
||||||
|
ORQ AX, br_value
|
||||||
|
|
||||||
|
// exhausted = exhausted || (br1.off < 4)
|
||||||
|
CMPQ br_offset, $4
|
||||||
|
SETLT DL
|
||||||
|
ORB DL, DH
|
||||||
|
|
||||||
|
// }
|
||||||
|
skip_fill1:
|
||||||
|
|
||||||
|
// val0 := br1.peekTopBits(peekBits)
|
||||||
|
MOVQ br_value, AX
|
||||||
|
MOVQ peek_bits, CX
|
||||||
|
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
// v0 := table[val0&mask]
|
||||||
|
MOVW 0(table)(AX*2), AX // AX - v0
|
||||||
|
|
||||||
|
// br1.advance(uint8(v0.entry))
|
||||||
|
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLQ CL, br_value // value <<= n
|
||||||
|
ADDQ CX, br_bits_read // bits_read += n
|
||||||
|
|
||||||
|
// val1 := br1.peekTopBits(peekBits)
|
||||||
|
MOVQ peek_bits, CX
|
||||||
|
MOVQ br_value, AX
|
||||||
|
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
// v1 := table[val1&mask]
|
||||||
|
MOVW 0(table)(AX*2), AX // AX - v1
|
||||||
|
|
||||||
|
// br1.advance(uint8(v1.entry))
|
||||||
|
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLQ CX, br_value // value <<= n
|
||||||
|
ADDQ CX, br_bits_read // bits_read += n
|
||||||
|
|
||||||
|
// these two writes get coalesced
|
||||||
|
// buf[stream][off] = uint8(v0.entry >> 8)
|
||||||
|
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
||||||
|
MOVW BX, 256(buffer)(off*1)
|
||||||
|
|
||||||
|
// SECOND PART:
|
||||||
|
// val2 := br1.peekTopBits(peekBits)
|
||||||
|
MOVQ br_value, AX
|
||||||
|
MOVQ peek_bits, CX
|
||||||
|
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
// v2 := table[val0&mask]
|
||||||
|
MOVW 0(table)(AX*2), AX // AX - v0
|
||||||
|
|
||||||
|
// br1.advance(uint8(v0.entry))
|
||||||
|
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLQ CL, br_value // value <<= n
|
||||||
|
ADDQ CX, br_bits_read // bits_read += n
|
||||||
|
|
||||||
|
// val3 := br1.peekTopBits(peekBits)
|
||||||
|
MOVQ peek_bits, CX
|
||||||
|
MOVQ br_value, AX
|
||||||
|
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
// v3 := table[val1&mask]
|
||||||
|
MOVW 0(table)(AX*2), AX // AX - v1
|
||||||
|
|
||||||
|
// br1.advance(uint8(v1.entry))
|
||||||
|
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLQ CX, br_value // value <<= n
|
||||||
|
ADDQ CX, br_bits_read // bits_read += n
|
||||||
|
|
||||||
|
// these two writes get coalesced
|
||||||
|
// buf[stream][off+2] = uint8(v2.entry >> 8)
|
||||||
|
// buf[stream][off+3] = uint8(v3.entry >> 8)
|
||||||
|
MOVW BX, 256+2(buffer)(off*1)
|
||||||
|
|
||||||
|
// update the bit reader structure
|
||||||
|
MOVB br_bits_read, bitReaderShifted_bitsRead(br1)
|
||||||
|
MOVQ br_value, bitReaderShifted_value(br1)
|
||||||
|
MOVQ br_offset, bitReaderShifted_off(br1)
|
||||||
|
|
||||||
|
// const stream = 2
|
||||||
|
// br2.fillFast()
|
||||||
|
MOVBQZX bitReaderShifted_bitsRead(br2), br_bits_read
|
||||||
|
MOVQ bitReaderShifted_value(br2), br_value
|
||||||
|
MOVQ bitReaderShifted_off(br2), br_offset
|
||||||
|
|
||||||
|
// if b.bitsRead >= 32 {
|
||||||
|
CMPQ br_bits_read, $32
|
||||||
|
JB skip_fill2
|
||||||
|
|
||||||
|
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
||||||
|
SUBQ $4, br_offset // b.off -= 4
|
||||||
|
|
||||||
|
// v := b.in[b.off-4 : b.off]
|
||||||
|
// v = v[:4]
|
||||||
|
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
||||||
|
MOVQ bitReaderShifted_in(br2), AX
|
||||||
|
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
||||||
|
|
||||||
|
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||||
|
MOVQ br_bits_read, CX
|
||||||
|
SHLQ CL, AX
|
||||||
|
ORQ AX, br_value
|
||||||
|
|
||||||
|
// exhausted = exhausted || (br2.off < 4)
|
||||||
|
CMPQ br_offset, $4
|
||||||
|
SETLT DL
|
||||||
|
ORB DL, DH
|
||||||
|
|
||||||
|
// }
|
||||||
|
skip_fill2:
|
||||||
|
|
||||||
|
// val0 := br2.peekTopBits(peekBits)
|
||||||
|
MOVQ br_value, AX
|
||||||
|
MOVQ peek_bits, CX
|
||||||
|
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
// v0 := table[val0&mask]
|
||||||
|
MOVW 0(table)(AX*2), AX // AX - v0
|
||||||
|
|
||||||
|
// br2.advance(uint8(v0.entry))
|
||||||
|
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLQ CL, br_value // value <<= n
|
||||||
|
ADDQ CX, br_bits_read // bits_read += n
|
||||||
|
|
||||||
|
// val1 := br2.peekTopBits(peekBits)
|
||||||
|
MOVQ peek_bits, CX
|
||||||
|
MOVQ br_value, AX
|
||||||
|
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
// v1 := table[val1&mask]
|
||||||
|
MOVW 0(table)(AX*2), AX // AX - v1
|
||||||
|
|
||||||
|
// br2.advance(uint8(v1.entry))
|
||||||
|
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLQ CX, br_value // value <<= n
|
||||||
|
ADDQ CX, br_bits_read // bits_read += n
|
||||||
|
|
||||||
|
// these two writes get coalesced
|
||||||
|
// buf[stream][off] = uint8(v0.entry >> 8)
|
||||||
|
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
||||||
|
MOVW BX, 512(buffer)(off*1)
|
||||||
|
|
||||||
|
// SECOND PART:
|
||||||
|
// val2 := br2.peekTopBits(peekBits)
|
||||||
|
MOVQ br_value, AX
|
||||||
|
MOVQ peek_bits, CX
|
||||||
|
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
// v2 := table[val0&mask]
|
||||||
|
MOVW 0(table)(AX*2), AX // AX - v0
|
||||||
|
|
||||||
|
// br2.advance(uint8(v0.entry))
|
||||||
|
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLQ CL, br_value // value <<= n
|
||||||
|
ADDQ CX, br_bits_read // bits_read += n
|
||||||
|
|
||||||
|
// val3 := br2.peekTopBits(peekBits)
|
||||||
|
MOVQ peek_bits, CX
|
||||||
|
MOVQ br_value, AX
|
||||||
|
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
// v3 := table[val1&mask]
|
||||||
|
MOVW 0(table)(AX*2), AX // AX - v1
|
||||||
|
|
||||||
|
// br2.advance(uint8(v1.entry))
|
||||||
|
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLQ CX, br_value // value <<= n
|
||||||
|
ADDQ CX, br_bits_read // bits_read += n
|
||||||
|
|
||||||
|
// these two writes get coalesced
|
||||||
|
// buf[stream][off+2] = uint8(v2.entry >> 8)
|
||||||
|
// buf[stream][off+3] = uint8(v3.entry >> 8)
|
||||||
|
MOVW BX, 512+2(buffer)(off*1)
|
||||||
|
|
||||||
|
// update the bit reader structure
|
||||||
|
MOVB br_bits_read, bitReaderShifted_bitsRead(br2)
|
||||||
|
MOVQ br_value, bitReaderShifted_value(br2)
|
||||||
|
MOVQ br_offset, bitReaderShifted_off(br2)
|
||||||
|
|
||||||
|
// const stream = 3
|
||||||
|
// br3.fillFast()
|
||||||
|
MOVBQZX bitReaderShifted_bitsRead(br3), br_bits_read
|
||||||
|
MOVQ bitReaderShifted_value(br3), br_value
|
||||||
|
MOVQ bitReaderShifted_off(br3), br_offset
|
||||||
|
|
||||||
|
// if b.bitsRead >= 32 {
|
||||||
|
CMPQ br_bits_read, $32
|
||||||
|
JB skip_fill3
|
||||||
|
|
||||||
|
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
||||||
|
SUBQ $4, br_offset // b.off -= 4
|
||||||
|
|
||||||
|
// v := b.in[b.off-4 : b.off]
|
||||||
|
// v = v[:4]
|
||||||
|
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
||||||
|
MOVQ bitReaderShifted_in(br3), AX
|
||||||
|
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
||||||
|
|
||||||
|
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||||
|
MOVQ br_bits_read, CX
|
||||||
|
SHLQ CL, AX
|
||||||
|
ORQ AX, br_value
|
||||||
|
|
||||||
|
// exhausted = exhausted || (br3.off < 4)
|
||||||
|
CMPQ br_offset, $4
|
||||||
|
SETLT DL
|
||||||
|
ORB DL, DH
|
||||||
|
|
||||||
|
// }
|
||||||
|
skip_fill3:
|
||||||
|
|
||||||
|
// val0 := br3.peekTopBits(peekBits)
|
||||||
|
MOVQ br_value, AX
|
||||||
|
MOVQ peek_bits, CX
|
||||||
|
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
// v0 := table[val0&mask]
|
||||||
|
MOVW 0(table)(AX*2), AX // AX - v0
|
||||||
|
|
||||||
|
// br3.advance(uint8(v0.entry))
|
||||||
|
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLQ CL, br_value // value <<= n
|
||||||
|
ADDQ CX, br_bits_read // bits_read += n
|
||||||
|
|
||||||
|
// val1 := br3.peekTopBits(peekBits)
|
||||||
|
MOVQ peek_bits, CX
|
||||||
|
MOVQ br_value, AX
|
||||||
|
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
// v1 := table[val1&mask]
|
||||||
|
MOVW 0(table)(AX*2), AX // AX - v1
|
||||||
|
|
||||||
|
// br3.advance(uint8(v1.entry))
|
||||||
|
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLQ CX, br_value // value <<= n
|
||||||
|
ADDQ CX, br_bits_read // bits_read += n
|
||||||
|
|
||||||
|
// these two writes get coalesced
|
||||||
|
// buf[stream][off] = uint8(v0.entry >> 8)
|
||||||
|
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
||||||
|
MOVW BX, 768(buffer)(off*1)
|
||||||
|
|
||||||
|
// SECOND PART:
|
||||||
|
// val2 := br3.peekTopBits(peekBits)
|
||||||
|
MOVQ br_value, AX
|
||||||
|
MOVQ peek_bits, CX
|
||||||
|
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
// v2 := table[val0&mask]
|
||||||
|
MOVW 0(table)(AX*2), AX // AX - v0
|
||||||
|
|
||||||
|
// br3.advance(uint8(v0.entry))
|
||||||
|
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLQ CL, br_value // value <<= n
|
||||||
|
ADDQ CX, br_bits_read // bits_read += n
|
||||||
|
|
||||||
|
// val3 := br3.peekTopBits(peekBits)
|
||||||
|
MOVQ peek_bits, CX
|
||||||
|
MOVQ br_value, AX
|
||||||
|
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
// v3 := table[val1&mask]
|
||||||
|
MOVW 0(table)(AX*2), AX // AX - v1
|
||||||
|
|
||||||
|
// br3.advance(uint8(v1.entry))
|
||||||
|
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLQ CX, br_value // value <<= n
|
||||||
|
ADDQ CX, br_bits_read // bits_read += n
|
||||||
|
|
||||||
|
// these two writes get coalesced
|
||||||
|
// buf[stream][off+2] = uint8(v2.entry >> 8)
|
||||||
|
// buf[stream][off+3] = uint8(v3.entry >> 8)
|
||||||
|
MOVW BX, 768+2(buffer)(off*1)
|
||||||
|
|
||||||
|
// update the bit reader structure
|
||||||
|
MOVB br_bits_read, bitReaderShifted_bitsRead(br3)
|
||||||
|
MOVQ br_value, bitReaderShifted_value(br3)
|
||||||
|
MOVQ br_offset, bitReaderShifted_off(br3)
|
||||||
|
|
||||||
|
ADDQ $4, off // off += 4
|
||||||
|
|
||||||
|
TESTB DH, DH // any br[i].ofs < 4?
|
||||||
|
JNZ end
|
||||||
|
|
||||||
|
CMPQ off, $bufoff
|
||||||
|
JL main_loop
|
||||||
|
|
||||||
|
end:
|
||||||
|
MOVQ 0(SP), BP
|
||||||
|
|
||||||
|
MOVB off, ret+56(FP)
|
||||||
|
RET
|
||||||
|
|
||||||
|
#undef off
|
||||||
|
#undef buffer
|
||||||
|
#undef table
|
||||||
|
|
||||||
|
#undef br_bits_read
|
||||||
|
#undef br_value
|
||||||
|
#undef br_offset
|
||||||
|
#undef peek_bits
|
||||||
|
#undef exhausted
|
||||||
|
|
||||||
|
#undef br0
|
||||||
|
#undef br1
|
||||||
|
#undef br2
|
||||||
|
#undef br3
|
197
vendor/github.com/klauspost/compress/huff0/decompress_8b_amd64.s.in
generated
vendored
Normal file
197
vendor/github.com/klauspost/compress/huff0/decompress_8b_amd64.s.in
generated
vendored
Normal file
|
@ -0,0 +1,197 @@
|
||||||
|
// +build !appengine
|
||||||
|
// +build gc
|
||||||
|
// +build !noasm
|
||||||
|
|
||||||
|
#include "textflag.h"
|
||||||
|
#include "funcdata.h"
|
||||||
|
#include "go_asm.h"
|
||||||
|
|
||||||
|
|
||||||
|
#define bufoff 256 // see decompress.go, we're using [4][256]byte table
|
||||||
|
|
||||||
|
//func decompress4x_8b_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
|
||||||
|
// peekBits uint8, buf *byte, tbl *dEntrySingle) uint8
|
||||||
|
TEXT ·decompress4x_8b_loop_x86(SB), NOSPLIT, $8
|
||||||
|
#define off R8
|
||||||
|
#define buffer DI
|
||||||
|
#define table SI
|
||||||
|
|
||||||
|
#define br_bits_read R9
|
||||||
|
#define br_value R10
|
||||||
|
#define br_offset R11
|
||||||
|
#define peek_bits R12
|
||||||
|
#define exhausted DX
|
||||||
|
|
||||||
|
#define br0 R13
|
||||||
|
#define br1 R14
|
||||||
|
#define br2 R15
|
||||||
|
#define br3 BP
|
||||||
|
|
||||||
|
MOVQ BP, 0(SP)
|
||||||
|
|
||||||
|
XORQ exhausted, exhausted // exhausted = false
|
||||||
|
XORQ off, off // off = 0
|
||||||
|
|
||||||
|
MOVBQZX peekBits+32(FP), peek_bits
|
||||||
|
MOVQ buf+40(FP), buffer
|
||||||
|
MOVQ tbl+48(FP), table
|
||||||
|
|
||||||
|
MOVQ pbr0+0(FP), br0
|
||||||
|
MOVQ pbr1+8(FP), br1
|
||||||
|
MOVQ pbr2+16(FP), br2
|
||||||
|
MOVQ pbr3+24(FP), br3
|
||||||
|
|
||||||
|
main_loop:
|
||||||
|
{{ define "decode_2_values_x86" }}
|
||||||
|
// const stream = {{ var "id" }}
|
||||||
|
// br{{ var "id"}}.fillFast()
|
||||||
|
MOVBQZX bitReaderShifted_bitsRead(br{{ var "id" }}), br_bits_read
|
||||||
|
MOVQ bitReaderShifted_value(br{{ var "id" }}), br_value
|
||||||
|
MOVQ bitReaderShifted_off(br{{ var "id" }}), br_offset
|
||||||
|
|
||||||
|
// if b.bitsRead >= 32 {
|
||||||
|
CMPQ br_bits_read, $32
|
||||||
|
JB skip_fill{{ var "id" }}
|
||||||
|
|
||||||
|
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
||||||
|
SUBQ $4, br_offset // b.off -= 4
|
||||||
|
|
||||||
|
// v := b.in[b.off-4 : b.off]
|
||||||
|
// v = v[:4]
|
||||||
|
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
||||||
|
MOVQ bitReaderShifted_in(br{{ var "id" }}), AX
|
||||||
|
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
||||||
|
|
||||||
|
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||||
|
MOVQ br_bits_read, CX
|
||||||
|
SHLQ CL, AX
|
||||||
|
ORQ AX, br_value
|
||||||
|
|
||||||
|
// exhausted = exhausted || (br{{ var "id"}}.off < 4)
|
||||||
|
CMPQ br_offset, $4
|
||||||
|
SETLT DL
|
||||||
|
ORB DL, DH
|
||||||
|
// }
|
||||||
|
skip_fill{{ var "id" }}:
|
||||||
|
|
||||||
|
// val0 := br{{ var "id"}}.peekTopBits(peekBits)
|
||||||
|
MOVQ br_value, AX
|
||||||
|
MOVQ peek_bits, CX
|
||||||
|
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
// v0 := table[val0&mask]
|
||||||
|
MOVW 0(table)(AX*2), AX // AX - v0
|
||||||
|
|
||||||
|
// br{{ var "id"}}.advance(uint8(v0.entry))
|
||||||
|
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLQ CL, br_value // value <<= n
|
||||||
|
ADDQ CX, br_bits_read // bits_read += n
|
||||||
|
|
||||||
|
// val1 := br{{ var "id"}}.peekTopBits(peekBits)
|
||||||
|
MOVQ peek_bits, CX
|
||||||
|
MOVQ br_value, AX
|
||||||
|
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
// v1 := table[val1&mask]
|
||||||
|
MOVW 0(table)(AX*2), AX // AX - v1
|
||||||
|
|
||||||
|
// br{{ var "id"}}.advance(uint8(v1.entry))
|
||||||
|
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLQ CX, br_value // value <<= n
|
||||||
|
ADDQ CX, br_bits_read // bits_read += n
|
||||||
|
|
||||||
|
|
||||||
|
// these two writes get coalesced
|
||||||
|
// buf[stream][off] = uint8(v0.entry >> 8)
|
||||||
|
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
||||||
|
MOVW BX, {{ var "bufofs" }}(buffer)(off*1)
|
||||||
|
|
||||||
|
// SECOND PART:
|
||||||
|
// val2 := br{{ var "id"}}.peekTopBits(peekBits)
|
||||||
|
MOVQ br_value, AX
|
||||||
|
MOVQ peek_bits, CX
|
||||||
|
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
// v2 := table[val0&mask]
|
||||||
|
MOVW 0(table)(AX*2), AX // AX - v0
|
||||||
|
|
||||||
|
// br{{ var "id"}}.advance(uint8(v0.entry))
|
||||||
|
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLQ CL, br_value // value <<= n
|
||||||
|
ADDQ CX, br_bits_read // bits_read += n
|
||||||
|
|
||||||
|
// val3 := br{{ var "id"}}.peekTopBits(peekBits)
|
||||||
|
MOVQ peek_bits, CX
|
||||||
|
MOVQ br_value, AX
|
||||||
|
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
// v3 := table[val1&mask]
|
||||||
|
MOVW 0(table)(AX*2), AX // AX - v1
|
||||||
|
|
||||||
|
// br{{ var "id"}}.advance(uint8(v1.entry))
|
||||||
|
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLQ CX, br_value // value <<= n
|
||||||
|
ADDQ CX, br_bits_read // bits_read += n
|
||||||
|
|
||||||
|
|
||||||
|
// these two writes get coalesced
|
||||||
|
// buf[stream][off+2] = uint8(v2.entry >> 8)
|
||||||
|
// buf[stream][off+3] = uint8(v3.entry >> 8)
|
||||||
|
MOVW BX, {{ var "bufofs" }}+2(buffer)(off*1)
|
||||||
|
|
||||||
|
// update the bitrader reader structure
|
||||||
|
MOVB br_bits_read, bitReaderShifted_bitsRead(br{{ var "id" }})
|
||||||
|
MOVQ br_value, bitReaderShifted_value(br{{ var "id" }})
|
||||||
|
MOVQ br_offset, bitReaderShifted_off(br{{ var "id" }})
|
||||||
|
{{ end }}
|
||||||
|
|
||||||
|
{{ set "id" "0" }}
|
||||||
|
{{ set "ofs" "0" }}
|
||||||
|
{{ set "bufofs" "0" }} {{/* id * bufoff */}}
|
||||||
|
{{ template "decode_2_values_x86" . }}
|
||||||
|
|
||||||
|
{{ set "id" "1" }}
|
||||||
|
{{ set "ofs" "8" }}
|
||||||
|
{{ set "bufofs" "256" }}
|
||||||
|
{{ template "decode_2_values_x86" . }}
|
||||||
|
|
||||||
|
{{ set "id" "2" }}
|
||||||
|
{{ set "ofs" "16" }}
|
||||||
|
{{ set "bufofs" "512" }}
|
||||||
|
{{ template "decode_2_values_x86" . }}
|
||||||
|
|
||||||
|
{{ set "id" "3" }}
|
||||||
|
{{ set "ofs" "24" }}
|
||||||
|
{{ set "bufofs" "768" }}
|
||||||
|
{{ template "decode_2_values_x86" . }}
|
||||||
|
|
||||||
|
ADDQ $4, off // off += 2
|
||||||
|
|
||||||
|
TESTB DH, DH // any br[i].ofs < 4?
|
||||||
|
JNZ end
|
||||||
|
|
||||||
|
CMPQ off, $bufoff
|
||||||
|
JL main_loop
|
||||||
|
end:
|
||||||
|
MOVQ 0(SP), BP
|
||||||
|
|
||||||
|
MOVB off, ret+56(FP)
|
||||||
|
RET
|
||||||
|
#undef off
|
||||||
|
#undef buffer
|
||||||
|
#undef table
|
||||||
|
|
||||||
|
#undef br_bits_read
|
||||||
|
#undef br_value
|
||||||
|
#undef br_offset
|
||||||
|
#undef peek_bits
|
||||||
|
#undef exhausted
|
||||||
|
|
||||||
|
#undef br0
|
||||||
|
#undef br1
|
||||||
|
#undef br2
|
||||||
|
#undef br3
|
|
@ -0,0 +1,181 @@
|
||||||
|
//go:build amd64 && !appengine && !noasm && gc
|
||||||
|
// +build amd64,!appengine,!noasm,gc
|
||||||
|
|
||||||
|
// This file contains the specialisation of Decoder.Decompress4X
|
||||||
|
// that uses an asm implementation of its main loop.
|
||||||
|
package huff0
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
)
|
||||||
|
|
||||||
|
// decompress4x_main_loop_x86 is an x86 assembler implementation
|
||||||
|
// of Decompress4X when tablelog > 8.
|
||||||
|
// go:noescape
|
||||||
|
func decompress4x_main_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
|
||||||
|
peekBits uint8, buf *byte, tbl *dEntrySingle) uint8
|
||||||
|
|
||||||
|
// decompress4x_8b_loop_x86 is an x86 assembler implementation
|
||||||
|
// of Decompress4X when tablelog <= 8 which decodes 4 entries
|
||||||
|
// per loop.
|
||||||
|
// go:noescape
|
||||||
|
func decompress4x_8b_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
|
||||||
|
peekBits uint8, buf *byte, tbl *dEntrySingle) uint8
|
||||||
|
|
||||||
|
// fallback8BitSize is the size where using Go version is faster: Decompress4X calls decompress4X8bit instead of the assembly loop when cap(dst) is below this value and the table log is <= 8.
|
||||||
|
const fallback8BitSize = 800
|
||||||
|
|
||||||
|
// Decompress4X will decompress a 4X encoded stream.
|
||||||
|
// The length of the supplied input must match the end of a block exactly.
|
||||||
|
// The *capacity* of the dst slice must match the destination size of
|
||||||
|
// the uncompressed data exactly.
|
||||||
|
func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
|
||||||
|
if len(d.dt.single) == 0 {
|
||||||
|
return nil, errors.New("no table loaded")
|
||||||
|
}
|
||||||
|
if len(src) < 6+(4*1) {
|
||||||
|
return nil, errors.New("input too small")
|
||||||
|
}
|
||||||
|
|
||||||
|
use8BitTables := d.actualTableLog <= 8
|
||||||
|
if cap(dst) < fallback8BitSize && use8BitTables {
|
||||||
|
return d.decompress4X8bit(dst, src)
|
||||||
|
}
|
||||||
|
var br [4]bitReaderShifted
|
||||||
|
// Decode "jump table"
|
||||||
|
start := 6
|
||||||
|
for i := 0; i < 3; i++ {
|
||||||
|
length := int(src[i*2]) | (int(src[i*2+1]) << 8)
|
||||||
|
if start+length >= len(src) {
|
||||||
|
return nil, errors.New("truncated input (or invalid offset)")
|
||||||
|
}
|
||||||
|
err := br[i].init(src[start : start+length])
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
start += length
|
||||||
|
}
|
||||||
|
err := br[3].init(src[start:])
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// destination, offset to match first output
|
||||||
|
dstSize := cap(dst)
|
||||||
|
dst = dst[:dstSize]
|
||||||
|
out := dst
|
||||||
|
dstEvery := (dstSize + 3) / 4
|
||||||
|
|
||||||
|
const tlSize = 1 << tableLogMax
|
||||||
|
const tlMask = tlSize - 1
|
||||||
|
single := d.dt.single[:tlSize]
|
||||||
|
|
||||||
|
// Use temp table to avoid bound checks/append penalty.
|
||||||
|
buf := d.buffer()
|
||||||
|
var off uint8
|
||||||
|
var decoded int
|
||||||
|
|
||||||
|
const debug = false
|
||||||
|
|
||||||
|
// see: bitReaderShifted.peekBitsFast()
|
||||||
|
peekBits := uint8((64 - d.actualTableLog) & 63)
|
||||||
|
|
||||||
|
// Decode 2 values from each decoder/loop.
|
||||||
|
const bufoff = 256
|
||||||
|
for {
|
||||||
|
if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
if use8BitTables {
|
||||||
|
off = decompress4x_8b_loop_x86(&br[0], &br[1], &br[2], &br[3], peekBits, &buf[0][0], &single[0])
|
||||||
|
} else {
|
||||||
|
off = decompress4x_main_loop_x86(&br[0], &br[1], &br[2], &br[3], peekBits, &buf[0][0], &single[0])
|
||||||
|
}
|
||||||
|
if debug {
|
||||||
|
fmt.Print("DEBUG: ")
|
||||||
|
fmt.Printf("off=%d,", off)
|
||||||
|
for i := 0; i < 4; i++ {
|
||||||
|
fmt.Printf(" br[%d]={bitsRead=%d, value=%x, off=%d}",
|
||||||
|
i, br[i].bitsRead, br[i].value, br[i].off)
|
||||||
|
}
|
||||||
|
fmt.Println("")
|
||||||
|
}
|
||||||
|
|
||||||
|
if off != 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
if bufoff > dstEvery {
|
||||||
|
d.bufs.Put(buf)
|
||||||
|
return nil, errors.New("corruption detected: stream overrun 1")
|
||||||
|
}
|
||||||
|
copy(out, buf[0][:])
|
||||||
|
copy(out[dstEvery:], buf[1][:])
|
||||||
|
copy(out[dstEvery*2:], buf[2][:])
|
||||||
|
copy(out[dstEvery*3:], buf[3][:])
|
||||||
|
out = out[bufoff:]
|
||||||
|
decoded += bufoff * 4
|
||||||
|
// There must at least be 3 buffers left.
|
||||||
|
if len(out) < dstEvery*3 {
|
||||||
|
d.bufs.Put(buf)
|
||||||
|
return nil, errors.New("corruption detected: stream overrun 2")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if off > 0 {
|
||||||
|
ioff := int(off)
|
||||||
|
if len(out) < dstEvery*3+ioff {
|
||||||
|
d.bufs.Put(buf)
|
||||||
|
return nil, errors.New("corruption detected: stream overrun 3")
|
||||||
|
}
|
||||||
|
copy(out, buf[0][:off])
|
||||||
|
copy(out[dstEvery:], buf[1][:off])
|
||||||
|
copy(out[dstEvery*2:], buf[2][:off])
|
||||||
|
copy(out[dstEvery*3:], buf[3][:off])
|
||||||
|
decoded += int(off) * 4
|
||||||
|
out = out[off:]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Decode remaining.
|
||||||
|
remainBytes := dstEvery - (decoded / 4)
|
||||||
|
for i := range br {
|
||||||
|
offset := dstEvery * i
|
||||||
|
endsAt := offset + remainBytes
|
||||||
|
if endsAt > len(out) {
|
||||||
|
endsAt = len(out)
|
||||||
|
}
|
||||||
|
br := &br[i]
|
||||||
|
bitsLeft := br.remaining()
|
||||||
|
for bitsLeft > 0 {
|
||||||
|
br.fill()
|
||||||
|
if offset >= endsAt {
|
||||||
|
d.bufs.Put(buf)
|
||||||
|
return nil, errors.New("corruption detected: stream overrun 4")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read value and increment offset.
|
||||||
|
val := br.peekBitsFast(d.actualTableLog)
|
||||||
|
v := single[val&tlMask].entry
|
||||||
|
nBits := uint8(v)
|
||||||
|
br.advance(nBits)
|
||||||
|
bitsLeft -= uint(nBits)
|
||||||
|
out[offset] = uint8(v >> 8)
|
||||||
|
offset++
|
||||||
|
}
|
||||||
|
if offset != endsAt {
|
||||||
|
d.bufs.Put(buf)
|
||||||
|
return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt)
|
||||||
|
}
|
||||||
|
decoded += offset - dstEvery*i
|
||||||
|
err = br.close()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
d.bufs.Put(buf)
|
||||||
|
if dstSize != decoded {
|
||||||
|
return nil, errors.New("corruption detected: short output block")
|
||||||
|
}
|
||||||
|
return dst, nil
|
||||||
|
}
|
|
@ -0,0 +1,506 @@
|
||||||
|
// +build !appengine
|
||||||
|
// +build gc
|
||||||
|
// +build !noasm
|
||||||
|
|
||||||
|
#include "textflag.h"
|
||||||
|
#include "funcdata.h"
|
||||||
|
#include "go_asm.h"
|
||||||
|
|
||||||
|
#ifdef GOAMD64_v4
|
||||||
|
#ifndef GOAMD64_v3
|
||||||
|
#define GOAMD64_v3
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define bufoff 256 // see decompress.go, we're using [4][256]byte table
|
||||||
|
|
||||||
|
// decompress4x_main_loop_x86 decodes two symbols from each of the four bit readers per iteration into the four 256-byte rows of buf. Go prototype: func decompress4x_main_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
|
||||||
|
// peekBits uint8, buf *byte, tbl *dEntrySingle) uint8
|
||||||
|
TEXT ·decompress4x_main_loop_x86(SB), NOSPLIT, $8
|
||||||
|
#define off R8
|
||||||
|
#define buffer DI
|
||||||
|
#define table SI
|
||||||
|
|
||||||
|
#define br_bits_read R9
|
||||||
|
#define br_value R10
|
||||||
|
#define br_offset R11
|
||||||
|
#define peek_bits R12
|
||||||
|
#define exhausted DX // DL: scratch for SETLT, DH: sticky flag tested at the end of each iteration
|
||||||
|
|
||||||
|
#define br0 R13
|
||||||
|
#define br1 R14
|
||||||
|
#define br2 R15
|
||||||
|
#define br3 BP
|
||||||
|
|
||||||
|
MOVQ BP, 0(SP) // save BP, which is reused as br3 below
|
||||||
|
|
||||||
|
XORQ exhausted, exhausted // exhausted = false
|
||||||
|
XORQ off, off // off = 0
|
||||||
|
|
||||||
|
MOVBQZX peekBits+32(FP), peek_bits
|
||||||
|
MOVQ buf+40(FP), buffer
|
||||||
|
MOVQ tbl+48(FP), table
|
||||||
|
|
||||||
|
MOVQ pbr0+0(FP), br0
|
||||||
|
MOVQ pbr1+8(FP), br1
|
||||||
|
MOVQ pbr2+16(FP), br2
|
||||||
|
MOVQ pbr3+24(FP), br3
|
||||||
|
|
||||||
|
main_loop:
|
||||||
|
|
||||||
|
// const stream = 0
|
||||||
|
// br0.fillFast()
|
||||||
|
MOVBQZX bitReaderShifted_bitsRead(br0), br_bits_read
|
||||||
|
MOVQ bitReaderShifted_value(br0), br_value
|
||||||
|
MOVQ bitReaderShifted_off(br0), br_offset
|
||||||
|
|
||||||
|
// We must have at least 2 * max tablelog left
|
||||||
|
CMPQ br_bits_read, $64-22
|
||||||
|
JBE skip_fill0
|
||||||
|
|
||||||
|
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
||||||
|
SUBQ $4, br_offset // b.off -= 4
|
||||||
|
|
||||||
|
// v := b.in[b.off-4 : b.off]
|
||||||
|
// v = v[:4]
|
||||||
|
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
||||||
|
MOVQ bitReaderShifted_in(br0), AX
|
||||||
|
|
||||||
|
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||||
|
#ifdef GOAMD64_v3
|
||||||
|
SHLXQ br_bits_read, 0(br_offset)(AX*1), AX // AX = b.in[b.off:...] << (b.bitsRead & 63); NOTE(review): the Q form with a memory source presumably loads 8 bytes, not the uint32 read in the #else branch - confirm the extra bytes are always in bounds and harmless
|
||||||
|
|
||||||
|
#else
|
||||||
|
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
||||||
|
MOVQ br_bits_read, CX
|
||||||
|
SHLQ CL, AX
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
ORQ AX, br_value
|
||||||
|
|
||||||
|
// exhausted = exhausted || (br0.off < 4)
|
||||||
|
CMPQ br_offset, $4
|
||||||
|
SETLT DL
|
||||||
|
ORB DL, DH
|
||||||
|
|
||||||
|
// end of br0.fillFast()
|
||||||
|
skip_fill0:
|
||||||
|
|
||||||
|
// val0 := br0.peekTopBits(peekBits)
|
||||||
|
#ifdef GOAMD64_v3
|
||||||
|
SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
#else
|
||||||
|
MOVQ br_value, AX
|
||||||
|
MOVQ peek_bits, CX
|
||||||
|
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// v0 := table[val0&mask]
|
||||||
|
MOVW 0(table)(AX*2), AX // AX - v0
|
||||||
|
|
||||||
|
// br0.advance(uint8(v0.entry))
|
||||||
|
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||||
|
|
||||||
|
#ifdef GOAMD64_v3
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLXQ AX, br_value, br_value // value <<= n
|
||||||
|
|
||||||
|
#else
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLQ CL, br_value // value <<= n
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
ADDQ CX, br_bits_read // bits_read += n
|
||||||
|
|
||||||
|
#ifdef GOAMD64_v3
|
||||||
|
SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
#else
|
||||||
|
// val1 := br0.peekTopBits(peekBits)
|
||||||
|
MOVQ peek_bits, CX
|
||||||
|
MOVQ br_value, AX
|
||||||
|
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// v1 := table[val1&mask]
|
||||||
|
MOVW 0(table)(AX*2), AX // AX - v1
|
||||||
|
|
||||||
|
// br0.advance(uint8(v1.entry))
|
||||||
|
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||||
|
|
||||||
|
#ifdef GOAMD64_v3
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLXQ AX, br_value, br_value // value <<= n
|
||||||
|
|
||||||
|
#else
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLQ CL, br_value // value <<= n
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
ADDQ CX, br_bits_read // bits_read += n
|
||||||
|
|
||||||
|
// these two writes get coalesced
|
||||||
|
// buf[stream][off] = uint8(v0.entry >> 8)
|
||||||
|
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
||||||
|
MOVW BX, 0(buffer)(off*1)
|
||||||
|
|
||||||
|
// update the bit reader structure
|
||||||
|
MOVB br_bits_read, bitReaderShifted_bitsRead(br0)
|
||||||
|
MOVQ br_value, bitReaderShifted_value(br0)
|
||||||
|
MOVQ br_offset, bitReaderShifted_off(br0)
|
||||||
|
|
||||||
|
// const stream = 1
|
||||||
|
// br1.fillFast()
|
||||||
|
MOVBQZX bitReaderShifted_bitsRead(br1), br_bits_read
|
||||||
|
MOVQ bitReaderShifted_value(br1), br_value
|
||||||
|
MOVQ bitReaderShifted_off(br1), br_offset
|
||||||
|
|
||||||
|
// We must have at least 2 * max tablelog left
|
||||||
|
CMPQ br_bits_read, $64-22
|
||||||
|
JBE skip_fill1
|
||||||
|
|
||||||
|
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
||||||
|
SUBQ $4, br_offset // b.off -= 4
|
||||||
|
|
||||||
|
// v := b.in[b.off-4 : b.off]
|
||||||
|
// v = v[:4]
|
||||||
|
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
||||||
|
MOVQ bitReaderShifted_in(br1), AX
|
||||||
|
|
||||||
|
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||||
|
#ifdef GOAMD64_v3
|
||||||
|
SHLXQ br_bits_read, 0(br_offset)(AX*1), AX // AX = b.in[b.off:...] << (b.bitsRead & 63); NOTE(review): the Q form with a memory source presumably loads 8 bytes, not the uint32 read in the #else branch - confirm the extra bytes are always in bounds and harmless
|
||||||
|
|
||||||
|
#else
|
||||||
|
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
||||||
|
MOVQ br_bits_read, CX
|
||||||
|
SHLQ CL, AX
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
ORQ AX, br_value
|
||||||
|
|
||||||
|
// exhausted = exhausted || (br1.off < 4)
|
||||||
|
CMPQ br_offset, $4
|
||||||
|
SETLT DL
|
||||||
|
ORB DL, DH
|
||||||
|
|
||||||
|
// end of br1.fillFast()
|
||||||
|
skip_fill1:
|
||||||
|
|
||||||
|
// val0 := br1.peekTopBits(peekBits)
|
||||||
|
#ifdef GOAMD64_v3
|
||||||
|
SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
#else
|
||||||
|
MOVQ br_value, AX
|
||||||
|
MOVQ peek_bits, CX
|
||||||
|
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// v0 := table[val0&mask]
|
||||||
|
MOVW 0(table)(AX*2), AX // AX - v0
|
||||||
|
|
||||||
|
// br1.advance(uint8(v0.entry))
|
||||||
|
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||||
|
|
||||||
|
#ifdef GOAMD64_v3
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLXQ AX, br_value, br_value // value <<= n
|
||||||
|
|
||||||
|
#else
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLQ CL, br_value // value <<= n
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
ADDQ CX, br_bits_read // bits_read += n
|
||||||
|
|
||||||
|
#ifdef GOAMD64_v3
|
||||||
|
SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
#else
|
||||||
|
// val1 := br1.peekTopBits(peekBits)
|
||||||
|
MOVQ peek_bits, CX
|
||||||
|
MOVQ br_value, AX
|
||||||
|
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// v1 := table[val1&mask]
|
||||||
|
MOVW 0(table)(AX*2), AX // AX - v1
|
||||||
|
|
||||||
|
// br1.advance(uint8(v1.entry))
|
||||||
|
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||||
|
|
||||||
|
#ifdef GOAMD64_v3
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLXQ AX, br_value, br_value // value <<= n
|
||||||
|
|
||||||
|
#else
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLQ CL, br_value // value <<= n
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
ADDQ CX, br_bits_read // bits_read += n
|
||||||
|
|
||||||
|
// these two writes get coalesced
|
||||||
|
// buf[stream][off] = uint8(v0.entry >> 8)
|
||||||
|
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
||||||
|
MOVW BX, 256(buffer)(off*1)
|
||||||
|
|
||||||
|
// update the bit reader structure
|
||||||
|
MOVB br_bits_read, bitReaderShifted_bitsRead(br1)
|
||||||
|
MOVQ br_value, bitReaderShifted_value(br1)
|
||||||
|
MOVQ br_offset, bitReaderShifted_off(br1)
|
||||||
|
|
||||||
|
// const stream = 2
|
||||||
|
// br2.fillFast()
|
||||||
|
MOVBQZX bitReaderShifted_bitsRead(br2), br_bits_read
|
||||||
|
MOVQ bitReaderShifted_value(br2), br_value
|
||||||
|
MOVQ bitReaderShifted_off(br2), br_offset
|
||||||
|
|
||||||
|
// We must have at least 2 * max tablelog left
|
||||||
|
CMPQ br_bits_read, $64-22
|
||||||
|
JBE skip_fill2
|
||||||
|
|
||||||
|
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
||||||
|
SUBQ $4, br_offset // b.off -= 4
|
||||||
|
|
||||||
|
// v := b.in[b.off-4 : b.off]
|
||||||
|
// v = v[:4]
|
||||||
|
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
||||||
|
MOVQ bitReaderShifted_in(br2), AX
|
||||||
|
|
||||||
|
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||||
|
#ifdef GOAMD64_v3
|
||||||
|
SHLXQ br_bits_read, 0(br_offset)(AX*1), AX // AX = b.in[b.off:...] << (b.bitsRead & 63); NOTE(review): the Q form with a memory source presumably loads 8 bytes, not the uint32 read in the #else branch - confirm the extra bytes are always in bounds and harmless
|
||||||
|
|
||||||
|
#else
|
||||||
|
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
||||||
|
MOVQ br_bits_read, CX
|
||||||
|
SHLQ CL, AX
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
ORQ AX, br_value
|
||||||
|
|
||||||
|
// exhausted = exhausted || (br2.off < 4)
|
||||||
|
CMPQ br_offset, $4
|
||||||
|
SETLT DL
|
||||||
|
ORB DL, DH
|
||||||
|
|
||||||
|
// end of br2.fillFast()
|
||||||
|
skip_fill2:
|
||||||
|
|
||||||
|
// val0 := br2.peekTopBits(peekBits)
|
||||||
|
#ifdef GOAMD64_v3
|
||||||
|
SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
#else
|
||||||
|
MOVQ br_value, AX
|
||||||
|
MOVQ peek_bits, CX
|
||||||
|
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// v0 := table[val0&mask]
|
||||||
|
MOVW 0(table)(AX*2), AX // AX - v0
|
||||||
|
|
||||||
|
// br2.advance(uint8(v0.entry))
|
||||||
|
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||||
|
|
||||||
|
#ifdef GOAMD64_v3
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLXQ AX, br_value, br_value // value <<= n
|
||||||
|
|
||||||
|
#else
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLQ CL, br_value // value <<= n
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
ADDQ CX, br_bits_read // bits_read += n
|
||||||
|
|
||||||
|
#ifdef GOAMD64_v3
|
||||||
|
SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
#else
|
||||||
|
// val1 := br2.peekTopBits(peekBits)
|
||||||
|
MOVQ peek_bits, CX
|
||||||
|
MOVQ br_value, AX
|
||||||
|
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// v1 := table[val1&mask]
|
||||||
|
MOVW 0(table)(AX*2), AX // AX - v1
|
||||||
|
|
||||||
|
// br2.advance(uint8(v1.entry))
|
||||||
|
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||||
|
|
||||||
|
#ifdef GOAMD64_v3
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLXQ AX, br_value, br_value // value <<= n
|
||||||
|
|
||||||
|
#else
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLQ CL, br_value // value <<= n
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
ADDQ CX, br_bits_read // bits_read += n
|
||||||
|
|
||||||
|
// these two writes get coalesced
|
||||||
|
// buf[stream][off] = uint8(v0.entry >> 8)
|
||||||
|
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
||||||
|
MOVW BX, 512(buffer)(off*1)
|
||||||
|
|
||||||
|
// update the bit reader structure
|
||||||
|
MOVB br_bits_read, bitReaderShifted_bitsRead(br2)
|
||||||
|
MOVQ br_value, bitReaderShifted_value(br2)
|
||||||
|
MOVQ br_offset, bitReaderShifted_off(br2)
|
||||||
|
|
||||||
|
// const stream = 3
|
||||||
|
// br3.fillFast()
|
||||||
|
MOVBQZX bitReaderShifted_bitsRead(br3), br_bits_read
|
||||||
|
MOVQ bitReaderShifted_value(br3), br_value
|
||||||
|
MOVQ bitReaderShifted_off(br3), br_offset
|
||||||
|
|
||||||
|
// We must have at least 2 * max tablelog left
|
||||||
|
CMPQ br_bits_read, $64-22
|
||||||
|
JBE skip_fill3
|
||||||
|
|
||||||
|
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
||||||
|
SUBQ $4, br_offset // b.off -= 4
|
||||||
|
|
||||||
|
// v := b.in[b.off-4 : b.off]
|
||||||
|
// v = v[:4]
|
||||||
|
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
||||||
|
MOVQ bitReaderShifted_in(br3), AX
|
||||||
|
|
||||||
|
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||||
|
#ifdef GOAMD64_v3
|
||||||
|
SHLXQ br_bits_read, 0(br_offset)(AX*1), AX // AX = b.in[b.off:...] << (b.bitsRead & 63); NOTE(review): the Q form with a memory source presumably loads 8 bytes, not the uint32 read in the #else branch - confirm the extra bytes are always in bounds and harmless
|
||||||
|
|
||||||
|
#else
|
||||||
|
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
||||||
|
MOVQ br_bits_read, CX
|
||||||
|
SHLQ CL, AX
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
ORQ AX, br_value
|
||||||
|
|
||||||
|
// exhausted = exhausted || (br3.off < 4)
|
||||||
|
CMPQ br_offset, $4
|
||||||
|
SETLT DL
|
||||||
|
ORB DL, DH
|
||||||
|
|
||||||
|
// end of br3.fillFast()
|
||||||
|
skip_fill3:
|
||||||
|
|
||||||
|
// val0 := br3.peekTopBits(peekBits)
|
||||||
|
#ifdef GOAMD64_v3
|
||||||
|
SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
#else
|
||||||
|
MOVQ br_value, AX
|
||||||
|
MOVQ peek_bits, CX
|
||||||
|
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// v0 := table[val0&mask]
|
||||||
|
MOVW 0(table)(AX*2), AX // AX - v0
|
||||||
|
|
||||||
|
// br3.advance(uint8(v0.entry))
|
||||||
|
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||||
|
|
||||||
|
#ifdef GOAMD64_v3
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLXQ AX, br_value, br_value // value <<= n
|
||||||
|
|
||||||
|
#else
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLQ CL, br_value // value <<= n
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
ADDQ CX, br_bits_read // bits_read += n
|
||||||
|
|
||||||
|
#ifdef GOAMD64_v3
|
||||||
|
SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
#else
|
||||||
|
// val1 := br3.peekTopBits(peekBits)
|
||||||
|
MOVQ peek_bits, CX
|
||||||
|
MOVQ br_value, AX
|
||||||
|
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// v1 := table[val1&mask]
|
||||||
|
MOVW 0(table)(AX*2), AX // AX - v1
|
||||||
|
|
||||||
|
// br3.advance(uint8(v1.entry))
|
||||||
|
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||||
|
|
||||||
|
#ifdef GOAMD64_v3
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLXQ AX, br_value, br_value // value <<= n
|
||||||
|
|
||||||
|
#else
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLQ CL, br_value // value <<= n
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
ADDQ CX, br_bits_read // bits_read += n
|
||||||
|
|
||||||
|
// these two writes get coalesced
|
||||||
|
// buf[stream][off] = uint8(v0.entry >> 8)
|
||||||
|
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
||||||
|
MOVW BX, 768(buffer)(off*1)
|
||||||
|
|
||||||
|
// update the bit reader structure
|
||||||
|
MOVB br_bits_read, bitReaderShifted_bitsRead(br3)
|
||||||
|
MOVQ br_value, bitReaderShifted_value(br3)
|
||||||
|
MOVQ br_offset, bitReaderShifted_off(br3)
|
||||||
|
|
||||||
|
ADDQ $2, off // off += 2
|
||||||
|
|
||||||
|
TESTB DH, DH // any br[i].off < 4?
|
||||||
|
JNZ end
|
||||||
|
|
||||||
|
CMPQ off, $bufoff
|
||||||
|
JL main_loop
|
||||||
|
|
||||||
|
end:
|
||||||
|
MOVQ 0(SP), BP // restore BP
|
||||||
|
|
||||||
|
MOVB off, ret+56(FP) // return uint8(off); a full pass (off == bufoff == 256) is returned as 0
|
||||||
|
RET
|
||||||
|
|
||||||
|
#undef off
|
||||||
|
#undef buffer
|
||||||
|
#undef table
|
||||||
|
|
||||||
|
#undef br_bits_read
|
||||||
|
#undef br_value
|
||||||
|
#undef br_offset
|
||||||
|
#undef peek_bits
|
||||||
|
#undef exhausted
|
||||||
|
|
||||||
|
#undef br0
|
||||||
|
#undef br1
|
||||||
|
#undef br2
|
||||||
|
#undef br3
|
|
@ -0,0 +1,195 @@
|
||||||
|
// +build !appengine
|
||||||
|
// +build gc
|
||||||
|
// +build !noasm
|
||||||
|
|
||||||
|
#include "textflag.h"
|
||||||
|
#include "funcdata.h"
|
||||||
|
#include "go_asm.h"
|
||||||
|
|
||||||
|
#ifdef GOAMD64_v4
|
||||||
|
#ifndef GOAMD64_v3
|
||||||
|
#define GOAMD64_v3
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define bufoff 256 // see decompress.go, we're using [4][256]byte table
|
||||||
|
|
||||||
|
//func decompress4x_main_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
|
||||||
|
// peekBits uint8, buf *byte, tbl *dEntrySingle) (int, bool)
|
||||||
|
TEXT ·decompress4x_main_loop_x86(SB), NOSPLIT, $8
|
||||||
|
#define off R8
|
||||||
|
#define buffer DI
|
||||||
|
#define table SI
|
||||||
|
|
||||||
|
#define br_bits_read R9
|
||||||
|
#define br_value R10
|
||||||
|
#define br_offset R11
|
||||||
|
#define peek_bits R12
|
||||||
|
#define exhausted DX
|
||||||
|
|
||||||
|
#define br0 R13
|
||||||
|
#define br1 R14
|
||||||
|
#define br2 R15
|
||||||
|
#define br3 BP
|
||||||
|
|
||||||
|
MOVQ BP, 0(SP)
|
||||||
|
|
||||||
|
XORQ exhausted, exhausted // exhausted = false
|
||||||
|
XORQ off, off // off = 0
|
||||||
|
|
||||||
|
MOVBQZX peekBits+32(FP), peek_bits
|
||||||
|
MOVQ buf+40(FP), buffer
|
||||||
|
MOVQ tbl+48(FP), table
|
||||||
|
|
||||||
|
MOVQ pbr0+0(FP), br0
|
||||||
|
MOVQ pbr1+8(FP), br1
|
||||||
|
MOVQ pbr2+16(FP), br2
|
||||||
|
MOVQ pbr3+24(FP), br3
|
||||||
|
|
||||||
|
main_loop:
|
||||||
|
{{ define "decode_2_values_x86" }}
|
||||||
|
// const stream = {{ var "id" }}
|
||||||
|
// br{{ var "id"}}.fillFast()
|
||||||
|
MOVBQZX bitReaderShifted_bitsRead(br{{ var "id" }}), br_bits_read
|
||||||
|
MOVQ bitReaderShifted_value(br{{ var "id" }}), br_value
|
||||||
|
MOVQ bitReaderShifted_off(br{{ var "id" }}), br_offset
|
||||||
|
|
||||||
|
// We must have at least 2 * max tablelog left
|
||||||
|
CMPQ br_bits_read, $64-22
|
||||||
|
JBE skip_fill{{ var "id" }}
|
||||||
|
|
||||||
|
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
||||||
|
SUBQ $4, br_offset // b.off -= 4
|
||||||
|
|
||||||
|
// v := b.in[b.off-4 : b.off]
|
||||||
|
// v = v[:4]
|
||||||
|
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
||||||
|
MOVQ bitReaderShifted_in(br{{ var "id" }}), AX
|
||||||
|
|
||||||
|
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||||
|
#ifdef GOAMD64_v3
|
||||||
|
SHLXQ br_bits_read, 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) << (b.bitsRead & 63)
|
||||||
|
#else
|
||||||
|
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
||||||
|
MOVQ br_bits_read, CX
|
||||||
|
SHLQ CL, AX
|
||||||
|
#endif
|
||||||
|
|
||||||
|
ORQ AX, br_value
|
||||||
|
|
||||||
|
// exhausted = exhausted || (br{{ var "id"}}.off < 4)
|
||||||
|
CMPQ br_offset, $4
|
||||||
|
SETLT DL
|
||||||
|
ORB DL, DH
|
||||||
|
// }
|
||||||
|
skip_fill{{ var "id" }}:
|
||||||
|
|
||||||
|
// val0 := br{{ var "id"}}.peekTopBits(peekBits)
|
||||||
|
#ifdef GOAMD64_v3
|
||||||
|
SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
|
||||||
|
#else
|
||||||
|
MOVQ br_value, AX
|
||||||
|
MOVQ peek_bits, CX
|
||||||
|
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// v0 := table[val0&mask]
|
||||||
|
MOVW 0(table)(AX*2), AX // AX - v0
|
||||||
|
|
||||||
|
// br{{ var "id"}}.advance(uint8(v0.entry))
|
||||||
|
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||||
|
|
||||||
|
#ifdef GOAMD64_v3
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLXQ AX, br_value, br_value // value <<= n
|
||||||
|
#else
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLQ CL, br_value // value <<= n
|
||||||
|
#endif
|
||||||
|
|
||||||
|
ADDQ CX, br_bits_read // bits_read += n
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef GOAMD64_v3
|
||||||
|
SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
|
||||||
|
#else
|
||||||
|
// val1 := br{{ var "id"}}.peekTopBits(peekBits)
|
||||||
|
MOVQ peek_bits, CX
|
||||||
|
MOVQ br_value, AX
|
||||||
|
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// v1 := table[val1&mask]
|
||||||
|
MOVW 0(table)(AX*2), AX // AX - v1
|
||||||
|
|
||||||
|
// br{{ var "id"}}.advance(uint8(v1.entry))
|
||||||
|
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||||
|
|
||||||
|
#ifdef GOAMD64_v3
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLXQ AX, br_value, br_value // value <<= n
|
||||||
|
#else
|
||||||
|
MOVBQZX AL, CX
|
||||||
|
SHLQ CL, br_value // value <<= n
|
||||||
|
#endif
|
||||||
|
|
||||||
|
ADDQ CX, br_bits_read // bits_read += n
|
||||||
|
|
||||||
|
|
||||||
|
// these two writes get coalesced
|
||||||
|
// buf[stream][off] = uint8(v0.entry >> 8)
|
||||||
|
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
||||||
|
MOVW BX, {{ var "bufofs" }}(buffer)(off*1)
|
||||||
|
|
||||||
|
// update the bit reader structure
|
||||||
|
MOVB br_bits_read, bitReaderShifted_bitsRead(br{{ var "id" }})
|
||||||
|
MOVQ br_value, bitReaderShifted_value(br{{ var "id" }})
|
||||||
|
MOVQ br_offset, bitReaderShifted_off(br{{ var "id" }})
|
||||||
|
{{ end }}
|
||||||
|
|
||||||
|
{{ set "id" "0" }}
|
||||||
|
{{ set "ofs" "0" }}
|
||||||
|
{{ set "bufofs" "0" }} {{/* id * bufoff */}}
|
||||||
|
{{ template "decode_2_values_x86" . }}
|
||||||
|
|
||||||
|
{{ set "id" "1" }}
|
||||||
|
{{ set "ofs" "8" }}
|
||||||
|
{{ set "bufofs" "256" }}
|
||||||
|
{{ template "decode_2_values_x86" . }}
|
||||||
|
|
||||||
|
{{ set "id" "2" }}
|
||||||
|
{{ set "ofs" "16" }}
|
||||||
|
{{ set "bufofs" "512" }}
|
||||||
|
{{ template "decode_2_values_x86" . }}
|
||||||
|
|
||||||
|
{{ set "id" "3" }}
|
||||||
|
{{ set "ofs" "24" }}
|
||||||
|
{{ set "bufofs" "768" }}
|
||||||
|
{{ template "decode_2_values_x86" . }}
|
||||||
|
|
||||||
|
ADDQ $2, off // off += 2
|
||||||
|
|
||||||
|
TESTB DH, DH // any br[i].ofs < 4?
|
||||||
|
JNZ end
|
||||||
|
|
||||||
|
CMPQ off, $bufoff
|
||||||
|
JL main_loop
|
||||||
|
end:
|
||||||
|
MOVQ 0(SP), BP
|
||||||
|
|
||||||
|
MOVB off, ret+56(FP)
|
||||||
|
RET
|
||||||
|
#undef off
|
||||||
|
#undef buffer
|
||||||
|
#undef table
|
||||||
|
|
||||||
|
#undef br_bits_read
|
||||||
|
#undef br_value
|
||||||
|
#undef br_offset
|
||||||
|
#undef peek_bits
|
||||||
|
#undef exhausted
|
||||||
|
|
||||||
|
#undef br0
|
||||||
|
#undef br1
|
||||||
|
#undef br2
|
||||||
|
#undef br3
|
|
@ -0,0 +1,193 @@
|
||||||
|
//go:build !amd64 || appengine || !gc || noasm
|
||||||
|
// +build !amd64 appengine !gc noasm
|
||||||
|
|
||||||
|
// This file contains a generic implementation of Decoder.Decompress4X.
|
||||||
|
package huff0
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Decompress4X will decompress a 4X encoded stream.
|
||||||
|
// The length of the supplied input must match the end of a block exactly.
|
||||||
|
// The *capacity* of the dst slice must match the destination size of
|
||||||
|
// the uncompressed data exactly.
|
||||||
|
func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
|
||||||
|
if len(d.dt.single) == 0 {
|
||||||
|
return nil, errors.New("no table loaded")
|
||||||
|
}
|
||||||
|
if len(src) < 6+(4*1) {
|
||||||
|
return nil, errors.New("input too small")
|
||||||
|
}
|
||||||
|
if use8BitTables && d.actualTableLog <= 8 {
|
||||||
|
return d.decompress4X8bit(dst, src)
|
||||||
|
}
|
||||||
|
|
||||||
|
var br [4]bitReaderShifted
|
||||||
|
// Decode "jump table"
|
||||||
|
start := 6
|
||||||
|
for i := 0; i < 3; i++ {
|
||||||
|
length := int(src[i*2]) | (int(src[i*2+1]) << 8)
|
||||||
|
if start+length >= len(src) {
|
||||||
|
return nil, errors.New("truncated input (or invalid offset)")
|
||||||
|
}
|
||||||
|
err := br[i].init(src[start : start+length])
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
start += length
|
||||||
|
}
|
||||||
|
err := br[3].init(src[start:])
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// destination, offset to match first output
|
||||||
|
dstSize := cap(dst)
|
||||||
|
dst = dst[:dstSize]
|
||||||
|
out := dst
|
||||||
|
dstEvery := (dstSize + 3) / 4
|
||||||
|
|
||||||
|
const tlSize = 1 << tableLogMax
|
||||||
|
const tlMask = tlSize - 1
|
||||||
|
single := d.dt.single[:tlSize]
|
||||||
|
|
||||||
|
// Use temp table to avoid bound checks/append penalty.
|
||||||
|
buf := d.buffer()
|
||||||
|
var off uint8
|
||||||
|
var decoded int
|
||||||
|
|
||||||
|
// Decode 2 values from each decoder/loop.
|
||||||
|
const bufoff = 256
|
||||||
|
for {
|
||||||
|
if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const stream = 0
|
||||||
|
const stream2 = 1
|
||||||
|
br[stream].fillFast()
|
||||||
|
br[stream2].fillFast()
|
||||||
|
|
||||||
|
val := br[stream].peekBitsFast(d.actualTableLog)
|
||||||
|
val2 := br[stream2].peekBitsFast(d.actualTableLog)
|
||||||
|
v := single[val&tlMask]
|
||||||
|
v2 := single[val2&tlMask]
|
||||||
|
br[stream].advance(uint8(v.entry))
|
||||||
|
br[stream2].advance(uint8(v2.entry))
|
||||||
|
buf[stream][off] = uint8(v.entry >> 8)
|
||||||
|
buf[stream2][off] = uint8(v2.entry >> 8)
|
||||||
|
|
||||||
|
val = br[stream].peekBitsFast(d.actualTableLog)
|
||||||
|
val2 = br[stream2].peekBitsFast(d.actualTableLog)
|
||||||
|
v = single[val&tlMask]
|
||||||
|
v2 = single[val2&tlMask]
|
||||||
|
br[stream].advance(uint8(v.entry))
|
||||||
|
br[stream2].advance(uint8(v2.entry))
|
||||||
|
buf[stream][off+1] = uint8(v.entry >> 8)
|
||||||
|
buf[stream2][off+1] = uint8(v2.entry >> 8)
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
const stream = 2
|
||||||
|
const stream2 = 3
|
||||||
|
br[stream].fillFast()
|
||||||
|
br[stream2].fillFast()
|
||||||
|
|
||||||
|
val := br[stream].peekBitsFast(d.actualTableLog)
|
||||||
|
val2 := br[stream2].peekBitsFast(d.actualTableLog)
|
||||||
|
v := single[val&tlMask]
|
||||||
|
v2 := single[val2&tlMask]
|
||||||
|
br[stream].advance(uint8(v.entry))
|
||||||
|
br[stream2].advance(uint8(v2.entry))
|
||||||
|
buf[stream][off] = uint8(v.entry >> 8)
|
||||||
|
buf[stream2][off] = uint8(v2.entry >> 8)
|
||||||
|
|
||||||
|
val = br[stream].peekBitsFast(d.actualTableLog)
|
||||||
|
val2 = br[stream2].peekBitsFast(d.actualTableLog)
|
||||||
|
v = single[val&tlMask]
|
||||||
|
v2 = single[val2&tlMask]
|
||||||
|
br[stream].advance(uint8(v.entry))
|
||||||
|
br[stream2].advance(uint8(v2.entry))
|
||||||
|
buf[stream][off+1] = uint8(v.entry >> 8)
|
||||||
|
buf[stream2][off+1] = uint8(v2.entry >> 8)
|
||||||
|
}
|
||||||
|
|
||||||
|
off += 2
|
||||||
|
|
||||||
|
if off == 0 {
|
||||||
|
if bufoff > dstEvery {
|
||||||
|
d.bufs.Put(buf)
|
||||||
|
return nil, errors.New("corruption detected: stream overrun 1")
|
||||||
|
}
|
||||||
|
copy(out, buf[0][:])
|
||||||
|
copy(out[dstEvery:], buf[1][:])
|
||||||
|
copy(out[dstEvery*2:], buf[2][:])
|
||||||
|
copy(out[dstEvery*3:], buf[3][:])
|
||||||
|
out = out[bufoff:]
|
||||||
|
decoded += bufoff * 4
|
||||||
|
// There must at least be 3 buffers left.
|
||||||
|
if len(out) < dstEvery*3 {
|
||||||
|
d.bufs.Put(buf)
|
||||||
|
return nil, errors.New("corruption detected: stream overrun 2")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if off > 0 {
|
||||||
|
ioff := int(off)
|
||||||
|
if len(out) < dstEvery*3+ioff {
|
||||||
|
d.bufs.Put(buf)
|
||||||
|
return nil, errors.New("corruption detected: stream overrun 3")
|
||||||
|
}
|
||||||
|
copy(out, buf[0][:off])
|
||||||
|
copy(out[dstEvery:], buf[1][:off])
|
||||||
|
copy(out[dstEvery*2:], buf[2][:off])
|
||||||
|
copy(out[dstEvery*3:], buf[3][:off])
|
||||||
|
decoded += int(off) * 4
|
||||||
|
out = out[off:]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Decode remaining.
|
||||||
|
remainBytes := dstEvery - (decoded / 4)
|
||||||
|
for i := range br {
|
||||||
|
offset := dstEvery * i
|
||||||
|
endsAt := offset + remainBytes
|
||||||
|
if endsAt > len(out) {
|
||||||
|
endsAt = len(out)
|
||||||
|
}
|
||||||
|
br := &br[i]
|
||||||
|
bitsLeft := br.remaining()
|
||||||
|
for bitsLeft > 0 {
|
||||||
|
br.fill()
|
||||||
|
if offset >= endsAt {
|
||||||
|
d.bufs.Put(buf)
|
||||||
|
return nil, errors.New("corruption detected: stream overrun 4")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read value and increment offset.
|
||||||
|
val := br.peekBitsFast(d.actualTableLog)
|
||||||
|
v := single[val&tlMask].entry
|
||||||
|
nBits := uint8(v)
|
||||||
|
br.advance(nBits)
|
||||||
|
bitsLeft -= uint(nBits)
|
||||||
|
out[offset] = uint8(v >> 8)
|
||||||
|
offset++
|
||||||
|
}
|
||||||
|
if offset != endsAt {
|
||||||
|
d.bufs.Put(buf)
|
||||||
|
return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt)
|
||||||
|
}
|
||||||
|
decoded += offset - dstEvery*i
|
||||||
|
err = br.close()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
d.bufs.Put(buf)
|
||||||
|
if dstSize != decoded {
|
||||||
|
return nil, errors.New("corruption detected: short output block")
|
||||||
|
}
|
||||||
|
return dst, nil
|
||||||
|
}
|
|
@ -153,10 +153,10 @@ http://sun.aei.polsl.pl/~sdeor/corpus/silesia.zip
|
||||||
|
|
||||||
This package:
|
This package:
|
||||||
file out level insize outsize millis mb/s
|
file out level insize outsize millis mb/s
|
||||||
silesia.tar zskp 1 211947520 73101992 643 313.87
|
silesia.tar zskp 1 211947520 73821326 634 318.47
|
||||||
silesia.tar zskp 2 211947520 67504318 969 208.38
|
silesia.tar zskp 2 211947520 67655404 1508 133.96
|
||||||
silesia.tar zskp 3 211947520 64595893 2007 100.68
|
silesia.tar zskp 3 211947520 64746933 3000 67.37
|
||||||
silesia.tar zskp 4 211947520 60995370 8825 22.90
|
silesia.tar zskp 4 211947520 60073508 16926 11.94
|
||||||
|
|
||||||
cgo zstd:
|
cgo zstd:
|
||||||
silesia.tar zstd 1 211947520 73605392 543 371.56
|
silesia.tar zstd 1 211947520 73605392 543 371.56
|
||||||
|
@ -165,94 +165,94 @@ silesia.tar zstd 6 211947520 62916450 1913 105.66
|
||||||
silesia.tar zstd 9 211947520 60212393 5063 39.92
|
silesia.tar zstd 9 211947520 60212393 5063 39.92
|
||||||
|
|
||||||
gzip, stdlib/this package:
|
gzip, stdlib/this package:
|
||||||
silesia.tar gzstd 1 211947520 80007735 1654 122.21
|
silesia.tar gzstd 1 211947520 80007735 1498 134.87
|
||||||
silesia.tar gzkp 1 211947520 80136201 1152 175.45
|
silesia.tar gzkp 1 211947520 80088272 1009 200.31
|
||||||
|
|
||||||
GOB stream of binary data. Highly compressible.
|
GOB stream of binary data. Highly compressible.
|
||||||
https://files.klauspost.com/compress/gob-stream.7z
|
https://files.klauspost.com/compress/gob-stream.7z
|
||||||
|
|
||||||
file out level insize outsize millis mb/s
|
file out level insize outsize millis mb/s
|
||||||
gob-stream zskp 1 1911399616 235022249 3088 590.30
|
gob-stream zskp 1 1911399616 233948096 3230 564.34
|
||||||
gob-stream zskp 2 1911399616 205669791 3786 481.34
|
gob-stream zskp 2 1911399616 203997694 4997 364.73
|
||||||
gob-stream zskp 3 1911399616 175034659 9636 189.17
|
gob-stream zskp 3 1911399616 173526523 13435 135.68
|
||||||
gob-stream zskp 4 1911399616 165609838 50369 36.19
|
gob-stream zskp 4 1911399616 162195235 47559 38.33
|
||||||
|
|
||||||
gob-stream zstd 1 1911399616 249810424 2637 691.26
|
gob-stream zstd 1 1911399616 249810424 2637 691.26
|
||||||
gob-stream zstd 3 1911399616 208192146 3490 522.31
|
gob-stream zstd 3 1911399616 208192146 3490 522.31
|
||||||
gob-stream zstd 6 1911399616 193632038 6687 272.56
|
gob-stream zstd 6 1911399616 193632038 6687 272.56
|
||||||
gob-stream zstd 9 1911399616 177620386 16175 112.70
|
gob-stream zstd 9 1911399616 177620386 16175 112.70
|
||||||
|
|
||||||
gob-stream gzstd 1 1911399616 357382641 10251 177.82
|
gob-stream gzstd 1 1911399616 357382013 9046 201.49
|
||||||
gob-stream gzkp 1 1911399616 359753026 5438 335.20
|
gob-stream gzkp 1 1911399616 359136669 4885 373.08
|
||||||
|
|
||||||
The test data for the Large Text Compression Benchmark is the first
|
The test data for the Large Text Compression Benchmark is the first
|
||||||
10^9 bytes of the English Wikipedia dump on Mar. 3, 2006.
|
10^9 bytes of the English Wikipedia dump on Mar. 3, 2006.
|
||||||
http://mattmahoney.net/dc/textdata.html
|
http://mattmahoney.net/dc/textdata.html
|
||||||
|
|
||||||
file out level insize outsize millis mb/s
|
file out level insize outsize millis mb/s
|
||||||
enwik9 zskp 1 1000000000 343848582 3609 264.18
|
enwik9 zskp 1 1000000000 343833605 3687 258.64
|
||||||
enwik9 zskp 2 1000000000 317276632 5746 165.97
|
enwik9 zskp 2 1000000000 317001237 7672 124.29
|
||||||
enwik9 zskp 3 1000000000 292243069 12162 78.41
|
enwik9 zskp 3 1000000000 291915823 15923 59.89
|
||||||
enwik9 zskp 4 1000000000 262183768 82837 11.51
|
enwik9 zskp 4 1000000000 261710291 77697 12.27
|
||||||
|
|
||||||
enwik9 zstd 1 1000000000 358072021 3110 306.65
|
enwik9 zstd 1 1000000000 358072021 3110 306.65
|
||||||
enwik9 zstd 3 1000000000 313734672 4784 199.35
|
enwik9 zstd 3 1000000000 313734672 4784 199.35
|
||||||
enwik9 zstd 6 1000000000 295138875 10290 92.68
|
enwik9 zstd 6 1000000000 295138875 10290 92.68
|
||||||
enwik9 zstd 9 1000000000 278348700 28549 33.40
|
enwik9 zstd 9 1000000000 278348700 28549 33.40
|
||||||
|
|
||||||
enwik9 gzstd 1 1000000000 382578136 9604 99.30
|
enwik9 gzstd 1 1000000000 382578136 8608 110.78
|
||||||
enwik9 gzkp 1 1000000000 383825945 6544 145.73
|
enwik9 gzkp 1 1000000000 382781160 5628 169.45
|
||||||
|
|
||||||
Highly compressible JSON file.
|
Highly compressible JSON file.
|
||||||
https://files.klauspost.com/compress/github-june-2days-2019.json.zst
|
https://files.klauspost.com/compress/github-june-2days-2019.json.zst
|
||||||
|
|
||||||
file out level insize outsize millis mb/s
|
file out level insize outsize millis mb/s
|
||||||
github-june-2days-2019.json zskp 1 6273951764 699045015 10620 563.40
|
github-june-2days-2019.json zskp 1 6273951764 697439532 9789 611.17
|
||||||
github-june-2days-2019.json zskp 2 6273951764 617881763 11687 511.96
|
github-june-2days-2019.json zskp 2 6273951764 610876538 18553 322.49
|
||||||
github-june-2days-2019.json zskp 3 6273951764 524340691 34043 175.75
|
github-june-2days-2019.json zskp 3 6273951764 517662858 44186 135.41
|
||||||
github-june-2days-2019.json zskp 4 6273951764 470320075 170190 35.16
|
github-june-2days-2019.json zskp 4 6273951764 464617114 165373 36.18
|
||||||
|
|
||||||
github-june-2days-2019.json zstd 1 6273951764 766284037 8450 708.00
|
github-june-2days-2019.json zstd 1 6273951764 766284037 8450 708.00
|
||||||
github-june-2days-2019.json zstd 3 6273951764 661889476 10927 547.57
|
github-june-2days-2019.json zstd 3 6273951764 661889476 10927 547.57
|
||||||
github-june-2days-2019.json zstd 6 6273951764 642756859 22996 260.18
|
github-june-2days-2019.json zstd 6 6273951764 642756859 22996 260.18
|
||||||
github-june-2days-2019.json zstd 9 6273951764 601974523 52413 114.16
|
github-june-2days-2019.json zstd 9 6273951764 601974523 52413 114.16
|
||||||
|
|
||||||
github-june-2days-2019.json gzstd 1 6273951764 1164400847 29948 199.79
|
github-june-2days-2019.json gzstd 1 6273951764 1164397768 26793 223.32
|
||||||
github-june-2days-2019.json gzkp 1 6273951764 1125417694 21788 274.61
|
github-june-2days-2019.json gzkp 1 6273951764 1120631856 17693 338.16
|
||||||
|
|
||||||
VM Image, Linux mint with a few installed applications:
|
VM Image, Linux mint with a few installed applications:
|
||||||
https://files.klauspost.com/compress/rawstudio-mint14.7z
|
https://files.klauspost.com/compress/rawstudio-mint14.7z
|
||||||
|
|
||||||
file out level insize outsize millis mb/s
|
file out level insize outsize millis mb/s
|
||||||
rawstudio-mint14.tar zskp 1 8558382592 3667489370 20210 403.84
|
rawstudio-mint14.tar zskp 1 8558382592 3718400221 18206 448.29
|
||||||
rawstudio-mint14.tar zskp 2 8558382592 3364592300 31873 256.07
|
rawstudio-mint14.tar zskp 2 8558382592 3326118337 37074 220.15
|
||||||
rawstudio-mint14.tar zskp 3 8558382592 3158085214 77675 105.08
|
rawstudio-mint14.tar zskp 3 8558382592 3163842361 87306 93.49
|
||||||
rawstudio-mint14.tar zskp 4 8558382592 2965110639 857750 9.52
|
rawstudio-mint14.tar zskp 4 8558382592 2970480650 783862 10.41
|
||||||
|
|
||||||
rawstudio-mint14.tar zstd 1 8558382592 3609250104 17136 476.27
|
rawstudio-mint14.tar zstd 1 8558382592 3609250104 17136 476.27
|
||||||
rawstudio-mint14.tar zstd 3 8558382592 3341679997 29262 278.92
|
rawstudio-mint14.tar zstd 3 8558382592 3341679997 29262 278.92
|
||||||
rawstudio-mint14.tar zstd 6 8558382592 3235846406 77904 104.77
|
rawstudio-mint14.tar zstd 6 8558382592 3235846406 77904 104.77
|
||||||
rawstudio-mint14.tar zstd 9 8558382592 3160778861 140946 57.91
|
rawstudio-mint14.tar zstd 9 8558382592 3160778861 140946 57.91
|
||||||
|
|
||||||
rawstudio-mint14.tar gzstd 1 8558382592 3926257486 57722 141.40
|
rawstudio-mint14.tar gzstd 1 8558382592 3926234992 51345 158.96
|
||||||
rawstudio-mint14.tar gzkp 1 8558382592 3962605659 45113 180.92
|
rawstudio-mint14.tar gzkp 1 8558382592 3960117298 36722 222.26
|
||||||
|
|
||||||
CSV data:
|
CSV data:
|
||||||
https://files.klauspost.com/compress/nyc-taxi-data-10M.csv.zst
|
https://files.klauspost.com/compress/nyc-taxi-data-10M.csv.zst
|
||||||
|
|
||||||
file out level insize outsize millis mb/s
|
file out level insize outsize millis mb/s
|
||||||
nyc-taxi-data-10M.csv zskp 1 3325605752 641339945 8925 355.35
|
nyc-taxi-data-10M.csv zskp 1 3325605752 641319332 9462 335.17
|
||||||
nyc-taxi-data-10M.csv zskp 2 3325605752 591748091 11268 281.44
|
nyc-taxi-data-10M.csv zskp 2 3325605752 588976126 17570 180.50
|
||||||
nyc-taxi-data-10M.csv zskp 3 3325605752 530289687 25239 125.66
|
nyc-taxi-data-10M.csv zskp 3 3325605752 529329260 32432 97.79
|
||||||
nyc-taxi-data-10M.csv zskp 4 3325605752 476268884 135958 23.33
|
nyc-taxi-data-10M.csv zskp 4 3325605752 474949772 138025 22.98
|
||||||
|
|
||||||
nyc-taxi-data-10M.csv zstd 1 3325605752 687399637 8233 385.18
|
nyc-taxi-data-10M.csv zstd 1 3325605752 687399637 8233 385.18
|
||||||
nyc-taxi-data-10M.csv zstd 3 3325605752 598514411 10065 315.07
|
nyc-taxi-data-10M.csv zstd 3 3325605752 598514411 10065 315.07
|
||||||
nyc-taxi-data-10M.csv zstd 6 3325605752 570522953 20038 158.27
|
nyc-taxi-data-10M.csv zstd 6 3325605752 570522953 20038 158.27
|
||||||
nyc-taxi-data-10M.csv zstd 9 3325605752 517554797 64565 49.12
|
nyc-taxi-data-10M.csv zstd 9 3325605752 517554797 64565 49.12
|
||||||
|
|
||||||
nyc-taxi-data-10M.csv gzstd 1 3325605752 928656485 23876 132.83
|
nyc-taxi-data-10M.csv gzstd 1 3325605752 928654908 21270 149.11
|
||||||
nyc-taxi-data-10M.csv gzkp 1 3325605752 922257165 16780 189.00
|
nyc-taxi-data-10M.csv gzkp 1 3325605752 922273214 13929 227.68
|
||||||
```
|
```
|
||||||
|
|
||||||
## Decompressor
|
## Decompressor
|
||||||
|
|
|
@ -167,6 +167,11 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
|
||||||
}
|
}
|
||||||
return ErrCompressedSizeTooBig
|
return ErrCompressedSizeTooBig
|
||||||
}
|
}
|
||||||
|
// Empty compressed blocks must at least be 2 bytes
|
||||||
|
// for Literals_Block_Type and one for Sequences_Section_Header.
|
||||||
|
if cSize < 2 {
|
||||||
|
return ErrBlockTooSmall
|
||||||
|
}
|
||||||
case blockTypeRaw:
|
case blockTypeRaw:
|
||||||
if cSize > maxCompressedBlockSize || cSize > int(b.WindowSize) {
|
if cSize > maxCompressedBlockSize || cSize > int(b.WindowSize) {
|
||||||
if debugDecoder {
|
if debugDecoder {
|
||||||
|
@ -491,6 +496,9 @@ func (b *blockDec) decodeCompressed(hist *history) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) {
|
func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) {
|
||||||
|
if debugDecoder {
|
||||||
|
printf("prepareSequences: %d byte(s) input\n", len(in))
|
||||||
|
}
|
||||||
// Decode Sequences
|
// Decode Sequences
|
||||||
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#sequences-section
|
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#sequences-section
|
||||||
if len(in) < 1 {
|
if len(in) < 1 {
|
||||||
|
@ -499,8 +507,6 @@ func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) {
|
||||||
var nSeqs int
|
var nSeqs int
|
||||||
seqHeader := in[0]
|
seqHeader := in[0]
|
||||||
switch {
|
switch {
|
||||||
case seqHeader == 0:
|
|
||||||
in = in[1:]
|
|
||||||
case seqHeader < 128:
|
case seqHeader < 128:
|
||||||
nSeqs = int(seqHeader)
|
nSeqs = int(seqHeader)
|
||||||
in = in[1:]
|
in = in[1:]
|
||||||
|
@ -517,6 +523,13 @@ func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) {
|
||||||
nSeqs = 0x7f00 + int(in[1]) + (int(in[2]) << 8)
|
nSeqs = 0x7f00 + int(in[1]) + (int(in[2]) << 8)
|
||||||
in = in[3:]
|
in = in[3:]
|
||||||
}
|
}
|
||||||
|
if nSeqs == 0 && len(in) != 0 {
|
||||||
|
// When no sequences, there should not be any more data...
|
||||||
|
if debugDecoder {
|
||||||
|
printf("prepareSequences: 0 sequences, but %d byte(s) left on stream\n", len(in))
|
||||||
|
}
|
||||||
|
return ErrUnexpectedBlockSize
|
||||||
|
}
|
||||||
|
|
||||||
var seqs = &hist.decoders
|
var seqs = &hist.decoders
|
||||||
seqs.nSeqs = nSeqs
|
seqs.nSeqs = nSeqs
|
||||||
|
@ -635,6 +648,7 @@ func (b *blockDec) decodeSequences(hist *history) error {
|
||||||
hist.decoders.seqSize = len(hist.decoders.literals)
|
hist.decoders.seqSize = len(hist.decoders.literals)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
hist.decoders.windowSize = hist.windowSize
|
||||||
hist.decoders.prevOffset = hist.recentOffsets
|
hist.decoders.prevOffset = hist.recentOffsets
|
||||||
err := hist.decoders.decode(b.sequence)
|
err := hist.decoders.decode(b.sequence)
|
||||||
hist.recentOffsets = hist.decoders.prevOffset
|
hist.recentOffsets = hist.decoders.prevOffset
|
||||||
|
|
|
@ -348,10 +348,10 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
|
||||||
frame.history.setDict(&dict)
|
frame.history.setDict(&dict)
|
||||||
}
|
}
|
||||||
|
|
||||||
if frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)) {
|
if frame.FrameContentSize != fcsUnknown && frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)) {
|
||||||
return dst, ErrDecoderSizeExceeded
|
return dst, ErrDecoderSizeExceeded
|
||||||
}
|
}
|
||||||
if frame.FrameContentSize > 0 && frame.FrameContentSize < 1<<30 {
|
if frame.FrameContentSize < 1<<30 {
|
||||||
// Never preallocate more than 1 GB up front.
|
// Never preallocate more than 1 GB up front.
|
||||||
if cap(dst)-len(dst) < int(frame.FrameContentSize) {
|
if cap(dst)-len(dst) < int(frame.FrameContentSize) {
|
||||||
dst2 := make([]byte, len(dst), len(dst)+int(frame.FrameContentSize))
|
dst2 := make([]byte, len(dst), len(dst)+int(frame.FrameContentSize))
|
||||||
|
@ -514,7 +514,7 @@ func (d *Decoder) nextBlockSync() (ok bool) {
|
||||||
|
|
||||||
// Check frame size (before CRC)
|
// Check frame size (before CRC)
|
||||||
d.syncStream.decodedFrame += uint64(len(d.current.b))
|
d.syncStream.decodedFrame += uint64(len(d.current.b))
|
||||||
if d.frame.FrameContentSize > 0 && d.syncStream.decodedFrame > d.frame.FrameContentSize {
|
if d.syncStream.decodedFrame > d.frame.FrameContentSize {
|
||||||
if debugDecoder {
|
if debugDecoder {
|
||||||
printf("DecodedFrame (%d) > FrameContentSize (%d)\n", d.syncStream.decodedFrame, d.frame.FrameContentSize)
|
printf("DecodedFrame (%d) > FrameContentSize (%d)\n", d.syncStream.decodedFrame, d.frame.FrameContentSize)
|
||||||
}
|
}
|
||||||
|
@ -523,7 +523,7 @@ func (d *Decoder) nextBlockSync() (ok bool) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check FCS
|
// Check FCS
|
||||||
if d.current.d.Last && d.frame.FrameContentSize > 0 && d.syncStream.decodedFrame != d.frame.FrameContentSize {
|
if d.current.d.Last && d.frame.FrameContentSize != fcsUnknown && d.syncStream.decodedFrame != d.frame.FrameContentSize {
|
||||||
if debugDecoder {
|
if debugDecoder {
|
||||||
printf("DecodedFrame (%d) != FrameContentSize (%d)\n", d.syncStream.decodedFrame, d.frame.FrameContentSize)
|
printf("DecodedFrame (%d) != FrameContentSize (%d)\n", d.syncStream.decodedFrame, d.frame.FrameContentSize)
|
||||||
}
|
}
|
||||||
|
@ -700,6 +700,7 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
|
||||||
}
|
}
|
||||||
hist.decoders = block.async.newHist.decoders
|
hist.decoders = block.async.newHist.decoders
|
||||||
hist.recentOffsets = block.async.newHist.recentOffsets
|
hist.recentOffsets = block.async.newHist.recentOffsets
|
||||||
|
hist.windowSize = block.async.newHist.windowSize
|
||||||
if block.async.newHist.dict != nil {
|
if block.async.newHist.dict != nil {
|
||||||
hist.setDict(block.async.newHist.dict)
|
hist.setDict(block.async.newHist.dict)
|
||||||
}
|
}
|
||||||
|
@ -811,11 +812,11 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
|
||||||
}
|
}
|
||||||
if !hasErr {
|
if !hasErr {
|
||||||
decodedFrame += uint64(len(do.b))
|
decodedFrame += uint64(len(do.b))
|
||||||
if fcs > 0 && decodedFrame > fcs {
|
if decodedFrame > fcs {
|
||||||
println("fcs exceeded", block.Last, fcs, decodedFrame)
|
println("fcs exceeded", block.Last, fcs, decodedFrame)
|
||||||
do.err = ErrFrameSizeExceeded
|
do.err = ErrFrameSizeExceeded
|
||||||
hasErr = true
|
hasErr = true
|
||||||
} else if block.Last && fcs > 0 && decodedFrame != fcs {
|
} else if block.Last && fcs != fcsUnknown && decodedFrame != fcs {
|
||||||
do.err = ErrFrameSizeMismatch
|
do.err = ErrFrameSizeMismatch
|
||||||
hasErr = true
|
hasErr = true
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -197,7 +197,7 @@ func (d *frameDec) reset(br byteBuffer) error {
|
||||||
default:
|
default:
|
||||||
fcsSize = 1 << v
|
fcsSize = 1 << v
|
||||||
}
|
}
|
||||||
d.FrameContentSize = 0
|
d.FrameContentSize = fcsUnknown
|
||||||
if fcsSize > 0 {
|
if fcsSize > 0 {
|
||||||
b, err := br.readSmall(fcsSize)
|
b, err := br.readSmall(fcsSize)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -343,12 +343,7 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
|
||||||
err = ErrDecoderSizeExceeded
|
err = ErrDecoderSizeExceeded
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
if d.SingleSegment && uint64(len(d.history.b)) > d.o.maxDecodedSize {
|
if uint64(len(d.history.b)-crcStart) > d.FrameContentSize {
|
||||||
println("runDecoder: single segment and", uint64(len(d.history.b)), ">", d.o.maxDecodedSize)
|
|
||||||
err = ErrFrameSizeExceeded
|
|
||||||
break
|
|
||||||
}
|
|
||||||
if d.FrameContentSize > 0 && uint64(len(d.history.b)-crcStart) > d.FrameContentSize {
|
|
||||||
println("runDecoder: FrameContentSize exceeded", uint64(len(d.history.b)-crcStart), ">", d.FrameContentSize)
|
println("runDecoder: FrameContentSize exceeded", uint64(len(d.history.b)-crcStart), ">", d.FrameContentSize)
|
||||||
err = ErrFrameSizeExceeded
|
err = ErrFrameSizeExceeded
|
||||||
break
|
break
|
||||||
|
@ -356,13 +351,13 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
|
||||||
if dec.Last {
|
if dec.Last {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
if debugDecoder && d.FrameContentSize > 0 {
|
if debugDecoder {
|
||||||
println("runDecoder: FrameContentSize", uint64(len(d.history.b)-crcStart), "<=", d.FrameContentSize)
|
println("runDecoder: FrameContentSize", uint64(len(d.history.b)-crcStart), "<=", d.FrameContentSize)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
dst = d.history.b
|
dst = d.history.b
|
||||||
if err == nil {
|
if err == nil {
|
||||||
if d.FrameContentSize > 0 && uint64(len(d.history.b)-crcStart) != d.FrameContentSize {
|
if d.FrameContentSize != fcsUnknown && uint64(len(d.history.b)-crcStart) != d.FrameContentSize {
|
||||||
err = ErrFrameSizeMismatch
|
err = ErrFrameSizeMismatch
|
||||||
} else if d.HasCheckSum {
|
} else if d.HasCheckSum {
|
||||||
var n int
|
var n int
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
//go:build gofuzz
|
//go:build ignorecrc
|
||||||
// +build gofuzz
|
// +build ignorecrc
|
||||||
|
|
||||||
// Copyright 2019+ Klaus Post. All rights reserved.
|
// Copyright 2019+ Klaus Post. All rights reserved.
|
||||||
// License information can be found in the LICENSE file.
|
// License information can be found in the LICENSE file.
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
//go:build !gofuzz
|
//go:build !ignorecrc
|
||||||
// +build !gofuzz
|
// +build !ignorecrc
|
||||||
|
|
||||||
// Copyright 2019+ Klaus Post. All rights reserved.
|
// Copyright 2019+ Klaus Post. All rights reserved.
|
||||||
// License information can be found in the LICENSE file.
|
// License information can be found in the LICENSE file.
|
||||||
|
|
|
@ -107,7 +107,10 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
|
||||||
llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
|
llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
|
||||||
s.seqSize = 0
|
s.seqSize = 0
|
||||||
litRemain := len(s.literals)
|
litRemain := len(s.literals)
|
||||||
|
maxBlockSize := maxCompressedBlockSize
|
||||||
|
if s.windowSize < maxBlockSize {
|
||||||
|
maxBlockSize = s.windowSize
|
||||||
|
}
|
||||||
for i := range seqs {
|
for i := range seqs {
|
||||||
var ll, mo, ml int
|
var ll, mo, ml int
|
||||||
if br.off > 4+((maxOffsetBits+16+16)>>3) {
|
if br.off > 4+((maxOffsetBits+16+16)>>3) {
|
||||||
|
@ -192,7 +195,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
|
||||||
}
|
}
|
||||||
s.seqSize += ll + ml
|
s.seqSize += ll + ml
|
||||||
if s.seqSize > maxBlockSize {
|
if s.seqSize > maxBlockSize {
|
||||||
return fmt.Errorf("output (%d) bigger than max block size", s.seqSize)
|
return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
|
||||||
}
|
}
|
||||||
litRemain -= ll
|
litRemain -= ll
|
||||||
if litRemain < 0 {
|
if litRemain < 0 {
|
||||||
|
@ -230,7 +233,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
|
||||||
}
|
}
|
||||||
s.seqSize += litRemain
|
s.seqSize += litRemain
|
||||||
if s.seqSize > maxBlockSize {
|
if s.seqSize > maxBlockSize {
|
||||||
return fmt.Errorf("output (%d) bigger than max block size", s.seqSize)
|
return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
|
||||||
}
|
}
|
||||||
err := br.close()
|
err := br.close()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -347,6 +350,10 @@ func (s *sequenceDecs) decodeSync(history *history) error {
|
||||||
llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
|
llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
|
||||||
hist := history.b[history.ignoreBuffer:]
|
hist := history.b[history.ignoreBuffer:]
|
||||||
out := s.out
|
out := s.out
|
||||||
|
maxBlockSize := maxCompressedBlockSize
|
||||||
|
if s.windowSize < maxBlockSize {
|
||||||
|
maxBlockSize = s.windowSize
|
||||||
|
}
|
||||||
|
|
||||||
for i := seqs - 1; i >= 0; i-- {
|
for i := seqs - 1; i >= 0; i-- {
|
||||||
if br.overread() {
|
if br.overread() {
|
||||||
|
@ -426,7 +433,7 @@ func (s *sequenceDecs) decodeSync(history *history) error {
|
||||||
}
|
}
|
||||||
size := ll + ml + len(out)
|
size := ll + ml + len(out)
|
||||||
if size-startSize > maxBlockSize {
|
if size-startSize > maxBlockSize {
|
||||||
return fmt.Errorf("output (%d) bigger than max block size", size)
|
return fmt.Errorf("output (%d) bigger than max block size (%d)", size, maxBlockSize)
|
||||||
}
|
}
|
||||||
if size > cap(out) {
|
if size > cap(out) {
|
||||||
// Not enough size, which can happen under high volume block streaming conditions
|
// Not enough size, which can happen under high volume block streaming conditions
|
||||||
|
@ -535,6 +542,11 @@ func (s *sequenceDecs) decodeSync(history *history) error {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check if space for literals
|
||||||
|
if len(s.literals)+len(s.out)-startSize > maxBlockSize {
|
||||||
|
return fmt.Errorf("output (%d) bigger than max block size (%d)", len(s.out), maxBlockSize)
|
||||||
|
}
|
||||||
|
|
||||||
// Add final literals
|
// Add final literals
|
||||||
s.out = append(out, s.literals...)
|
s.out = append(out, s.literals...)
|
||||||
return br.close()
|
return br.close()
|
||||||
|
|
|
@ -20,7 +20,7 @@ const ZipMethodPKWare = 20
|
||||||
|
|
||||||
var zipReaderPool sync.Pool
|
var zipReaderPool sync.Pool
|
||||||
|
|
||||||
// newZipReader cannot be used since we would leak goroutines...
|
// newZipReader creates a pooled zip decompressor.
|
||||||
func newZipReader(r io.Reader) io.ReadCloser {
|
func newZipReader(r io.Reader) io.ReadCloser {
|
||||||
dec, ok := zipReaderPool.Get().(*Decoder)
|
dec, ok := zipReaderPool.Get().(*Decoder)
|
||||||
if ok {
|
if ok {
|
||||||
|
@ -44,10 +44,14 @@ func (r *pooledZipReader) Read(p []byte) (n int, err error) {
|
||||||
r.mu.Lock()
|
r.mu.Lock()
|
||||||
defer r.mu.Unlock()
|
defer r.mu.Unlock()
|
||||||
if r.dec == nil {
|
if r.dec == nil {
|
||||||
return 0, errors.New("Read after Close")
|
return 0, errors.New("read after close or EOF")
|
||||||
}
|
}
|
||||||
dec, err := r.dec.Read(p)
|
dec, err := r.dec.Read(p)
|
||||||
|
if err == io.EOF {
|
||||||
|
err = r.dec.Reset(nil)
|
||||||
|
zipReaderPool.Put(r.dec)
|
||||||
|
r.dec = nil
|
||||||
|
}
|
||||||
return dec, err
|
return dec, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -112,11 +116,5 @@ func ZipCompressor(opts ...EOption) func(w io.Writer) (io.WriteCloser, error) {
|
||||||
// ZipDecompressor returns a decompressor that can be registered with zip libraries.
|
// ZipDecompressor returns a decompressor that can be registered with zip libraries.
|
||||||
// See ZipCompressor for example.
|
// See ZipCompressor for example.
|
||||||
func ZipDecompressor() func(r io.Reader) io.ReadCloser {
|
func ZipDecompressor() func(r io.Reader) io.ReadCloser {
|
||||||
return func(r io.Reader) io.ReadCloser {
|
return newZipReader
|
||||||
d, err := NewReader(r, WithDecoderConcurrency(1), WithDecoderLowmem(true))
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
return d.IOReadCloser()
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,6 +39,9 @@ const zstdMinMatch = 3
|
||||||
// Reset the buffer offset when reaching this.
|
// Reset the buffer offset when reaching this.
|
||||||
const bufferReset = math.MaxInt32 - MaxWindowSize
|
const bufferReset = math.MaxInt32 - MaxWindowSize
|
||||||
|
|
||||||
|
// fcsUnknown is used for unknown frame content size.
|
||||||
|
const fcsUnknown = math.MaxUint64
|
||||||
|
|
||||||
var (
|
var (
|
||||||
// ErrReservedBlockType is returned when a reserved block type is found.
|
// ErrReservedBlockType is returned when a reserved block type is found.
|
||||||
// Typically this indicates wrong or corrupted input.
|
// Typically this indicates wrong or corrupted input.
|
||||||
|
@ -52,6 +55,10 @@ var (
|
||||||
// Typically returned on invalid input.
|
// Typically returned on invalid input.
|
||||||
ErrBlockTooSmall = errors.New("block too small")
|
ErrBlockTooSmall = errors.New("block too small")
|
||||||
|
|
||||||
|
// ErrUnexpectedBlockSize is returned when a block has unexpected size.
|
||||||
|
// Typically returned on invalid input.
|
||||||
|
ErrUnexpectedBlockSize = errors.New("unexpected block size")
|
||||||
|
|
||||||
// ErrMagicMismatch is returned when a "magic" number isn't what is expected.
|
// ErrMagicMismatch is returned when a "magic" number isn't what is expected.
|
||||||
// Typically this indicates wrong or corrupted input.
|
// Typically this indicates wrong or corrupted input.
|
||||||
ErrMagicMismatch = errors.New("invalid input: magic number mismatch")
|
ErrMagicMismatch = errors.New("invalid input: magic number mismatch")
|
||||||
|
|
|
@ -133,7 +133,7 @@ github.com/imdario/mergo
|
||||||
# github.com/inconshreveable/mousetrap v1.0.0
|
# github.com/inconshreveable/mousetrap v1.0.0
|
||||||
## explicit
|
## explicit
|
||||||
github.com/inconshreveable/mousetrap
|
github.com/inconshreveable/mousetrap
|
||||||
# github.com/klauspost/compress v1.15.0
|
# github.com/klauspost/compress v1.15.1
|
||||||
## explicit; go 1.15
|
## explicit; go 1.15
|
||||||
github.com/klauspost/compress
|
github.com/klauspost/compress
|
||||||
github.com/klauspost/compress/fse
|
github.com/klauspost/compress/fse
|
||||||
|
|
Loading…
Reference in New Issue