mirror of https://github.com/docker/cli.git
vendor: github.com/klauspost/compress v1.15.1
full diff: https://github.com/klauspost/compress/compare/v1.15.0...v1.15.1 Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
This commit is contained in:
parent
acf6aee911
commit
b3f3beb739
|
@ -53,7 +53,7 @@ require (
|
|||
github.com/golang/protobuf v1.5.2 // indirect
|
||||
github.com/gorilla/mux v1.8.0 // indirect; updated to v1.8.0 to get rid of old compatibility for "context"
|
||||
github.com/inconshreveable/mousetrap v1.0.0 // indirect
|
||||
github.com/klauspost/compress v1.15.0 // indirect
|
||||
github.com/klauspost/compress v1.15.1 // indirect
|
||||
github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect
|
||||
github.com/miekg/pkcs11 v1.1.1 // indirect
|
||||
github.com/moby/sys/symlink v0.2.0 // indirect
|
||||
|
|
|
@ -249,8 +249,8 @@ github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8
|
|||
github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00=
|
||||
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
|
||||
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
|
||||
github.com/klauspost/compress v1.15.0 h1:xqfchp4whNFxn5A4XFyyYtitiWI8Hy5EW59jEwcyL6U=
|
||||
github.com/klauspost/compress v1.15.0/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
|
||||
github.com/klauspost/compress v1.15.1 h1:y9FcTHGyrebwfP0ZZqFiaxTaiDnUrGkJkI+f583BL1A=
|
||||
github.com/klauspost/compress v1.15.1/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
|
||||
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
||||
github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
||||
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
|
||||
|
|
|
@ -17,6 +17,23 @@ This package provides various compression algorithms.
|
|||
|
||||
# changelog
|
||||
|
||||
* Mar 3, 2022 (v1.15.0)
|
||||
* zstd: Refactor decoder by @klauspost in [#498](https://github.com/klauspost/compress/pull/498)
|
||||
* zstd: Add stream encoding without goroutines by @klauspost in [#505](https://github.com/klauspost/compress/pull/505)
|
||||
* huff0: Prevent single blocks exceeding 16 bits by @klauspost in[#507](https://github.com/klauspost/compress/pull/507)
|
||||
* flate: Inline literal emission by @klauspost in [#509](https://github.com/klauspost/compress/pull/509)
|
||||
* gzhttp: Add zstd to transport by @klauspost in [#400](https://github.com/klauspost/compress/pull/400)
|
||||
* gzhttp: Make content-type optional by @klauspost in [#510](https://github.com/klauspost/compress/pull/510)
|
||||
|
||||
<details>
|
||||
<summary>See Details</summary>
|
||||
Both compression and decompression now supports "synchronous" stream operations. This means that whenever "concurrency" is set to 1, they will operate without spawning goroutines.
|
||||
|
||||
Stream decompression is now faster on asynchronous, since the goroutine allocation much more effectively splits the workload. On typical streams this will typically use 2 cores fully for decompression. When a stream has finished decoding no goroutines will be left over, so decoders can now safely be pooled and still be garbage collected.
|
||||
|
||||
While the release has been extensively tested, it is recommended to testing when upgrading.
|
||||
</details>
|
||||
|
||||
* Feb 22, 2022 (v1.14.4)
|
||||
* flate: Fix rare huffman only (-2) corruption. [#503](https://github.com/klauspost/compress/pull/503)
|
||||
* zip: Update deprecated CreateHeaderRaw to correctly call CreateRaw by @saracen in [#502](https://github.com/klauspost/compress/pull/502)
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
package huff0
|
||||
|
||||
//go:generate go run generate.go
|
||||
//go:generate asmfmt -w decompress_amd64.s
|
||||
//go:generate asmfmt -w decompress_8b_amd64.s
|
|
@ -165,6 +165,11 @@ func (b *bitReaderShifted) peekBitsFast(n uint8) uint16 {
|
|||
return uint16(b.value >> ((64 - n) & 63))
|
||||
}
|
||||
|
||||
// peekTopBits(n) is equvialent to peekBitFast(64 - n)
|
||||
func (b *bitReaderShifted) peekTopBits(n uint8) uint16 {
|
||||
return uint16(b.value >> n)
|
||||
}
|
||||
|
||||
func (b *bitReaderShifted) advance(n uint8) {
|
||||
b.bitsRead += n
|
||||
b.value <<= n & 63
|
||||
|
|
|
@ -725,189 +725,6 @@ func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) {
|
|||
return dst, br.close()
|
||||
}
|
||||
|
||||
// Decompress4X will decompress a 4X encoded stream.
|
||||
// The length of the supplied input must match the end of a block exactly.
|
||||
// The *capacity* of the dst slice must match the destination size of
|
||||
// the uncompressed data exactly.
|
||||
func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
|
||||
if len(d.dt.single) == 0 {
|
||||
return nil, errors.New("no table loaded")
|
||||
}
|
||||
if len(src) < 6+(4*1) {
|
||||
return nil, errors.New("input too small")
|
||||
}
|
||||
if use8BitTables && d.actualTableLog <= 8 {
|
||||
return d.decompress4X8bit(dst, src)
|
||||
}
|
||||
|
||||
var br [4]bitReaderShifted
|
||||
// Decode "jump table"
|
||||
start := 6
|
||||
for i := 0; i < 3; i++ {
|
||||
length := int(src[i*2]) | (int(src[i*2+1]) << 8)
|
||||
if start+length >= len(src) {
|
||||
return nil, errors.New("truncated input (or invalid offset)")
|
||||
}
|
||||
err := br[i].init(src[start : start+length])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
start += length
|
||||
}
|
||||
err := br[3].init(src[start:])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// destination, offset to match first output
|
||||
dstSize := cap(dst)
|
||||
dst = dst[:dstSize]
|
||||
out := dst
|
||||
dstEvery := (dstSize + 3) / 4
|
||||
|
||||
const tlSize = 1 << tableLogMax
|
||||
const tlMask = tlSize - 1
|
||||
single := d.dt.single[:tlSize]
|
||||
|
||||
// Use temp table to avoid bound checks/append penalty.
|
||||
buf := d.buffer()
|
||||
var off uint8
|
||||
var decoded int
|
||||
|
||||
// Decode 2 values from each decoder/loop.
|
||||
const bufoff = 256
|
||||
for {
|
||||
if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
|
||||
break
|
||||
}
|
||||
|
||||
{
|
||||
const stream = 0
|
||||
const stream2 = 1
|
||||
br[stream].fillFast()
|
||||
br[stream2].fillFast()
|
||||
|
||||
val := br[stream].peekBitsFast(d.actualTableLog)
|
||||
val2 := br[stream2].peekBitsFast(d.actualTableLog)
|
||||
v := single[val&tlMask]
|
||||
v2 := single[val2&tlMask]
|
||||
br[stream].advance(uint8(v.entry))
|
||||
br[stream2].advance(uint8(v2.entry))
|
||||
buf[stream][off] = uint8(v.entry >> 8)
|
||||
buf[stream2][off] = uint8(v2.entry >> 8)
|
||||
|
||||
val = br[stream].peekBitsFast(d.actualTableLog)
|
||||
val2 = br[stream2].peekBitsFast(d.actualTableLog)
|
||||
v = single[val&tlMask]
|
||||
v2 = single[val2&tlMask]
|
||||
br[stream].advance(uint8(v.entry))
|
||||
br[stream2].advance(uint8(v2.entry))
|
||||
buf[stream][off+1] = uint8(v.entry >> 8)
|
||||
buf[stream2][off+1] = uint8(v2.entry >> 8)
|
||||
}
|
||||
|
||||
{
|
||||
const stream = 2
|
||||
const stream2 = 3
|
||||
br[stream].fillFast()
|
||||
br[stream2].fillFast()
|
||||
|
||||
val := br[stream].peekBitsFast(d.actualTableLog)
|
||||
val2 := br[stream2].peekBitsFast(d.actualTableLog)
|
||||
v := single[val&tlMask]
|
||||
v2 := single[val2&tlMask]
|
||||
br[stream].advance(uint8(v.entry))
|
||||
br[stream2].advance(uint8(v2.entry))
|
||||
buf[stream][off] = uint8(v.entry >> 8)
|
||||
buf[stream2][off] = uint8(v2.entry >> 8)
|
||||
|
||||
val = br[stream].peekBitsFast(d.actualTableLog)
|
||||
val2 = br[stream2].peekBitsFast(d.actualTableLog)
|
||||
v = single[val&tlMask]
|
||||
v2 = single[val2&tlMask]
|
||||
br[stream].advance(uint8(v.entry))
|
||||
br[stream2].advance(uint8(v2.entry))
|
||||
buf[stream][off+1] = uint8(v.entry >> 8)
|
||||
buf[stream2][off+1] = uint8(v2.entry >> 8)
|
||||
}
|
||||
|
||||
off += 2
|
||||
|
||||
if off == 0 {
|
||||
if bufoff > dstEvery {
|
||||
d.bufs.Put(buf)
|
||||
return nil, errors.New("corruption detected: stream overrun 1")
|
||||
}
|
||||
copy(out, buf[0][:])
|
||||
copy(out[dstEvery:], buf[1][:])
|
||||
copy(out[dstEvery*2:], buf[2][:])
|
||||
copy(out[dstEvery*3:], buf[3][:])
|
||||
out = out[bufoff:]
|
||||
decoded += bufoff * 4
|
||||
// There must at least be 3 buffers left.
|
||||
if len(out) < dstEvery*3 {
|
||||
d.bufs.Put(buf)
|
||||
return nil, errors.New("corruption detected: stream overrun 2")
|
||||
}
|
||||
}
|
||||
}
|
||||
if off > 0 {
|
||||
ioff := int(off)
|
||||
if len(out) < dstEvery*3+ioff {
|
||||
d.bufs.Put(buf)
|
||||
return nil, errors.New("corruption detected: stream overrun 3")
|
||||
}
|
||||
copy(out, buf[0][:off])
|
||||
copy(out[dstEvery:], buf[1][:off])
|
||||
copy(out[dstEvery*2:], buf[2][:off])
|
||||
copy(out[dstEvery*3:], buf[3][:off])
|
||||
decoded += int(off) * 4
|
||||
out = out[off:]
|
||||
}
|
||||
|
||||
// Decode remaining.
|
||||
remainBytes := dstEvery - (decoded / 4)
|
||||
for i := range br {
|
||||
offset := dstEvery * i
|
||||
endsAt := offset + remainBytes
|
||||
if endsAt > len(out) {
|
||||
endsAt = len(out)
|
||||
}
|
||||
br := &br[i]
|
||||
bitsLeft := br.remaining()
|
||||
for bitsLeft > 0 {
|
||||
br.fill()
|
||||
if offset >= endsAt {
|
||||
d.bufs.Put(buf)
|
||||
return nil, errors.New("corruption detected: stream overrun 4")
|
||||
}
|
||||
|
||||
// Read value and increment offset.
|
||||
val := br.peekBitsFast(d.actualTableLog)
|
||||
v := single[val&tlMask].entry
|
||||
nBits := uint8(v)
|
||||
br.advance(nBits)
|
||||
bitsLeft -= uint(nBits)
|
||||
out[offset] = uint8(v >> 8)
|
||||
offset++
|
||||
}
|
||||
if offset != endsAt {
|
||||
d.bufs.Put(buf)
|
||||
return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt)
|
||||
}
|
||||
decoded += offset - dstEvery*i
|
||||
err = br.close()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
d.bufs.Put(buf)
|
||||
if dstSize != decoded {
|
||||
return nil, errors.New("corruption detected: short output block")
|
||||
}
|
||||
return dst, nil
|
||||
}
|
||||
|
||||
// Decompress4X will decompress a 4X encoded stream.
|
||||
// The length of the supplied input must match the end of a block exactly.
|
||||
// The *capacity* of the dst slice must match the destination size of
|
||||
|
|
|
@ -0,0 +1,488 @@
|
|||
// +build !appengine
|
||||
// +build gc
|
||||
// +build !noasm
|
||||
|
||||
#include "textflag.h"
|
||||
#include "funcdata.h"
|
||||
#include "go_asm.h"
|
||||
|
||||
#define bufoff 256 // see decompress.go, we're using [4][256]byte table
|
||||
|
||||
// func decompress4x_main_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
|
||||
// peekBits uint8, buf *byte, tbl *dEntrySingle) (int, bool)
|
||||
TEXT ·decompress4x_8b_loop_x86(SB), NOSPLIT, $8
|
||||
#define off R8
|
||||
#define buffer DI
|
||||
#define table SI
|
||||
|
||||
#define br_bits_read R9
|
||||
#define br_value R10
|
||||
#define br_offset R11
|
||||
#define peek_bits R12
|
||||
#define exhausted DX
|
||||
|
||||
#define br0 R13
|
||||
#define br1 R14
|
||||
#define br2 R15
|
||||
#define br3 BP
|
||||
|
||||
MOVQ BP, 0(SP)
|
||||
|
||||
XORQ exhausted, exhausted // exhausted = false
|
||||
XORQ off, off // off = 0
|
||||
|
||||
MOVBQZX peekBits+32(FP), peek_bits
|
||||
MOVQ buf+40(FP), buffer
|
||||
MOVQ tbl+48(FP), table
|
||||
|
||||
MOVQ pbr0+0(FP), br0
|
||||
MOVQ pbr1+8(FP), br1
|
||||
MOVQ pbr2+16(FP), br2
|
||||
MOVQ pbr3+24(FP), br3
|
||||
|
||||
main_loop:
|
||||
|
||||
// const stream = 0
|
||||
// br0.fillFast()
|
||||
MOVBQZX bitReaderShifted_bitsRead(br0), br_bits_read
|
||||
MOVQ bitReaderShifted_value(br0), br_value
|
||||
MOVQ bitReaderShifted_off(br0), br_offset
|
||||
|
||||
// if b.bitsRead >= 32 {
|
||||
CMPQ br_bits_read, $32
|
||||
JB skip_fill0
|
||||
|
||||
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
||||
SUBQ $4, br_offset // b.off -= 4
|
||||
|
||||
// v := b.in[b.off-4 : b.off]
|
||||
// v = v[:4]
|
||||
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
||||
MOVQ bitReaderShifted_in(br0), AX
|
||||
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
||||
|
||||
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||
MOVQ br_bits_read, CX
|
||||
SHLQ CL, AX
|
||||
ORQ AX, br_value
|
||||
|
||||
// exhausted = exhausted || (br0.off < 4)
|
||||
CMPQ br_offset, $4
|
||||
SETLT DL
|
||||
ORB DL, DH
|
||||
|
||||
// }
|
||||
skip_fill0:
|
||||
|
||||
// val0 := br0.peekTopBits(peekBits)
|
||||
MOVQ br_value, AX
|
||||
MOVQ peek_bits, CX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v0 := table[val0&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v0
|
||||
|
||||
// br0.advance(uint8(v0.entry))
|
||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// val1 := br0.peekTopBits(peekBits)
|
||||
MOVQ peek_bits, CX
|
||||
MOVQ br_value, AX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v1 := table[val1&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v1
|
||||
|
||||
// br0.advance(uint8(v1.entry))
|
||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CX, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// these two writes get coalesced
|
||||
// buf[stream][off] = uint8(v0.entry >> 8)
|
||||
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
||||
MOVW BX, 0(buffer)(off*1)
|
||||
|
||||
// SECOND PART:
|
||||
// val2 := br0.peekTopBits(peekBits)
|
||||
MOVQ br_value, AX
|
||||
MOVQ peek_bits, CX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v2 := table[val0&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v0
|
||||
|
||||
// br0.advance(uint8(v0.entry))
|
||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// val3 := br0.peekTopBits(peekBits)
|
||||
MOVQ peek_bits, CX
|
||||
MOVQ br_value, AX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v3 := table[val1&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v1
|
||||
|
||||
// br0.advance(uint8(v1.entry))
|
||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CX, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// these two writes get coalesced
|
||||
// buf[stream][off+2] = uint8(v2.entry >> 8)
|
||||
// buf[stream][off+3] = uint8(v3.entry >> 8)
|
||||
MOVW BX, 0+2(buffer)(off*1)
|
||||
|
||||
// update the bitrader reader structure
|
||||
MOVB br_bits_read, bitReaderShifted_bitsRead(br0)
|
||||
MOVQ br_value, bitReaderShifted_value(br0)
|
||||
MOVQ br_offset, bitReaderShifted_off(br0)
|
||||
|
||||
// const stream = 1
|
||||
// br1.fillFast()
|
||||
MOVBQZX bitReaderShifted_bitsRead(br1), br_bits_read
|
||||
MOVQ bitReaderShifted_value(br1), br_value
|
||||
MOVQ bitReaderShifted_off(br1), br_offset
|
||||
|
||||
// if b.bitsRead >= 32 {
|
||||
CMPQ br_bits_read, $32
|
||||
JB skip_fill1
|
||||
|
||||
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
||||
SUBQ $4, br_offset // b.off -= 4
|
||||
|
||||
// v := b.in[b.off-4 : b.off]
|
||||
// v = v[:4]
|
||||
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
||||
MOVQ bitReaderShifted_in(br1), AX
|
||||
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
||||
|
||||
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||
MOVQ br_bits_read, CX
|
||||
SHLQ CL, AX
|
||||
ORQ AX, br_value
|
||||
|
||||
// exhausted = exhausted || (br1.off < 4)
|
||||
CMPQ br_offset, $4
|
||||
SETLT DL
|
||||
ORB DL, DH
|
||||
|
||||
// }
|
||||
skip_fill1:
|
||||
|
||||
// val0 := br1.peekTopBits(peekBits)
|
||||
MOVQ br_value, AX
|
||||
MOVQ peek_bits, CX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v0 := table[val0&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v0
|
||||
|
||||
// br1.advance(uint8(v0.entry))
|
||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// val1 := br1.peekTopBits(peekBits)
|
||||
MOVQ peek_bits, CX
|
||||
MOVQ br_value, AX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v1 := table[val1&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v1
|
||||
|
||||
// br1.advance(uint8(v1.entry))
|
||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CX, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// these two writes get coalesced
|
||||
// buf[stream][off] = uint8(v0.entry >> 8)
|
||||
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
||||
MOVW BX, 256(buffer)(off*1)
|
||||
|
||||
// SECOND PART:
|
||||
// val2 := br1.peekTopBits(peekBits)
|
||||
MOVQ br_value, AX
|
||||
MOVQ peek_bits, CX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v2 := table[val0&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v0
|
||||
|
||||
// br1.advance(uint8(v0.entry))
|
||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// val3 := br1.peekTopBits(peekBits)
|
||||
MOVQ peek_bits, CX
|
||||
MOVQ br_value, AX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v3 := table[val1&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v1
|
||||
|
||||
// br1.advance(uint8(v1.entry))
|
||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CX, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// these two writes get coalesced
|
||||
// buf[stream][off+2] = uint8(v2.entry >> 8)
|
||||
// buf[stream][off+3] = uint8(v3.entry >> 8)
|
||||
MOVW BX, 256+2(buffer)(off*1)
|
||||
|
||||
// update the bitrader reader structure
|
||||
MOVB br_bits_read, bitReaderShifted_bitsRead(br1)
|
||||
MOVQ br_value, bitReaderShifted_value(br1)
|
||||
MOVQ br_offset, bitReaderShifted_off(br1)
|
||||
|
||||
// const stream = 2
|
||||
// br2.fillFast()
|
||||
MOVBQZX bitReaderShifted_bitsRead(br2), br_bits_read
|
||||
MOVQ bitReaderShifted_value(br2), br_value
|
||||
MOVQ bitReaderShifted_off(br2), br_offset
|
||||
|
||||
// if b.bitsRead >= 32 {
|
||||
CMPQ br_bits_read, $32
|
||||
JB skip_fill2
|
||||
|
||||
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
||||
SUBQ $4, br_offset // b.off -= 4
|
||||
|
||||
// v := b.in[b.off-4 : b.off]
|
||||
// v = v[:4]
|
||||
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
||||
MOVQ bitReaderShifted_in(br2), AX
|
||||
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
||||
|
||||
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||
MOVQ br_bits_read, CX
|
||||
SHLQ CL, AX
|
||||
ORQ AX, br_value
|
||||
|
||||
// exhausted = exhausted || (br2.off < 4)
|
||||
CMPQ br_offset, $4
|
||||
SETLT DL
|
||||
ORB DL, DH
|
||||
|
||||
// }
|
||||
skip_fill2:
|
||||
|
||||
// val0 := br2.peekTopBits(peekBits)
|
||||
MOVQ br_value, AX
|
||||
MOVQ peek_bits, CX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v0 := table[val0&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v0
|
||||
|
||||
// br2.advance(uint8(v0.entry))
|
||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// val1 := br2.peekTopBits(peekBits)
|
||||
MOVQ peek_bits, CX
|
||||
MOVQ br_value, AX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v1 := table[val1&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v1
|
||||
|
||||
// br2.advance(uint8(v1.entry))
|
||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CX, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// these two writes get coalesced
|
||||
// buf[stream][off] = uint8(v0.entry >> 8)
|
||||
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
||||
MOVW BX, 512(buffer)(off*1)
|
||||
|
||||
// SECOND PART:
|
||||
// val2 := br2.peekTopBits(peekBits)
|
||||
MOVQ br_value, AX
|
||||
MOVQ peek_bits, CX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v2 := table[val0&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v0
|
||||
|
||||
// br2.advance(uint8(v0.entry))
|
||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// val3 := br2.peekTopBits(peekBits)
|
||||
MOVQ peek_bits, CX
|
||||
MOVQ br_value, AX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v3 := table[val1&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v1
|
||||
|
||||
// br2.advance(uint8(v1.entry))
|
||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CX, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// these two writes get coalesced
|
||||
// buf[stream][off+2] = uint8(v2.entry >> 8)
|
||||
// buf[stream][off+3] = uint8(v3.entry >> 8)
|
||||
MOVW BX, 512+2(buffer)(off*1)
|
||||
|
||||
// update the bitrader reader structure
|
||||
MOVB br_bits_read, bitReaderShifted_bitsRead(br2)
|
||||
MOVQ br_value, bitReaderShifted_value(br2)
|
||||
MOVQ br_offset, bitReaderShifted_off(br2)
|
||||
|
||||
// const stream = 3
|
||||
// br3.fillFast()
|
||||
MOVBQZX bitReaderShifted_bitsRead(br3), br_bits_read
|
||||
MOVQ bitReaderShifted_value(br3), br_value
|
||||
MOVQ bitReaderShifted_off(br3), br_offset
|
||||
|
||||
// if b.bitsRead >= 32 {
|
||||
CMPQ br_bits_read, $32
|
||||
JB skip_fill3
|
||||
|
||||
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
||||
SUBQ $4, br_offset // b.off -= 4
|
||||
|
||||
// v := b.in[b.off-4 : b.off]
|
||||
// v = v[:4]
|
||||
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
||||
MOVQ bitReaderShifted_in(br3), AX
|
||||
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
||||
|
||||
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||
MOVQ br_bits_read, CX
|
||||
SHLQ CL, AX
|
||||
ORQ AX, br_value
|
||||
|
||||
// exhausted = exhausted || (br3.off < 4)
|
||||
CMPQ br_offset, $4
|
||||
SETLT DL
|
||||
ORB DL, DH
|
||||
|
||||
// }
|
||||
skip_fill3:
|
||||
|
||||
// val0 := br3.peekTopBits(peekBits)
|
||||
MOVQ br_value, AX
|
||||
MOVQ peek_bits, CX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v0 := table[val0&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v0
|
||||
|
||||
// br3.advance(uint8(v0.entry))
|
||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// val1 := br3.peekTopBits(peekBits)
|
||||
MOVQ peek_bits, CX
|
||||
MOVQ br_value, AX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v1 := table[val1&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v1
|
||||
|
||||
// br3.advance(uint8(v1.entry))
|
||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CX, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// these two writes get coalesced
|
||||
// buf[stream][off] = uint8(v0.entry >> 8)
|
||||
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
||||
MOVW BX, 768(buffer)(off*1)
|
||||
|
||||
// SECOND PART:
|
||||
// val2 := br3.peekTopBits(peekBits)
|
||||
MOVQ br_value, AX
|
||||
MOVQ peek_bits, CX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v2 := table[val0&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v0
|
||||
|
||||
// br3.advance(uint8(v0.entry))
|
||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// val3 := br3.peekTopBits(peekBits)
|
||||
MOVQ peek_bits, CX
|
||||
MOVQ br_value, AX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v3 := table[val1&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v1
|
||||
|
||||
// br3.advance(uint8(v1.entry))
|
||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CX, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// these two writes get coalesced
|
||||
// buf[stream][off+2] = uint8(v2.entry >> 8)
|
||||
// buf[stream][off+3] = uint8(v3.entry >> 8)
|
||||
MOVW BX, 768+2(buffer)(off*1)
|
||||
|
||||
// update the bitrader reader structure
|
||||
MOVB br_bits_read, bitReaderShifted_bitsRead(br3)
|
||||
MOVQ br_value, bitReaderShifted_value(br3)
|
||||
MOVQ br_offset, bitReaderShifted_off(br3)
|
||||
|
||||
ADDQ $4, off // off += 2
|
||||
|
||||
TESTB DH, DH // any br[i].ofs < 4?
|
||||
JNZ end
|
||||
|
||||
CMPQ off, $bufoff
|
||||
JL main_loop
|
||||
|
||||
end:
|
||||
MOVQ 0(SP), BP
|
||||
|
||||
MOVB off, ret+56(FP)
|
||||
RET
|
||||
|
||||
#undef off
|
||||
#undef buffer
|
||||
#undef table
|
||||
|
||||
#undef br_bits_read
|
||||
#undef br_value
|
||||
#undef br_offset
|
||||
#undef peek_bits
|
||||
#undef exhausted
|
||||
|
||||
#undef br0
|
||||
#undef br1
|
||||
#undef br2
|
||||
#undef br3
|
197
vendor/github.com/klauspost/compress/huff0/decompress_8b_amd64.s.in
generated
vendored
Normal file
197
vendor/github.com/klauspost/compress/huff0/decompress_8b_amd64.s.in
generated
vendored
Normal file
|
@ -0,0 +1,197 @@
|
|||
// +build !appengine
|
||||
// +build gc
|
||||
// +build !noasm
|
||||
|
||||
#include "textflag.h"
|
||||
#include "funcdata.h"
|
||||
#include "go_asm.h"
|
||||
|
||||
|
||||
#define bufoff 256 // see decompress.go, we're using [4][256]byte table
|
||||
|
||||
//func decompress4x_main_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
|
||||
// peekBits uint8, buf *byte, tbl *dEntrySingle) (int, bool)
|
||||
TEXT ·decompress4x_8b_loop_x86(SB), NOSPLIT, $8
|
||||
#define off R8
|
||||
#define buffer DI
|
||||
#define table SI
|
||||
|
||||
#define br_bits_read R9
|
||||
#define br_value R10
|
||||
#define br_offset R11
|
||||
#define peek_bits R12
|
||||
#define exhausted DX
|
||||
|
||||
#define br0 R13
|
||||
#define br1 R14
|
||||
#define br2 R15
|
||||
#define br3 BP
|
||||
|
||||
MOVQ BP, 0(SP)
|
||||
|
||||
XORQ exhausted, exhausted // exhausted = false
|
||||
XORQ off, off // off = 0
|
||||
|
||||
MOVBQZX peekBits+32(FP), peek_bits
|
||||
MOVQ buf+40(FP), buffer
|
||||
MOVQ tbl+48(FP), table
|
||||
|
||||
MOVQ pbr0+0(FP), br0
|
||||
MOVQ pbr1+8(FP), br1
|
||||
MOVQ pbr2+16(FP), br2
|
||||
MOVQ pbr3+24(FP), br3
|
||||
|
||||
main_loop:
|
||||
{{ define "decode_2_values_x86" }}
|
||||
// const stream = {{ var "id" }}
|
||||
// br{{ var "id"}}.fillFast()
|
||||
MOVBQZX bitReaderShifted_bitsRead(br{{ var "id" }}), br_bits_read
|
||||
MOVQ bitReaderShifted_value(br{{ var "id" }}), br_value
|
||||
MOVQ bitReaderShifted_off(br{{ var "id" }}), br_offset
|
||||
|
||||
// if b.bitsRead >= 32 {
|
||||
CMPQ br_bits_read, $32
|
||||
JB skip_fill{{ var "id" }}
|
||||
|
||||
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
||||
SUBQ $4, br_offset // b.off -= 4
|
||||
|
||||
// v := b.in[b.off-4 : b.off]
|
||||
// v = v[:4]
|
||||
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
||||
MOVQ bitReaderShifted_in(br{{ var "id" }}), AX
|
||||
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
||||
|
||||
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||
MOVQ br_bits_read, CX
|
||||
SHLQ CL, AX
|
||||
ORQ AX, br_value
|
||||
|
||||
// exhausted = exhausted || (br{{ var "id"}}.off < 4)
|
||||
CMPQ br_offset, $4
|
||||
SETLT DL
|
||||
ORB DL, DH
|
||||
// }
|
||||
skip_fill{{ var "id" }}:
|
||||
|
||||
// val0 := br{{ var "id"}}.peekTopBits(peekBits)
|
||||
MOVQ br_value, AX
|
||||
MOVQ peek_bits, CX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v0 := table[val0&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v0
|
||||
|
||||
// br{{ var "id"}}.advance(uint8(v0.entry))
|
||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// val1 := br{{ var "id"}}.peekTopBits(peekBits)
|
||||
MOVQ peek_bits, CX
|
||||
MOVQ br_value, AX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v1 := table[val1&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v1
|
||||
|
||||
// br{{ var "id"}}.advance(uint8(v1.entry))
|
||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CX, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
|
||||
// these two writes get coalesced
|
||||
// buf[stream][off] = uint8(v0.entry >> 8)
|
||||
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
||||
MOVW BX, {{ var "bufofs" }}(buffer)(off*1)
|
||||
|
||||
// SECOND PART:
|
||||
// val2 := br{{ var "id"}}.peekTopBits(peekBits)
|
||||
MOVQ br_value, AX
|
||||
MOVQ peek_bits, CX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v2 := table[val0&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v0
|
||||
|
||||
// br{{ var "id"}}.advance(uint8(v0.entry))
|
||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// val3 := br{{ var "id"}}.peekTopBits(peekBits)
|
||||
MOVQ peek_bits, CX
|
||||
MOVQ br_value, AX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v3 := table[val1&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v1
|
||||
|
||||
// br{{ var "id"}}.advance(uint8(v1.entry))
|
||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CX, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
|
||||
// these two writes get coalesced
|
||||
// buf[stream][off+2] = uint8(v2.entry >> 8)
|
||||
// buf[stream][off+3] = uint8(v3.entry >> 8)
|
||||
MOVW BX, {{ var "bufofs" }}+2(buffer)(off*1)
|
||||
|
||||
// update the bitrader reader structure
|
||||
MOVB br_bits_read, bitReaderShifted_bitsRead(br{{ var "id" }})
|
||||
MOVQ br_value, bitReaderShifted_value(br{{ var "id" }})
|
||||
MOVQ br_offset, bitReaderShifted_off(br{{ var "id" }})
|
||||
{{ end }}
|
||||
|
||||
{{ set "id" "0" }}
|
||||
{{ set "ofs" "0" }}
|
||||
{{ set "bufofs" "0" }} {{/* id * bufoff */}}
|
||||
{{ template "decode_2_values_x86" . }}
|
||||
|
||||
{{ set "id" "1" }}
|
||||
{{ set "ofs" "8" }}
|
||||
{{ set "bufofs" "256" }}
|
||||
{{ template "decode_2_values_x86" . }}
|
||||
|
||||
{{ set "id" "2" }}
|
||||
{{ set "ofs" "16" }}
|
||||
{{ set "bufofs" "512" }}
|
||||
{{ template "decode_2_values_x86" . }}
|
||||
|
||||
{{ set "id" "3" }}
|
||||
{{ set "ofs" "24" }}
|
||||
{{ set "bufofs" "768" }}
|
||||
{{ template "decode_2_values_x86" . }}
|
||||
|
||||
ADDQ $4, off // off += 2
|
||||
|
||||
TESTB DH, DH // any br[i].ofs < 4?
|
||||
JNZ end
|
||||
|
||||
CMPQ off, $bufoff
|
||||
JL main_loop
|
||||
end:
|
||||
MOVQ 0(SP), BP
|
||||
|
||||
MOVB off, ret+56(FP)
|
||||
RET
|
||||
#undef off
|
||||
#undef buffer
|
||||
#undef table
|
||||
|
||||
#undef br_bits_read
|
||||
#undef br_value
|
||||
#undef br_offset
|
||||
#undef peek_bits
|
||||
#undef exhausted
|
||||
|
||||
#undef br0
|
||||
#undef br1
|
||||
#undef br2
|
||||
#undef br3
|
|
@ -0,0 +1,181 @@
|
|||
//go:build amd64 && !appengine && !noasm && gc
|
||||
// +build amd64,!appengine,!noasm,gc
|
||||
|
||||
// This file contains the specialisation of Decoder.Decompress4X
|
||||
// that uses an asm implementation of its main loop.
|
||||
package huff0
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// decompress4x_main_loop_x86 is an x86 assembler implementation
|
||||
// of Decompress4X when tablelog > 8.
|
||||
// go:noescape
|
||||
func decompress4x_main_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
|
||||
peekBits uint8, buf *byte, tbl *dEntrySingle) uint8
|
||||
|
||||
// decompress4x_8b_loop_x86 is an x86 assembler implementation
|
||||
// of Decompress4X when tablelog <= 8 which decodes 4 entries
|
||||
// per loop.
|
||||
// go:noescape
|
||||
func decompress4x_8b_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
|
||||
peekBits uint8, buf *byte, tbl *dEntrySingle) uint8
|
||||
|
||||
// fallback8BitSize is the size where using Go version is faster.
|
||||
const fallback8BitSize = 800
|
||||
|
||||
// Decompress4X will decompress a 4X encoded stream.
|
||||
// The length of the supplied input must match the end of a block exactly.
|
||||
// The *capacity* of the dst slice must match the destination size of
|
||||
// the uncompressed data exactly.
|
||||
func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
|
||||
if len(d.dt.single) == 0 {
|
||||
return nil, errors.New("no table loaded")
|
||||
}
|
||||
if len(src) < 6+(4*1) {
|
||||
return nil, errors.New("input too small")
|
||||
}
|
||||
|
||||
use8BitTables := d.actualTableLog <= 8
|
||||
if cap(dst) < fallback8BitSize && use8BitTables {
|
||||
return d.decompress4X8bit(dst, src)
|
||||
}
|
||||
var br [4]bitReaderShifted
|
||||
// Decode "jump table"
|
||||
start := 6
|
||||
for i := 0; i < 3; i++ {
|
||||
length := int(src[i*2]) | (int(src[i*2+1]) << 8)
|
||||
if start+length >= len(src) {
|
||||
return nil, errors.New("truncated input (or invalid offset)")
|
||||
}
|
||||
err := br[i].init(src[start : start+length])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
start += length
|
||||
}
|
||||
err := br[3].init(src[start:])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// destination, offset to match first output
|
||||
dstSize := cap(dst)
|
||||
dst = dst[:dstSize]
|
||||
out := dst
|
||||
dstEvery := (dstSize + 3) / 4
|
||||
|
||||
const tlSize = 1 << tableLogMax
|
||||
const tlMask = tlSize - 1
|
||||
single := d.dt.single[:tlSize]
|
||||
|
||||
// Use temp table to avoid bound checks/append penalty.
|
||||
buf := d.buffer()
|
||||
var off uint8
|
||||
var decoded int
|
||||
|
||||
const debug = false
|
||||
|
||||
// see: bitReaderShifted.peekBitsFast()
|
||||
peekBits := uint8((64 - d.actualTableLog) & 63)
|
||||
|
||||
// Decode 2 values from each decoder/loop.
|
||||
const bufoff = 256
|
||||
for {
|
||||
if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
|
||||
break
|
||||
}
|
||||
|
||||
if use8BitTables {
|
||||
off = decompress4x_8b_loop_x86(&br[0], &br[1], &br[2], &br[3], peekBits, &buf[0][0], &single[0])
|
||||
} else {
|
||||
off = decompress4x_main_loop_x86(&br[0], &br[1], &br[2], &br[3], peekBits, &buf[0][0], &single[0])
|
||||
}
|
||||
if debug {
|
||||
fmt.Print("DEBUG: ")
|
||||
fmt.Printf("off=%d,", off)
|
||||
for i := 0; i < 4; i++ {
|
||||
fmt.Printf(" br[%d]={bitsRead=%d, value=%x, off=%d}",
|
||||
i, br[i].bitsRead, br[i].value, br[i].off)
|
||||
}
|
||||
fmt.Println("")
|
||||
}
|
||||
|
||||
if off != 0 {
|
||||
break
|
||||
}
|
||||
|
||||
if bufoff > dstEvery {
|
||||
d.bufs.Put(buf)
|
||||
return nil, errors.New("corruption detected: stream overrun 1")
|
||||
}
|
||||
copy(out, buf[0][:])
|
||||
copy(out[dstEvery:], buf[1][:])
|
||||
copy(out[dstEvery*2:], buf[2][:])
|
||||
copy(out[dstEvery*3:], buf[3][:])
|
||||
out = out[bufoff:]
|
||||
decoded += bufoff * 4
|
||||
// There must at least be 3 buffers left.
|
||||
if len(out) < dstEvery*3 {
|
||||
d.bufs.Put(buf)
|
||||
return nil, errors.New("corruption detected: stream overrun 2")
|
||||
}
|
||||
}
|
||||
if off > 0 {
|
||||
ioff := int(off)
|
||||
if len(out) < dstEvery*3+ioff {
|
||||
d.bufs.Put(buf)
|
||||
return nil, errors.New("corruption detected: stream overrun 3")
|
||||
}
|
||||
copy(out, buf[0][:off])
|
||||
copy(out[dstEvery:], buf[1][:off])
|
||||
copy(out[dstEvery*2:], buf[2][:off])
|
||||
copy(out[dstEvery*3:], buf[3][:off])
|
||||
decoded += int(off) * 4
|
||||
out = out[off:]
|
||||
}
|
||||
|
||||
// Decode remaining.
|
||||
remainBytes := dstEvery - (decoded / 4)
|
||||
for i := range br {
|
||||
offset := dstEvery * i
|
||||
endsAt := offset + remainBytes
|
||||
if endsAt > len(out) {
|
||||
endsAt = len(out)
|
||||
}
|
||||
br := &br[i]
|
||||
bitsLeft := br.remaining()
|
||||
for bitsLeft > 0 {
|
||||
br.fill()
|
||||
if offset >= endsAt {
|
||||
d.bufs.Put(buf)
|
||||
return nil, errors.New("corruption detected: stream overrun 4")
|
||||
}
|
||||
|
||||
// Read value and increment offset.
|
||||
val := br.peekBitsFast(d.actualTableLog)
|
||||
v := single[val&tlMask].entry
|
||||
nBits := uint8(v)
|
||||
br.advance(nBits)
|
||||
bitsLeft -= uint(nBits)
|
||||
out[offset] = uint8(v >> 8)
|
||||
offset++
|
||||
}
|
||||
if offset != endsAt {
|
||||
d.bufs.Put(buf)
|
||||
return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt)
|
||||
}
|
||||
decoded += offset - dstEvery*i
|
||||
err = br.close()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
d.bufs.Put(buf)
|
||||
if dstSize != decoded {
|
||||
return nil, errors.New("corruption detected: short output block")
|
||||
}
|
||||
return dst, nil
|
||||
}
|
|
@ -0,0 +1,506 @@
|
|||
// +build !appengine
|
||||
// +build gc
|
||||
// +build !noasm
|
||||
|
||||
#include "textflag.h"
|
||||
#include "funcdata.h"
|
||||
#include "go_asm.h"
|
||||
|
||||
#ifdef GOAMD64_v4
|
||||
#ifndef GOAMD64_v3
|
||||
#define GOAMD64_v3
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define bufoff 256 // see decompress.go, we're using [4][256]byte table
|
||||
|
||||
// func decompress4x_main_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
|
||||
// peekBits uint8, buf *byte, tbl *dEntrySingle) (int, bool)
|
||||
TEXT ·decompress4x_main_loop_x86(SB), NOSPLIT, $8
|
||||
#define off R8
|
||||
#define buffer DI
|
||||
#define table SI
|
||||
|
||||
#define br_bits_read R9
|
||||
#define br_value R10
|
||||
#define br_offset R11
|
||||
#define peek_bits R12
|
||||
#define exhausted DX
|
||||
|
||||
#define br0 R13
|
||||
#define br1 R14
|
||||
#define br2 R15
|
||||
#define br3 BP
|
||||
|
||||
MOVQ BP, 0(SP)
|
||||
|
||||
XORQ exhausted, exhausted // exhausted = false
|
||||
XORQ off, off // off = 0
|
||||
|
||||
MOVBQZX peekBits+32(FP), peek_bits
|
||||
MOVQ buf+40(FP), buffer
|
||||
MOVQ tbl+48(FP), table
|
||||
|
||||
MOVQ pbr0+0(FP), br0
|
||||
MOVQ pbr1+8(FP), br1
|
||||
MOVQ pbr2+16(FP), br2
|
||||
MOVQ pbr3+24(FP), br3
|
||||
|
||||
main_loop:
|
||||
|
||||
// const stream = 0
|
||||
// br0.fillFast()
|
||||
MOVBQZX bitReaderShifted_bitsRead(br0), br_bits_read
|
||||
MOVQ bitReaderShifted_value(br0), br_value
|
||||
MOVQ bitReaderShifted_off(br0), br_offset
|
||||
|
||||
// We must have at least 2 * max tablelog left
|
||||
CMPQ br_bits_read, $64-22
|
||||
JBE skip_fill0
|
||||
|
||||
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
||||
SUBQ $4, br_offset // b.off -= 4
|
||||
|
||||
// v := b.in[b.off-4 : b.off]
|
||||
// v = v[:4]
|
||||
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
||||
MOVQ bitReaderShifted_in(br0), AX
|
||||
|
||||
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||
#ifdef GOAMD64_v3
|
||||
SHLXQ br_bits_read, 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) << (b.bitsRead & 63)
|
||||
|
||||
#else
|
||||
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
||||
MOVQ br_bits_read, CX
|
||||
SHLQ CL, AX
|
||||
|
||||
#endif
|
||||
|
||||
ORQ AX, br_value
|
||||
|
||||
// exhausted = exhausted || (br0.off < 4)
|
||||
CMPQ br_offset, $4
|
||||
SETLT DL
|
||||
ORB DL, DH
|
||||
|
||||
// }
|
||||
skip_fill0:
|
||||
|
||||
// val0 := br0.peekTopBits(peekBits)
|
||||
#ifdef GOAMD64_v3
|
||||
SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
#else
|
||||
MOVQ br_value, AX
|
||||
MOVQ peek_bits, CX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
#endif
|
||||
|
||||
// v0 := table[val0&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v0
|
||||
|
||||
// br0.advance(uint8(v0.entry))
|
||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||
|
||||
#ifdef GOAMD64_v3
|
||||
MOVBQZX AL, CX
|
||||
SHLXQ AX, br_value, br_value // value <<= n
|
||||
|
||||
#else
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
|
||||
#endif
|
||||
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
#ifdef GOAMD64_v3
|
||||
SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
#else
|
||||
// val1 := br0.peekTopBits(peekBits)
|
||||
MOVQ peek_bits, CX
|
||||
MOVQ br_value, AX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
#endif
|
||||
|
||||
// v1 := table[val1&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v1
|
||||
|
||||
// br0.advance(uint8(v1.entry))
|
||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||
|
||||
#ifdef GOAMD64_v3
|
||||
MOVBQZX AL, CX
|
||||
SHLXQ AX, br_value, br_value // value <<= n
|
||||
|
||||
#else
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
|
||||
#endif
|
||||
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// these two writes get coalesced
|
||||
// buf[stream][off] = uint8(v0.entry >> 8)
|
||||
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
||||
MOVW BX, 0(buffer)(off*1)
|
||||
|
||||
// update the bitrader reader structure
|
||||
MOVB br_bits_read, bitReaderShifted_bitsRead(br0)
|
||||
MOVQ br_value, bitReaderShifted_value(br0)
|
||||
MOVQ br_offset, bitReaderShifted_off(br0)
|
||||
|
||||
// const stream = 1
|
||||
// br1.fillFast()
|
||||
MOVBQZX bitReaderShifted_bitsRead(br1), br_bits_read
|
||||
MOVQ bitReaderShifted_value(br1), br_value
|
||||
MOVQ bitReaderShifted_off(br1), br_offset
|
||||
|
||||
// We must have at least 2 * max tablelog left
|
||||
CMPQ br_bits_read, $64-22
|
||||
JBE skip_fill1
|
||||
|
||||
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
||||
SUBQ $4, br_offset // b.off -= 4
|
||||
|
||||
// v := b.in[b.off-4 : b.off]
|
||||
// v = v[:4]
|
||||
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
||||
MOVQ bitReaderShifted_in(br1), AX
|
||||
|
||||
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||
#ifdef GOAMD64_v3
|
||||
SHLXQ br_bits_read, 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) << (b.bitsRead & 63)
|
||||
|
||||
#else
|
||||
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
||||
MOVQ br_bits_read, CX
|
||||
SHLQ CL, AX
|
||||
|
||||
#endif
|
||||
|
||||
ORQ AX, br_value
|
||||
|
||||
// exhausted = exhausted || (br1.off < 4)
|
||||
CMPQ br_offset, $4
|
||||
SETLT DL
|
||||
ORB DL, DH
|
||||
|
||||
// }
|
||||
skip_fill1:
|
||||
|
||||
// val0 := br1.peekTopBits(peekBits)
|
||||
#ifdef GOAMD64_v3
|
||||
SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
#else
|
||||
MOVQ br_value, AX
|
||||
MOVQ peek_bits, CX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
#endif
|
||||
|
||||
// v0 := table[val0&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v0
|
||||
|
||||
// br1.advance(uint8(v0.entry))
|
||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||
|
||||
#ifdef GOAMD64_v3
|
||||
MOVBQZX AL, CX
|
||||
SHLXQ AX, br_value, br_value // value <<= n
|
||||
|
||||
#else
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
|
||||
#endif
|
||||
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
#ifdef GOAMD64_v3
|
||||
SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
#else
|
||||
// val1 := br1.peekTopBits(peekBits)
|
||||
MOVQ peek_bits, CX
|
||||
MOVQ br_value, AX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
#endif
|
||||
|
||||
// v1 := table[val1&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v1
|
||||
|
||||
// br1.advance(uint8(v1.entry))
|
||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||
|
||||
#ifdef GOAMD64_v3
|
||||
MOVBQZX AL, CX
|
||||
SHLXQ AX, br_value, br_value // value <<= n
|
||||
|
||||
#else
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
|
||||
#endif
|
||||
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// these two writes get coalesced
|
||||
// buf[stream][off] = uint8(v0.entry >> 8)
|
||||
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
||||
MOVW BX, 256(buffer)(off*1)
|
||||
|
||||
// update the bitrader reader structure
|
||||
MOVB br_bits_read, bitReaderShifted_bitsRead(br1)
|
||||
MOVQ br_value, bitReaderShifted_value(br1)
|
||||
MOVQ br_offset, bitReaderShifted_off(br1)
|
||||
|
||||
// const stream = 2
|
||||
// br2.fillFast()
|
||||
MOVBQZX bitReaderShifted_bitsRead(br2), br_bits_read
|
||||
MOVQ bitReaderShifted_value(br2), br_value
|
||||
MOVQ bitReaderShifted_off(br2), br_offset
|
||||
|
||||
// We must have at least 2 * max tablelog left
|
||||
CMPQ br_bits_read, $64-22
|
||||
JBE skip_fill2
|
||||
|
||||
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
||||
SUBQ $4, br_offset // b.off -= 4
|
||||
|
||||
// v := b.in[b.off-4 : b.off]
|
||||
// v = v[:4]
|
||||
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
||||
MOVQ bitReaderShifted_in(br2), AX
|
||||
|
||||
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||
#ifdef GOAMD64_v3
|
||||
SHLXQ br_bits_read, 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) << (b.bitsRead & 63)
|
||||
|
||||
#else
|
||||
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
||||
MOVQ br_bits_read, CX
|
||||
SHLQ CL, AX
|
||||
|
||||
#endif
|
||||
|
||||
ORQ AX, br_value
|
||||
|
||||
// exhausted = exhausted || (br2.off < 4)
|
||||
CMPQ br_offset, $4
|
||||
SETLT DL
|
||||
ORB DL, DH
|
||||
|
||||
// }
|
||||
skip_fill2:
|
||||
|
||||
// val0 := br2.peekTopBits(peekBits)
|
||||
#ifdef GOAMD64_v3
|
||||
SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
#else
|
||||
MOVQ br_value, AX
|
||||
MOVQ peek_bits, CX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
#endif
|
||||
|
||||
// v0 := table[val0&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v0
|
||||
|
||||
// br2.advance(uint8(v0.entry))
|
||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||
|
||||
#ifdef GOAMD64_v3
|
||||
MOVBQZX AL, CX
|
||||
SHLXQ AX, br_value, br_value // value <<= n
|
||||
|
||||
#else
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
|
||||
#endif
|
||||
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
#ifdef GOAMD64_v3
|
||||
SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
#else
|
||||
// val1 := br2.peekTopBits(peekBits)
|
||||
MOVQ peek_bits, CX
|
||||
MOVQ br_value, AX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
#endif
|
||||
|
||||
// v1 := table[val1&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v1
|
||||
|
||||
// br2.advance(uint8(v1.entry))
|
||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||
|
||||
#ifdef GOAMD64_v3
|
||||
MOVBQZX AL, CX
|
||||
SHLXQ AX, br_value, br_value // value <<= n
|
||||
|
||||
#else
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
|
||||
#endif
|
||||
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// these two writes get coalesced
|
||||
// buf[stream][off] = uint8(v0.entry >> 8)
|
||||
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
||||
MOVW BX, 512(buffer)(off*1)
|
||||
|
||||
// update the bitrader reader structure
|
||||
MOVB br_bits_read, bitReaderShifted_bitsRead(br2)
|
||||
MOVQ br_value, bitReaderShifted_value(br2)
|
||||
MOVQ br_offset, bitReaderShifted_off(br2)
|
||||
|
||||
// const stream = 3
|
||||
// br3.fillFast()
|
||||
MOVBQZX bitReaderShifted_bitsRead(br3), br_bits_read
|
||||
MOVQ bitReaderShifted_value(br3), br_value
|
||||
MOVQ bitReaderShifted_off(br3), br_offset
|
||||
|
||||
// We must have at least 2 * max tablelog left
|
||||
CMPQ br_bits_read, $64-22
|
||||
JBE skip_fill3
|
||||
|
||||
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
||||
SUBQ $4, br_offset // b.off -= 4
|
||||
|
||||
// v := b.in[b.off-4 : b.off]
|
||||
// v = v[:4]
|
||||
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
||||
MOVQ bitReaderShifted_in(br3), AX
|
||||
|
||||
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||
#ifdef GOAMD64_v3
|
||||
SHLXQ br_bits_read, 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) << (b.bitsRead & 63)
|
||||
|
||||
#else
|
||||
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
||||
MOVQ br_bits_read, CX
|
||||
SHLQ CL, AX
|
||||
|
||||
#endif
|
||||
|
||||
ORQ AX, br_value
|
||||
|
||||
// exhausted = exhausted || (br3.off < 4)
|
||||
CMPQ br_offset, $4
|
||||
SETLT DL
|
||||
ORB DL, DH
|
||||
|
||||
// }
|
||||
skip_fill3:
|
||||
|
||||
// val0 := br3.peekTopBits(peekBits)
|
||||
#ifdef GOAMD64_v3
|
||||
SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
#else
|
||||
MOVQ br_value, AX
|
||||
MOVQ peek_bits, CX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
#endif
|
||||
|
||||
// v0 := table[val0&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v0
|
||||
|
||||
// br3.advance(uint8(v0.entry))
|
||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||
|
||||
#ifdef GOAMD64_v3
|
||||
MOVBQZX AL, CX
|
||||
SHLXQ AX, br_value, br_value // value <<= n
|
||||
|
||||
#else
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
|
||||
#endif
|
||||
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
#ifdef GOAMD64_v3
|
||||
SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
#else
|
||||
// val1 := br3.peekTopBits(peekBits)
|
||||
MOVQ peek_bits, CX
|
||||
MOVQ br_value, AX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
#endif
|
||||
|
||||
// v1 := table[val1&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v1
|
||||
|
||||
// br3.advance(uint8(v1.entry))
|
||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||
|
||||
#ifdef GOAMD64_v3
|
||||
MOVBQZX AL, CX
|
||||
SHLXQ AX, br_value, br_value // value <<= n
|
||||
|
||||
#else
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
|
||||
#endif
|
||||
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// these two writes get coalesced
|
||||
// buf[stream][off] = uint8(v0.entry >> 8)
|
||||
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
||||
MOVW BX, 768(buffer)(off*1)
|
||||
|
||||
// update the bitrader reader structure
|
||||
MOVB br_bits_read, bitReaderShifted_bitsRead(br3)
|
||||
MOVQ br_value, bitReaderShifted_value(br3)
|
||||
MOVQ br_offset, bitReaderShifted_off(br3)
|
||||
|
||||
ADDQ $2, off // off += 2
|
||||
|
||||
TESTB DH, DH // any br[i].ofs < 4?
|
||||
JNZ end
|
||||
|
||||
CMPQ off, $bufoff
|
||||
JL main_loop
|
||||
|
||||
end:
|
||||
MOVQ 0(SP), BP
|
||||
|
||||
MOVB off, ret+56(FP)
|
||||
RET
|
||||
|
||||
#undef off
|
||||
#undef buffer
|
||||
#undef table
|
||||
|
||||
#undef br_bits_read
|
||||
#undef br_value
|
||||
#undef br_offset
|
||||
#undef peek_bits
|
||||
#undef exhausted
|
||||
|
||||
#undef br0
|
||||
#undef br1
|
||||
#undef br2
|
||||
#undef br3
|
|
@ -0,0 +1,195 @@
|
|||
// +build !appengine
|
||||
// +build gc
|
||||
// +build !noasm
|
||||
|
||||
#include "textflag.h"
|
||||
#include "funcdata.h"
|
||||
#include "go_asm.h"
|
||||
|
||||
#ifdef GOAMD64_v4
|
||||
#ifndef GOAMD64_v3
|
||||
#define GOAMD64_v3
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define bufoff 256 // see decompress.go, we're using [4][256]byte table
|
||||
|
||||
//func decompress4x_main_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
|
||||
// peekBits uint8, buf *byte, tbl *dEntrySingle) (int, bool)
|
||||
TEXT ·decompress4x_main_loop_x86(SB), NOSPLIT, $8
|
||||
#define off R8
|
||||
#define buffer DI
|
||||
#define table SI
|
||||
|
||||
#define br_bits_read R9
|
||||
#define br_value R10
|
||||
#define br_offset R11
|
||||
#define peek_bits R12
|
||||
#define exhausted DX
|
||||
|
||||
#define br0 R13
|
||||
#define br1 R14
|
||||
#define br2 R15
|
||||
#define br3 BP
|
||||
|
||||
MOVQ BP, 0(SP)
|
||||
|
||||
XORQ exhausted, exhausted // exhausted = false
|
||||
XORQ off, off // off = 0
|
||||
|
||||
MOVBQZX peekBits+32(FP), peek_bits
|
||||
MOVQ buf+40(FP), buffer
|
||||
MOVQ tbl+48(FP), table
|
||||
|
||||
MOVQ pbr0+0(FP), br0
|
||||
MOVQ pbr1+8(FP), br1
|
||||
MOVQ pbr2+16(FP), br2
|
||||
MOVQ pbr3+24(FP), br3
|
||||
|
||||
main_loop:
|
||||
{{ define "decode_2_values_x86" }}
|
||||
// const stream = {{ var "id" }}
|
||||
// br{{ var "id"}}.fillFast()
|
||||
MOVBQZX bitReaderShifted_bitsRead(br{{ var "id" }}), br_bits_read
|
||||
MOVQ bitReaderShifted_value(br{{ var "id" }}), br_value
|
||||
MOVQ bitReaderShifted_off(br{{ var "id" }}), br_offset
|
||||
|
||||
// We must have at least 2 * max tablelog left
|
||||
CMPQ br_bits_read, $64-22
|
||||
JBE skip_fill{{ var "id" }}
|
||||
|
||||
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
||||
SUBQ $4, br_offset // b.off -= 4
|
||||
|
||||
// v := b.in[b.off-4 : b.off]
|
||||
// v = v[:4]
|
||||
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
||||
MOVQ bitReaderShifted_in(br{{ var "id" }}), AX
|
||||
|
||||
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||
#ifdef GOAMD64_v3
|
||||
SHLXQ br_bits_read, 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) << (b.bitsRead & 63)
|
||||
#else
|
||||
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
||||
MOVQ br_bits_read, CX
|
||||
SHLQ CL, AX
|
||||
#endif
|
||||
|
||||
ORQ AX, br_value
|
||||
|
||||
// exhausted = exhausted || (br{{ var "id"}}.off < 4)
|
||||
CMPQ br_offset, $4
|
||||
SETLT DL
|
||||
ORB DL, DH
|
||||
// }
|
||||
skip_fill{{ var "id" }}:
|
||||
|
||||
// val0 := br{{ var "id"}}.peekTopBits(peekBits)
|
||||
#ifdef GOAMD64_v3
|
||||
SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
|
||||
#else
|
||||
MOVQ br_value, AX
|
||||
MOVQ peek_bits, CX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
#endif
|
||||
|
||||
// v0 := table[val0&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v0
|
||||
|
||||
// br{{ var "id"}}.advance(uint8(v0.entry))
|
||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||
|
||||
#ifdef GOAMD64_v3
|
||||
MOVBQZX AL, CX
|
||||
SHLXQ AX, br_value, br_value // value <<= n
|
||||
#else
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
#endif
|
||||
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
|
||||
#ifdef GOAMD64_v3
|
||||
SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
|
||||
#else
|
||||
// val1 := br{{ var "id"}}.peekTopBits(peekBits)
|
||||
MOVQ peek_bits, CX
|
||||
MOVQ br_value, AX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
#endif
|
||||
|
||||
// v1 := table[val1&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v1
|
||||
|
||||
// br{{ var "id"}}.advance(uint8(v1.entry))
|
||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||
|
||||
#ifdef GOAMD64_v3
|
||||
MOVBQZX AL, CX
|
||||
SHLXQ AX, br_value, br_value // value <<= n
|
||||
#else
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
#endif
|
||||
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
|
||||
// these two writes get coalesced
|
||||
// buf[stream][off] = uint8(v0.entry >> 8)
|
||||
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
||||
MOVW BX, {{ var "bufofs" }}(buffer)(off*1)
|
||||
|
||||
// update the bitrader reader structure
|
||||
MOVB br_bits_read, bitReaderShifted_bitsRead(br{{ var "id" }})
|
||||
MOVQ br_value, bitReaderShifted_value(br{{ var "id" }})
|
||||
MOVQ br_offset, bitReaderShifted_off(br{{ var "id" }})
|
||||
{{ end }}
|
||||
|
||||
{{ set "id" "0" }}
|
||||
{{ set "ofs" "0" }}
|
||||
{{ set "bufofs" "0" }} {{/* id * bufoff */}}
|
||||
{{ template "decode_2_values_x86" . }}
|
||||
|
||||
{{ set "id" "1" }}
|
||||
{{ set "ofs" "8" }}
|
||||
{{ set "bufofs" "256" }}
|
||||
{{ template "decode_2_values_x86" . }}
|
||||
|
||||
{{ set "id" "2" }}
|
||||
{{ set "ofs" "16" }}
|
||||
{{ set "bufofs" "512" }}
|
||||
{{ template "decode_2_values_x86" . }}
|
||||
|
||||
{{ set "id" "3" }}
|
||||
{{ set "ofs" "24" }}
|
||||
{{ set "bufofs" "768" }}
|
||||
{{ template "decode_2_values_x86" . }}
|
||||
|
||||
ADDQ $2, off // off += 2
|
||||
|
||||
TESTB DH, DH // any br[i].ofs < 4?
|
||||
JNZ end
|
||||
|
||||
CMPQ off, $bufoff
|
||||
JL main_loop
|
||||
end:
|
||||
MOVQ 0(SP), BP
|
||||
|
||||
MOVB off, ret+56(FP)
|
||||
RET
|
||||
#undef off
|
||||
#undef buffer
|
||||
#undef table
|
||||
|
||||
#undef br_bits_read
|
||||
#undef br_value
|
||||
#undef br_offset
|
||||
#undef peek_bits
|
||||
#undef exhausted
|
||||
|
||||
#undef br0
|
||||
#undef br1
|
||||
#undef br2
|
||||
#undef br3
|
|
@ -0,0 +1,193 @@
|
|||
//go:build !amd64 || appengine || !gc || noasm
|
||||
// +build !amd64 appengine !gc noasm
|
||||
|
||||
// This file contains a generic implementation of Decoder.Decompress4X.
|
||||
package huff0
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// Decompress4X will decompress a 4X encoded stream.
|
||||
// The length of the supplied input must match the end of a block exactly.
|
||||
// The *capacity* of the dst slice must match the destination size of
|
||||
// the uncompressed data exactly.
|
||||
func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
|
||||
if len(d.dt.single) == 0 {
|
||||
return nil, errors.New("no table loaded")
|
||||
}
|
||||
if len(src) < 6+(4*1) {
|
||||
return nil, errors.New("input too small")
|
||||
}
|
||||
if use8BitTables && d.actualTableLog <= 8 {
|
||||
return d.decompress4X8bit(dst, src)
|
||||
}
|
||||
|
||||
var br [4]bitReaderShifted
|
||||
// Decode "jump table"
|
||||
start := 6
|
||||
for i := 0; i < 3; i++ {
|
||||
length := int(src[i*2]) | (int(src[i*2+1]) << 8)
|
||||
if start+length >= len(src) {
|
||||
return nil, errors.New("truncated input (or invalid offset)")
|
||||
}
|
||||
err := br[i].init(src[start : start+length])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
start += length
|
||||
}
|
||||
err := br[3].init(src[start:])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// destination, offset to match first output
|
||||
dstSize := cap(dst)
|
||||
dst = dst[:dstSize]
|
||||
out := dst
|
||||
dstEvery := (dstSize + 3) / 4
|
||||
|
||||
const tlSize = 1 << tableLogMax
|
||||
const tlMask = tlSize - 1
|
||||
single := d.dt.single[:tlSize]
|
||||
|
||||
// Use temp table to avoid bound checks/append penalty.
|
||||
buf := d.buffer()
|
||||
var off uint8
|
||||
var decoded int
|
||||
|
||||
// Decode 2 values from each decoder/loop.
|
||||
const bufoff = 256
|
||||
for {
|
||||
if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
|
||||
break
|
||||
}
|
||||
|
||||
{
|
||||
const stream = 0
|
||||
const stream2 = 1
|
||||
br[stream].fillFast()
|
||||
br[stream2].fillFast()
|
||||
|
||||
val := br[stream].peekBitsFast(d.actualTableLog)
|
||||
val2 := br[stream2].peekBitsFast(d.actualTableLog)
|
||||
v := single[val&tlMask]
|
||||
v2 := single[val2&tlMask]
|
||||
br[stream].advance(uint8(v.entry))
|
||||
br[stream2].advance(uint8(v2.entry))
|
||||
buf[stream][off] = uint8(v.entry >> 8)
|
||||
buf[stream2][off] = uint8(v2.entry >> 8)
|
||||
|
||||
val = br[stream].peekBitsFast(d.actualTableLog)
|
||||
val2 = br[stream2].peekBitsFast(d.actualTableLog)
|
||||
v = single[val&tlMask]
|
||||
v2 = single[val2&tlMask]
|
||||
br[stream].advance(uint8(v.entry))
|
||||
br[stream2].advance(uint8(v2.entry))
|
||||
buf[stream][off+1] = uint8(v.entry >> 8)
|
||||
buf[stream2][off+1] = uint8(v2.entry >> 8)
|
||||
}
|
||||
|
||||
{
|
||||
const stream = 2
|
||||
const stream2 = 3
|
||||
br[stream].fillFast()
|
||||
br[stream2].fillFast()
|
||||
|
||||
val := br[stream].peekBitsFast(d.actualTableLog)
|
||||
val2 := br[stream2].peekBitsFast(d.actualTableLog)
|
||||
v := single[val&tlMask]
|
||||
v2 := single[val2&tlMask]
|
||||
br[stream].advance(uint8(v.entry))
|
||||
br[stream2].advance(uint8(v2.entry))
|
||||
buf[stream][off] = uint8(v.entry >> 8)
|
||||
buf[stream2][off] = uint8(v2.entry >> 8)
|
||||
|
||||
val = br[stream].peekBitsFast(d.actualTableLog)
|
||||
val2 = br[stream2].peekBitsFast(d.actualTableLog)
|
||||
v = single[val&tlMask]
|
||||
v2 = single[val2&tlMask]
|
||||
br[stream].advance(uint8(v.entry))
|
||||
br[stream2].advance(uint8(v2.entry))
|
||||
buf[stream][off+1] = uint8(v.entry >> 8)
|
||||
buf[stream2][off+1] = uint8(v2.entry >> 8)
|
||||
}
|
||||
|
||||
off += 2
|
||||
|
||||
if off == 0 {
|
||||
if bufoff > dstEvery {
|
||||
d.bufs.Put(buf)
|
||||
return nil, errors.New("corruption detected: stream overrun 1")
|
||||
}
|
||||
copy(out, buf[0][:])
|
||||
copy(out[dstEvery:], buf[1][:])
|
||||
copy(out[dstEvery*2:], buf[2][:])
|
||||
copy(out[dstEvery*3:], buf[3][:])
|
||||
out = out[bufoff:]
|
||||
decoded += bufoff * 4
|
||||
// There must at least be 3 buffers left.
|
||||
if len(out) < dstEvery*3 {
|
||||
d.bufs.Put(buf)
|
||||
return nil, errors.New("corruption detected: stream overrun 2")
|
||||
}
|
||||
}
|
||||
}
|
||||
if off > 0 {
|
||||
ioff := int(off)
|
||||
if len(out) < dstEvery*3+ioff {
|
||||
d.bufs.Put(buf)
|
||||
return nil, errors.New("corruption detected: stream overrun 3")
|
||||
}
|
||||
copy(out, buf[0][:off])
|
||||
copy(out[dstEvery:], buf[1][:off])
|
||||
copy(out[dstEvery*2:], buf[2][:off])
|
||||
copy(out[dstEvery*3:], buf[3][:off])
|
||||
decoded += int(off) * 4
|
||||
out = out[off:]
|
||||
}
|
||||
|
||||
// Decode remaining.
|
||||
remainBytes := dstEvery - (decoded / 4)
|
||||
for i := range br {
|
||||
offset := dstEvery * i
|
||||
endsAt := offset + remainBytes
|
||||
if endsAt > len(out) {
|
||||
endsAt = len(out)
|
||||
}
|
||||
br := &br[i]
|
||||
bitsLeft := br.remaining()
|
||||
for bitsLeft > 0 {
|
||||
br.fill()
|
||||
if offset >= endsAt {
|
||||
d.bufs.Put(buf)
|
||||
return nil, errors.New("corruption detected: stream overrun 4")
|
||||
}
|
||||
|
||||
// Read value and increment offset.
|
||||
val := br.peekBitsFast(d.actualTableLog)
|
||||
v := single[val&tlMask].entry
|
||||
nBits := uint8(v)
|
||||
br.advance(nBits)
|
||||
bitsLeft -= uint(nBits)
|
||||
out[offset] = uint8(v >> 8)
|
||||
offset++
|
||||
}
|
||||
if offset != endsAt {
|
||||
d.bufs.Put(buf)
|
||||
return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt)
|
||||
}
|
||||
decoded += offset - dstEvery*i
|
||||
err = br.close()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
d.bufs.Put(buf)
|
||||
if dstSize != decoded {
|
||||
return nil, errors.New("corruption detected: short output block")
|
||||
}
|
||||
return dst, nil
|
||||
}
|
|
@ -153,10 +153,10 @@ http://sun.aei.polsl.pl/~sdeor/corpus/silesia.zip
|
|||
|
||||
This package:
|
||||
file out level insize outsize millis mb/s
|
||||
silesia.tar zskp 1 211947520 73101992 643 313.87
|
||||
silesia.tar zskp 2 211947520 67504318 969 208.38
|
||||
silesia.tar zskp 3 211947520 64595893 2007 100.68
|
||||
silesia.tar zskp 4 211947520 60995370 8825 22.90
|
||||
silesia.tar zskp 1 211947520 73821326 634 318.47
|
||||
silesia.tar zskp 2 211947520 67655404 1508 133.96
|
||||
silesia.tar zskp 3 211947520 64746933 3000 67.37
|
||||
silesia.tar zskp 4 211947520 60073508 16926 11.94
|
||||
|
||||
cgo zstd:
|
||||
silesia.tar zstd 1 211947520 73605392 543 371.56
|
||||
|
@ -165,94 +165,94 @@ silesia.tar zstd 6 211947520 62916450 1913 105.66
|
|||
silesia.tar zstd 9 211947520 60212393 5063 39.92
|
||||
|
||||
gzip, stdlib/this package:
|
||||
silesia.tar gzstd 1 211947520 80007735 1654 122.21
|
||||
silesia.tar gzkp 1 211947520 80136201 1152 175.45
|
||||
silesia.tar gzstd 1 211947520 80007735 1498 134.87
|
||||
silesia.tar gzkp 1 211947520 80088272 1009 200.31
|
||||
|
||||
GOB stream of binary data. Highly compressible.
|
||||
https://files.klauspost.com/compress/gob-stream.7z
|
||||
|
||||
file out level insize outsize millis mb/s
|
||||
gob-stream zskp 1 1911399616 235022249 3088 590.30
|
||||
gob-stream zskp 2 1911399616 205669791 3786 481.34
|
||||
gob-stream zskp 3 1911399616 175034659 9636 189.17
|
||||
gob-stream zskp 4 1911399616 165609838 50369 36.19
|
||||
gob-stream zskp 1 1911399616 233948096 3230 564.34
|
||||
gob-stream zskp 2 1911399616 203997694 4997 364.73
|
||||
gob-stream zskp 3 1911399616 173526523 13435 135.68
|
||||
gob-stream zskp 4 1911399616 162195235 47559 38.33
|
||||
|
||||
gob-stream zstd 1 1911399616 249810424 2637 691.26
|
||||
gob-stream zstd 3 1911399616 208192146 3490 522.31
|
||||
gob-stream zstd 6 1911399616 193632038 6687 272.56
|
||||
gob-stream zstd 9 1911399616 177620386 16175 112.70
|
||||
|
||||
gob-stream gzstd 1 1911399616 357382641 10251 177.82
|
||||
gob-stream gzkp 1 1911399616 359753026 5438 335.20
|
||||
gob-stream gzstd 1 1911399616 357382013 9046 201.49
|
||||
gob-stream gzkp 1 1911399616 359136669 4885 373.08
|
||||
|
||||
The test data for the Large Text Compression Benchmark is the first
|
||||
10^9 bytes of the English Wikipedia dump on Mar. 3, 2006.
|
||||
http://mattmahoney.net/dc/textdata.html
|
||||
|
||||
file out level insize outsize millis mb/s
|
||||
enwik9 zskp 1 1000000000 343848582 3609 264.18
|
||||
enwik9 zskp 2 1000000000 317276632 5746 165.97
|
||||
enwik9 zskp 3 1000000000 292243069 12162 78.41
|
||||
enwik9 zskp 4 1000000000 262183768 82837 11.51
|
||||
enwik9 zskp 1 1000000000 343833605 3687 258.64
|
||||
enwik9 zskp 2 1000000000 317001237 7672 124.29
|
||||
enwik9 zskp 3 1000000000 291915823 15923 59.89
|
||||
enwik9 zskp 4 1000000000 261710291 77697 12.27
|
||||
|
||||
enwik9 zstd 1 1000000000 358072021 3110 306.65
|
||||
enwik9 zstd 3 1000000000 313734672 4784 199.35
|
||||
enwik9 zstd 6 1000000000 295138875 10290 92.68
|
||||
enwik9 zstd 9 1000000000 278348700 28549 33.40
|
||||
|
||||
enwik9 gzstd 1 1000000000 382578136 9604 99.30
|
||||
enwik9 gzkp 1 1000000000 383825945 6544 145.73
|
||||
enwik9 gzstd 1 1000000000 382578136 8608 110.78
|
||||
enwik9 gzkp 1 1000000000 382781160 5628 169.45
|
||||
|
||||
Highly compressible JSON file.
|
||||
https://files.klauspost.com/compress/github-june-2days-2019.json.zst
|
||||
|
||||
file out level insize outsize millis mb/s
|
||||
github-june-2days-2019.json zskp 1 6273951764 699045015 10620 563.40
|
||||
github-june-2days-2019.json zskp 2 6273951764 617881763 11687 511.96
|
||||
github-june-2days-2019.json zskp 3 6273951764 524340691 34043 175.75
|
||||
github-june-2days-2019.json zskp 4 6273951764 470320075 170190 35.16
|
||||
github-june-2days-2019.json zskp 1 6273951764 697439532 9789 611.17
|
||||
github-june-2days-2019.json zskp 2 6273951764 610876538 18553 322.49
|
||||
github-june-2days-2019.json zskp 3 6273951764 517662858 44186 135.41
|
||||
github-june-2days-2019.json zskp 4 6273951764 464617114 165373 36.18
|
||||
|
||||
github-june-2days-2019.json zstd 1 6273951764 766284037 8450 708.00
|
||||
github-june-2days-2019.json zstd 3 6273951764 661889476 10927 547.57
|
||||
github-june-2days-2019.json zstd 6 6273951764 642756859 22996 260.18
|
||||
github-june-2days-2019.json zstd 9 6273951764 601974523 52413 114.16
|
||||
|
||||
github-june-2days-2019.json gzstd 1 6273951764 1164400847 29948 199.79
|
||||
github-june-2days-2019.json gzkp 1 6273951764 1125417694 21788 274.61
|
||||
github-june-2days-2019.json gzstd 1 6273951764 1164397768 26793 223.32
|
||||
github-june-2days-2019.json gzkp 1 6273951764 1120631856 17693 338.16
|
||||
|
||||
VM Image, Linux mint with a few installed applications:
|
||||
https://files.klauspost.com/compress/rawstudio-mint14.7z
|
||||
|
||||
file out level insize outsize millis mb/s
|
||||
rawstudio-mint14.tar zskp 1 8558382592 3667489370 20210 403.84
|
||||
rawstudio-mint14.tar zskp 2 8558382592 3364592300 31873 256.07
|
||||
rawstudio-mint14.tar zskp 3 8558382592 3158085214 77675 105.08
|
||||
rawstudio-mint14.tar zskp 4 8558382592 2965110639 857750 9.52
|
||||
rawstudio-mint14.tar zskp 1 8558382592 3718400221 18206 448.29
|
||||
rawstudio-mint14.tar zskp 2 8558382592 3326118337 37074 220.15
|
||||
rawstudio-mint14.tar zskp 3 8558382592 3163842361 87306 93.49
|
||||
rawstudio-mint14.tar zskp 4 8558382592 2970480650 783862 10.41
|
||||
|
||||
rawstudio-mint14.tar zstd 1 8558382592 3609250104 17136 476.27
|
||||
rawstudio-mint14.tar zstd 3 8558382592 3341679997 29262 278.92
|
||||
rawstudio-mint14.tar zstd 6 8558382592 3235846406 77904 104.77
|
||||
rawstudio-mint14.tar zstd 9 8558382592 3160778861 140946 57.91
|
||||
|
||||
rawstudio-mint14.tar gzstd 1 8558382592 3926257486 57722 141.40
|
||||
rawstudio-mint14.tar gzkp 1 8558382592 3962605659 45113 180.92
|
||||
rawstudio-mint14.tar gzstd 1 8558382592 3926234992 51345 158.96
|
||||
rawstudio-mint14.tar gzkp 1 8558382592 3960117298 36722 222.26
|
||||
|
||||
CSV data:
|
||||
https://files.klauspost.com/compress/nyc-taxi-data-10M.csv.zst
|
||||
|
||||
file out level insize outsize millis mb/s
|
||||
nyc-taxi-data-10M.csv zskp 1 3325605752 641339945 8925 355.35
|
||||
nyc-taxi-data-10M.csv zskp 2 3325605752 591748091 11268 281.44
|
||||
nyc-taxi-data-10M.csv zskp 3 3325605752 530289687 25239 125.66
|
||||
nyc-taxi-data-10M.csv zskp 4 3325605752 476268884 135958 23.33
|
||||
nyc-taxi-data-10M.csv zskp 1 3325605752 641319332 9462 335.17
|
||||
nyc-taxi-data-10M.csv zskp 2 3325605752 588976126 17570 180.50
|
||||
nyc-taxi-data-10M.csv zskp 3 3325605752 529329260 32432 97.79
|
||||
nyc-taxi-data-10M.csv zskp 4 3325605752 474949772 138025 22.98
|
||||
|
||||
nyc-taxi-data-10M.csv zstd 1 3325605752 687399637 8233 385.18
|
||||
nyc-taxi-data-10M.csv zstd 3 3325605752 598514411 10065 315.07
|
||||
nyc-taxi-data-10M.csv zstd 6 3325605752 570522953 20038 158.27
|
||||
nyc-taxi-data-10M.csv zstd 9 3325605752 517554797 64565 49.12
|
||||
|
||||
nyc-taxi-data-10M.csv gzstd 1 3325605752 928656485 23876 132.83
|
||||
nyc-taxi-data-10M.csv gzkp 1 3325605752 922257165 16780 189.00
|
||||
nyc-taxi-data-10M.csv gzstd 1 3325605752 928654908 21270 149.11
|
||||
nyc-taxi-data-10M.csv gzkp 1 3325605752 922273214 13929 227.68
|
||||
```
|
||||
|
||||
## Decompressor
|
||||
|
|
|
@ -167,6 +167,11 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
|
|||
}
|
||||
return ErrCompressedSizeTooBig
|
||||
}
|
||||
// Empty compressed blocks must at least be 2 bytes
|
||||
// for Literals_Block_Type and one for Sequences_Section_Header.
|
||||
if cSize < 2 {
|
||||
return ErrBlockTooSmall
|
||||
}
|
||||
case blockTypeRaw:
|
||||
if cSize > maxCompressedBlockSize || cSize > int(b.WindowSize) {
|
||||
if debugDecoder {
|
||||
|
@ -491,6 +496,9 @@ func (b *blockDec) decodeCompressed(hist *history) error {
|
|||
}
|
||||
|
||||
func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) {
|
||||
if debugDecoder {
|
||||
printf("prepareSequences: %d byte(s) input\n", len(in))
|
||||
}
|
||||
// Decode Sequences
|
||||
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#sequences-section
|
||||
if len(in) < 1 {
|
||||
|
@ -499,8 +507,6 @@ func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) {
|
|||
var nSeqs int
|
||||
seqHeader := in[0]
|
||||
switch {
|
||||
case seqHeader == 0:
|
||||
in = in[1:]
|
||||
case seqHeader < 128:
|
||||
nSeqs = int(seqHeader)
|
||||
in = in[1:]
|
||||
|
@ -517,6 +523,13 @@ func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) {
|
|||
nSeqs = 0x7f00 + int(in[1]) + (int(in[2]) << 8)
|
||||
in = in[3:]
|
||||
}
|
||||
if nSeqs == 0 && len(in) != 0 {
|
||||
// When no sequences, there should not be any more data...
|
||||
if debugDecoder {
|
||||
printf("prepareSequences: 0 sequences, but %d byte(s) left on stream\n", len(in))
|
||||
}
|
||||
return ErrUnexpectedBlockSize
|
||||
}
|
||||
|
||||
var seqs = &hist.decoders
|
||||
seqs.nSeqs = nSeqs
|
||||
|
@ -635,6 +648,7 @@ func (b *blockDec) decodeSequences(hist *history) error {
|
|||
hist.decoders.seqSize = len(hist.decoders.literals)
|
||||
return nil
|
||||
}
|
||||
hist.decoders.windowSize = hist.windowSize
|
||||
hist.decoders.prevOffset = hist.recentOffsets
|
||||
err := hist.decoders.decode(b.sequence)
|
||||
hist.recentOffsets = hist.decoders.prevOffset
|
||||
|
|
|
@ -348,10 +348,10 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
|
|||
frame.history.setDict(&dict)
|
||||
}
|
||||
|
||||
if frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)) {
|
||||
if frame.FrameContentSize != fcsUnknown && frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)) {
|
||||
return dst, ErrDecoderSizeExceeded
|
||||
}
|
||||
if frame.FrameContentSize > 0 && frame.FrameContentSize < 1<<30 {
|
||||
if frame.FrameContentSize < 1<<30 {
|
||||
// Never preallocate more than 1 GB up front.
|
||||
if cap(dst)-len(dst) < int(frame.FrameContentSize) {
|
||||
dst2 := make([]byte, len(dst), len(dst)+int(frame.FrameContentSize))
|
||||
|
@ -514,7 +514,7 @@ func (d *Decoder) nextBlockSync() (ok bool) {
|
|||
|
||||
// Check frame size (before CRC)
|
||||
d.syncStream.decodedFrame += uint64(len(d.current.b))
|
||||
if d.frame.FrameContentSize > 0 && d.syncStream.decodedFrame > d.frame.FrameContentSize {
|
||||
if d.syncStream.decodedFrame > d.frame.FrameContentSize {
|
||||
if debugDecoder {
|
||||
printf("DecodedFrame (%d) > FrameContentSize (%d)\n", d.syncStream.decodedFrame, d.frame.FrameContentSize)
|
||||
}
|
||||
|
@ -523,7 +523,7 @@ func (d *Decoder) nextBlockSync() (ok bool) {
|
|||
}
|
||||
|
||||
// Check FCS
|
||||
if d.current.d.Last && d.frame.FrameContentSize > 0 && d.syncStream.decodedFrame != d.frame.FrameContentSize {
|
||||
if d.current.d.Last && d.frame.FrameContentSize != fcsUnknown && d.syncStream.decodedFrame != d.frame.FrameContentSize {
|
||||
if debugDecoder {
|
||||
printf("DecodedFrame (%d) != FrameContentSize (%d)\n", d.syncStream.decodedFrame, d.frame.FrameContentSize)
|
||||
}
|
||||
|
@ -700,6 +700,7 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
|
|||
}
|
||||
hist.decoders = block.async.newHist.decoders
|
||||
hist.recentOffsets = block.async.newHist.recentOffsets
|
||||
hist.windowSize = block.async.newHist.windowSize
|
||||
if block.async.newHist.dict != nil {
|
||||
hist.setDict(block.async.newHist.dict)
|
||||
}
|
||||
|
@ -811,11 +812,11 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
|
|||
}
|
||||
if !hasErr {
|
||||
decodedFrame += uint64(len(do.b))
|
||||
if fcs > 0 && decodedFrame > fcs {
|
||||
if decodedFrame > fcs {
|
||||
println("fcs exceeded", block.Last, fcs, decodedFrame)
|
||||
do.err = ErrFrameSizeExceeded
|
||||
hasErr = true
|
||||
} else if block.Last && fcs > 0 && decodedFrame != fcs {
|
||||
} else if block.Last && fcs != fcsUnknown && decodedFrame != fcs {
|
||||
do.err = ErrFrameSizeMismatch
|
||||
hasErr = true
|
||||
} else {
|
||||
|
|
|
@ -197,7 +197,7 @@ func (d *frameDec) reset(br byteBuffer) error {
|
|||
default:
|
||||
fcsSize = 1 << v
|
||||
}
|
||||
d.FrameContentSize = 0
|
||||
d.FrameContentSize = fcsUnknown
|
||||
if fcsSize > 0 {
|
||||
b, err := br.readSmall(fcsSize)
|
||||
if err != nil {
|
||||
|
@ -343,12 +343,7 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
|
|||
err = ErrDecoderSizeExceeded
|
||||
break
|
||||
}
|
||||
if d.SingleSegment && uint64(len(d.history.b)) > d.o.maxDecodedSize {
|
||||
println("runDecoder: single segment and", uint64(len(d.history.b)), ">", d.o.maxDecodedSize)
|
||||
err = ErrFrameSizeExceeded
|
||||
break
|
||||
}
|
||||
if d.FrameContentSize > 0 && uint64(len(d.history.b)-crcStart) > d.FrameContentSize {
|
||||
if uint64(len(d.history.b)-crcStart) > d.FrameContentSize {
|
||||
println("runDecoder: FrameContentSize exceeded", uint64(len(d.history.b)-crcStart), ">", d.FrameContentSize)
|
||||
err = ErrFrameSizeExceeded
|
||||
break
|
||||
|
@ -356,13 +351,13 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
|
|||
if dec.Last {
|
||||
break
|
||||
}
|
||||
if debugDecoder && d.FrameContentSize > 0 {
|
||||
if debugDecoder {
|
||||
println("runDecoder: FrameContentSize", uint64(len(d.history.b)-crcStart), "<=", d.FrameContentSize)
|
||||
}
|
||||
}
|
||||
dst = d.history.b
|
||||
if err == nil {
|
||||
if d.FrameContentSize > 0 && uint64(len(d.history.b)-crcStart) != d.FrameContentSize {
|
||||
if d.FrameContentSize != fcsUnknown && uint64(len(d.history.b)-crcStart) != d.FrameContentSize {
|
||||
err = ErrFrameSizeMismatch
|
||||
} else if d.HasCheckSum {
|
||||
var n int
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
//go:build gofuzz
|
||||
// +build gofuzz
|
||||
//go:build ignorecrc
|
||||
// +build ignorecrc
|
||||
|
||||
// Copyright 2019+ Klaus Post. All rights reserved.
|
||||
// License information can be found in the LICENSE file.
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
//go:build !gofuzz
|
||||
// +build !gofuzz
|
||||
//go:build !ignorecrc
|
||||
// +build !ignorecrc
|
||||
|
||||
// Copyright 2019+ Klaus Post. All rights reserved.
|
||||
// License information can be found in the LICENSE file.
|
||||
|
|
|
@ -107,7 +107,10 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
|
|||
llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
|
||||
s.seqSize = 0
|
||||
litRemain := len(s.literals)
|
||||
|
||||
maxBlockSize := maxCompressedBlockSize
|
||||
if s.windowSize < maxBlockSize {
|
||||
maxBlockSize = s.windowSize
|
||||
}
|
||||
for i := range seqs {
|
||||
var ll, mo, ml int
|
||||
if br.off > 4+((maxOffsetBits+16+16)>>3) {
|
||||
|
@ -192,7 +195,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
|
|||
}
|
||||
s.seqSize += ll + ml
|
||||
if s.seqSize > maxBlockSize {
|
||||
return fmt.Errorf("output (%d) bigger than max block size", s.seqSize)
|
||||
return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
|
||||
}
|
||||
litRemain -= ll
|
||||
if litRemain < 0 {
|
||||
|
@ -230,7 +233,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
|
|||
}
|
||||
s.seqSize += litRemain
|
||||
if s.seqSize > maxBlockSize {
|
||||
return fmt.Errorf("output (%d) bigger than max block size", s.seqSize)
|
||||
return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
|
||||
}
|
||||
err := br.close()
|
||||
if err != nil {
|
||||
|
@ -347,6 +350,10 @@ func (s *sequenceDecs) decodeSync(history *history) error {
|
|||
llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
|
||||
hist := history.b[history.ignoreBuffer:]
|
||||
out := s.out
|
||||
maxBlockSize := maxCompressedBlockSize
|
||||
if s.windowSize < maxBlockSize {
|
||||
maxBlockSize = s.windowSize
|
||||
}
|
||||
|
||||
for i := seqs - 1; i >= 0; i-- {
|
||||
if br.overread() {
|
||||
|
@ -426,7 +433,7 @@ func (s *sequenceDecs) decodeSync(history *history) error {
|
|||
}
|
||||
size := ll + ml + len(out)
|
||||
if size-startSize > maxBlockSize {
|
||||
return fmt.Errorf("output (%d) bigger than max block size", size)
|
||||
return fmt.Errorf("output (%d) bigger than max block size (%d)", size, maxBlockSize)
|
||||
}
|
||||
if size > cap(out) {
|
||||
// Not enough size, which can happen under high volume block streaming conditions
|
||||
|
@ -535,6 +542,11 @@ func (s *sequenceDecs) decodeSync(history *history) error {
|
|||
}
|
||||
}
|
||||
|
||||
// Check if space for literals
|
||||
if len(s.literals)+len(s.out)-startSize > maxBlockSize {
|
||||
return fmt.Errorf("output (%d) bigger than max block size (%d)", len(s.out), maxBlockSize)
|
||||
}
|
||||
|
||||
// Add final literals
|
||||
s.out = append(out, s.literals...)
|
||||
return br.close()
|
||||
|
|
|
@ -20,7 +20,7 @@ const ZipMethodPKWare = 20
|
|||
|
||||
var zipReaderPool sync.Pool
|
||||
|
||||
// newZipReader cannot be used since we would leak goroutines...
|
||||
// newZipReader creates a pooled zip decompressor.
|
||||
func newZipReader(r io.Reader) io.ReadCloser {
|
||||
dec, ok := zipReaderPool.Get().(*Decoder)
|
||||
if ok {
|
||||
|
@ -44,10 +44,14 @@ func (r *pooledZipReader) Read(p []byte) (n int, err error) {
|
|||
r.mu.Lock()
|
||||
defer r.mu.Unlock()
|
||||
if r.dec == nil {
|
||||
return 0, errors.New("Read after Close")
|
||||
return 0, errors.New("read after close or EOF")
|
||||
}
|
||||
dec, err := r.dec.Read(p)
|
||||
|
||||
if err == io.EOF {
|
||||
err = r.dec.Reset(nil)
|
||||
zipReaderPool.Put(r.dec)
|
||||
r.dec = nil
|
||||
}
|
||||
return dec, err
|
||||
}
|
||||
|
||||
|
@ -112,11 +116,5 @@ func ZipCompressor(opts ...EOption) func(w io.Writer) (io.WriteCloser, error) {
|
|||
// ZipDecompressor returns a decompressor that can be registered with zip libraries.
|
||||
// See ZipCompressor for example.
|
||||
func ZipDecompressor() func(r io.Reader) io.ReadCloser {
|
||||
return func(r io.Reader) io.ReadCloser {
|
||||
d, err := NewReader(r, WithDecoderConcurrency(1), WithDecoderLowmem(true))
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return d.IOReadCloser()
|
||||
}
|
||||
return newZipReader
|
||||
}
|
||||
|
|
|
@ -39,6 +39,9 @@ const zstdMinMatch = 3
|
|||
// Reset the buffer offset when reaching this.
|
||||
const bufferReset = math.MaxInt32 - MaxWindowSize
|
||||
|
||||
// fcsUnknown is used for unknown frame content size.
|
||||
const fcsUnknown = math.MaxUint64
|
||||
|
||||
var (
|
||||
// ErrReservedBlockType is returned when a reserved block type is found.
|
||||
// Typically this indicates wrong or corrupted input.
|
||||
|
@ -52,6 +55,10 @@ var (
|
|||
// Typically returned on invalid input.
|
||||
ErrBlockTooSmall = errors.New("block too small")
|
||||
|
||||
// ErrUnexpectedBlockSize is returned when a block has unexpected size.
|
||||
// Typically returned on invalid input.
|
||||
ErrUnexpectedBlockSize = errors.New("unexpected block size")
|
||||
|
||||
// ErrMagicMismatch is returned when a "magic" number isn't what is expected.
|
||||
// Typically this indicates wrong or corrupted input.
|
||||
ErrMagicMismatch = errors.New("invalid input: magic number mismatch")
|
||||
|
|
|
@ -133,7 +133,7 @@ github.com/imdario/mergo
|
|||
# github.com/inconshreveable/mousetrap v1.0.0
|
||||
## explicit
|
||||
github.com/inconshreveable/mousetrap
|
||||
# github.com/klauspost/compress v1.15.0
|
||||
# github.com/klauspost/compress v1.15.1
|
||||
## explicit; go 1.15
|
||||
github.com/klauspost/compress
|
||||
github.com/klauspost/compress/fse
|
||||
|
|
Loading…
Reference in New Issue