mirror of https://github.com/docker/cli.git
vendor: github.com/cespare/xxhash/v2 v2.2.0
full diff: https://github.com/cespare/xxhash/compare/v2.1.2...v2.2.0 Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
This commit is contained in:
parent
c1d0657029
commit
5b138189b9
|
@ -50,7 +50,7 @@ require (
|
||||||
github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 // indirect
|
github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 // indirect
|
||||||
github.com/Microsoft/go-winio v0.5.2 // indirect
|
github.com/Microsoft/go-winio v0.5.2 // indirect
|
||||||
github.com/beorn7/perks v1.0.1 // indirect
|
github.com/beorn7/perks v1.0.1 // indirect
|
||||||
github.com/cespare/xxhash/v2 v2.1.2 // indirect
|
github.com/cespare/xxhash/v2 v2.2.0 // indirect
|
||||||
github.com/docker/go v1.5.1-1.0.20160303222718-d30aec9fd63c // indirect
|
github.com/docker/go v1.5.1-1.0.20160303222718-d30aec9fd63c // indirect
|
||||||
github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c // indirect
|
github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c // indirect
|
||||||
github.com/docker/go-metrics v0.0.1 // indirect
|
github.com/docker/go-metrics v0.0.1 // indirect
|
||||||
|
|
|
@ -67,8 +67,9 @@ github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA
|
||||||
github.com/certifi/gocertifi v0.0.0-20191021191039-0944d244cd40/go.mod h1:sGbDF6GwGcLpkNXPUTkMRoywsNa/ol15pxFe6ERfguA=
|
github.com/certifi/gocertifi v0.0.0-20191021191039-0944d244cd40/go.mod h1:sGbDF6GwGcLpkNXPUTkMRoywsNa/ol15pxFe6ERfguA=
|
||||||
github.com/certifi/gocertifi v0.0.0-20200922220541-2c3bb06c6054/go.mod h1:sGbDF6GwGcLpkNXPUTkMRoywsNa/ol15pxFe6ERfguA=
|
github.com/certifi/gocertifi v0.0.0-20200922220541-2c3bb06c6054/go.mod h1:sGbDF6GwGcLpkNXPUTkMRoywsNa/ol15pxFe6ERfguA=
|
||||||
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||||
github.com/cespare/xxhash/v2 v2.1.2 h1:YRXhKfTDauu4ajMg1TPgFO5jnlC2HCbmLXMcTG5cbYE=
|
|
||||||
github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||||
|
github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
|
||||||
|
github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||||
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
|
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
|
||||||
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
|
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
|
||||||
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
|
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
|
||||||
|
|
|
@ -3,8 +3,7 @@
|
||||||
[![Go Reference](https://pkg.go.dev/badge/github.com/cespare/xxhash/v2.svg)](https://pkg.go.dev/github.com/cespare/xxhash/v2)
|
[![Go Reference](https://pkg.go.dev/badge/github.com/cespare/xxhash/v2.svg)](https://pkg.go.dev/github.com/cespare/xxhash/v2)
|
||||||
[![Test](https://github.com/cespare/xxhash/actions/workflows/test.yml/badge.svg)](https://github.com/cespare/xxhash/actions/workflows/test.yml)
|
[![Test](https://github.com/cespare/xxhash/actions/workflows/test.yml/badge.svg)](https://github.com/cespare/xxhash/actions/workflows/test.yml)
|
||||||
|
|
||||||
xxhash is a Go implementation of the 64-bit
|
xxhash is a Go implementation of the 64-bit [xxHash] algorithm, XXH64. This is a
|
||||||
[xxHash](http://cyan4973.github.io/xxHash/) algorithm, XXH64. This is a
|
|
||||||
high-quality hashing algorithm that is much faster than anything in the Go
|
high-quality hashing algorithm that is much faster than anything in the Go
|
||||||
standard library.
|
standard library.
|
||||||
|
|
||||||
|
@ -25,8 +24,11 @@ func (*Digest) WriteString(string) (int, error)
|
||||||
func (*Digest) Sum64() uint64
|
func (*Digest) Sum64() uint64
|
||||||
```
|
```
|
||||||
|
|
||||||
This implementation provides a fast pure-Go implementation and an even faster
|
The package is written with optimized pure Go and also contains even faster
|
||||||
assembly implementation for amd64.
|
assembly implementations for amd64 and arm64. If desired, the `purego` build tag
|
||||||
|
opts into using the Go code even on those architectures.
|
||||||
|
|
||||||
|
[xxHash]: http://cyan4973.github.io/xxHash/
|
||||||
|
|
||||||
## Compatibility
|
## Compatibility
|
||||||
|
|
||||||
|
@ -46,18 +48,19 @@ Here are some quick benchmarks comparing the pure-Go and assembly
|
||||||
implementations of Sum64.
|
implementations of Sum64.
|
||||||
|
|
||||||
| input size | purego | asm |
|
| input size | purego | asm |
|
||||||
| --- | --- | --- |
|
| ---------- | --------- | --------- |
|
||||||
| 5 B | 979.66 MB/s | 1291.17 MB/s |
|
| 4 B | 1.3 GB/s | 1.2 GB/s |
|
||||||
| 100 B | 7475.26 MB/s | 7973.40 MB/s |
|
| 16 B | 2.9 GB/s | 3.5 GB/s |
|
||||||
| 4 KB | 17573.46 MB/s | 17602.65 MB/s |
|
| 100 B | 6.9 GB/s | 8.1 GB/s |
|
||||||
| 10 MB | 17131.46 MB/s | 17142.16 MB/s |
|
| 4 KB | 11.7 GB/s | 16.7 GB/s |
|
||||||
|
| 10 MB | 12.0 GB/s | 17.3 GB/s |
|
||||||
|
|
||||||
These numbers were generated on Ubuntu 18.04 with an Intel i7-8700K CPU using
|
These numbers were generated on Ubuntu 20.04 with an Intel Xeon Platinum 8252C
|
||||||
the following commands under Go 1.11.2:
|
CPU using the following commands under Go 1.19.2:
|
||||||
|
|
||||||
```
|
```
|
||||||
$ go test -tags purego -benchtime 10s -bench '/xxhash,direct,bytes'
|
benchstat <(go test -tags purego -benchtime 500ms -count 15 -bench 'Sum64$')
|
||||||
$ go test -benchtime 10s -bench '/xxhash,direct,bytes'
|
benchstat <(go test -benchtime 500ms -count 15 -bench 'Sum64$')
|
||||||
```
|
```
|
||||||
|
|
||||||
## Projects using this package
|
## Projects using this package
|
||||||
|
|
|
@ -0,0 +1,10 @@
|
||||||
|
#!/bin/bash
|
||||||
|
set -eu -o pipefail
|
||||||
|
|
||||||
|
# Small convenience script for running the tests with various combinations of
|
||||||
|
# arch/tags. This assumes we're running on amd64 and have qemu available.
|
||||||
|
|
||||||
|
go test ./...
|
||||||
|
go test -tags purego ./...
|
||||||
|
GOARCH=arm64 go test
|
||||||
|
GOARCH=arm64 go test -tags purego
|
|
@ -16,19 +16,11 @@ const (
|
||||||
prime5 uint64 = 2870177450012600261
|
prime5 uint64 = 2870177450012600261
|
||||||
)
|
)
|
||||||
|
|
||||||
// NOTE(caleb): I'm using both consts and vars of the primes. Using consts where
|
// Store the primes in an array as well.
|
||||||
// possible in the Go code is worth a small (but measurable) performance boost
|
//
|
||||||
// by avoiding some MOVQs. Vars are needed for the asm and also are useful for
|
// The consts are used when possible in Go code to avoid MOVs but we need a
|
||||||
// convenience in the Go code in a few places where we need to intentionally
|
// contiguous array of the assembly code.
|
||||||
// avoid constant arithmetic (e.g., v1 := prime1 + prime2 fails because the
|
var primes = [...]uint64{prime1, prime2, prime3, prime4, prime5}
|
||||||
// result overflows a uint64).
|
|
||||||
var (
|
|
||||||
prime1v = prime1
|
|
||||||
prime2v = prime2
|
|
||||||
prime3v = prime3
|
|
||||||
prime4v = prime4
|
|
||||||
prime5v = prime5
|
|
||||||
)
|
|
||||||
|
|
||||||
// Digest implements hash.Hash64.
|
// Digest implements hash.Hash64.
|
||||||
type Digest struct {
|
type Digest struct {
|
||||||
|
@ -50,10 +42,10 @@ func New() *Digest {
|
||||||
|
|
||||||
// Reset clears the Digest's state so that it can be reused.
|
// Reset clears the Digest's state so that it can be reused.
|
||||||
func (d *Digest) Reset() {
|
func (d *Digest) Reset() {
|
||||||
d.v1 = prime1v + prime2
|
d.v1 = primes[0] + prime2
|
||||||
d.v2 = prime2
|
d.v2 = prime2
|
||||||
d.v3 = 0
|
d.v3 = 0
|
||||||
d.v4 = -prime1v
|
d.v4 = -primes[0]
|
||||||
d.total = 0
|
d.total = 0
|
||||||
d.n = 0
|
d.n = 0
|
||||||
}
|
}
|
||||||
|
@ -69,21 +61,23 @@ func (d *Digest) Write(b []byte) (n int, err error) {
|
||||||
n = len(b)
|
n = len(b)
|
||||||
d.total += uint64(n)
|
d.total += uint64(n)
|
||||||
|
|
||||||
|
memleft := d.mem[d.n&(len(d.mem)-1):]
|
||||||
|
|
||||||
if d.n+n < 32 {
|
if d.n+n < 32 {
|
||||||
// This new data doesn't even fill the current block.
|
// This new data doesn't even fill the current block.
|
||||||
copy(d.mem[d.n:], b)
|
copy(memleft, b)
|
||||||
d.n += n
|
d.n += n
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if d.n > 0 {
|
if d.n > 0 {
|
||||||
// Finish off the partial block.
|
// Finish off the partial block.
|
||||||
copy(d.mem[d.n:], b)
|
c := copy(memleft, b)
|
||||||
d.v1 = round(d.v1, u64(d.mem[0:8]))
|
d.v1 = round(d.v1, u64(d.mem[0:8]))
|
||||||
d.v2 = round(d.v2, u64(d.mem[8:16]))
|
d.v2 = round(d.v2, u64(d.mem[8:16]))
|
||||||
d.v3 = round(d.v3, u64(d.mem[16:24]))
|
d.v3 = round(d.v3, u64(d.mem[16:24]))
|
||||||
d.v4 = round(d.v4, u64(d.mem[24:32]))
|
d.v4 = round(d.v4, u64(d.mem[24:32]))
|
||||||
b = b[32-d.n:]
|
b = b[c:]
|
||||||
d.n = 0
|
d.n = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -133,21 +127,20 @@ func (d *Digest) Sum64() uint64 {
|
||||||
|
|
||||||
h += d.total
|
h += d.total
|
||||||
|
|
||||||
i, end := 0, d.n
|
b := d.mem[:d.n&(len(d.mem)-1)]
|
||||||
for ; i+8 <= end; i += 8 {
|
for ; len(b) >= 8; b = b[8:] {
|
||||||
k1 := round(0, u64(d.mem[i:i+8]))
|
k1 := round(0, u64(b[:8]))
|
||||||
h ^= k1
|
h ^= k1
|
||||||
h = rol27(h)*prime1 + prime4
|
h = rol27(h)*prime1 + prime4
|
||||||
}
|
}
|
||||||
if i+4 <= end {
|
if len(b) >= 4 {
|
||||||
h ^= uint64(u32(d.mem[i:i+4])) * prime1
|
h ^= uint64(u32(b[:4])) * prime1
|
||||||
h = rol23(h)*prime2 + prime3
|
h = rol23(h)*prime2 + prime3
|
||||||
i += 4
|
b = b[4:]
|
||||||
}
|
}
|
||||||
for i < end {
|
for ; len(b) > 0; b = b[1:] {
|
||||||
h ^= uint64(d.mem[i]) * prime5
|
h ^= uint64(b[0]) * prime5
|
||||||
h = rol11(h) * prime1
|
h = rol11(h) * prime1
|
||||||
i++
|
|
||||||
}
|
}
|
||||||
|
|
||||||
h ^= h >> 33
|
h ^= h >> 33
|
||||||
|
|
|
@ -1,215 +1,209 @@
|
||||||
|
//go:build !appengine && gc && !purego
|
||||||
// +build !appengine
|
// +build !appengine
|
||||||
// +build gc
|
// +build gc
|
||||||
// +build !purego
|
// +build !purego
|
||||||
|
|
||||||
#include "textflag.h"
|
#include "textflag.h"
|
||||||
|
|
||||||
// Register allocation:
|
// Registers:
|
||||||
// AX h
|
#define h AX
|
||||||
// SI pointer to advance through b
|
#define d AX
|
||||||
// DX n
|
#define p SI // pointer to advance through b
|
||||||
// BX loop end
|
#define n DX
|
||||||
// R8 v1, k1
|
#define end BX // loop end
|
||||||
// R9 v2
|
#define v1 R8
|
||||||
// R10 v3
|
#define v2 R9
|
||||||
// R11 v4
|
#define v3 R10
|
||||||
// R12 tmp
|
#define v4 R11
|
||||||
// R13 prime1v
|
#define x R12
|
||||||
// R14 prime2v
|
#define prime1 R13
|
||||||
// DI prime4v
|
#define prime2 R14
|
||||||
|
#define prime4 DI
|
||||||
|
|
||||||
// round reads from and advances the buffer pointer in SI.
|
#define round(acc, x) \
|
||||||
// It assumes that R13 has prime1v and R14 has prime2v.
|
IMULQ prime2, x \
|
||||||
#define round(r) \
|
ADDQ x, acc \
|
||||||
MOVQ (SI), R12 \
|
ROLQ $31, acc \
|
||||||
ADDQ $8, SI \
|
IMULQ prime1, acc
|
||||||
IMULQ R14, R12 \
|
|
||||||
ADDQ R12, r \
|
|
||||||
ROLQ $31, r \
|
|
||||||
IMULQ R13, r
|
|
||||||
|
|
||||||
// mergeRound applies a merge round on the two registers acc and val.
|
// round0 performs the operation x = round(0, x).
|
||||||
// It assumes that R13 has prime1v, R14 has prime2v, and DI has prime4v.
|
#define round0(x) \
|
||||||
#define mergeRound(acc, val) \
|
IMULQ prime2, x \
|
||||||
IMULQ R14, val \
|
ROLQ $31, x \
|
||||||
ROLQ $31, val \
|
IMULQ prime1, x
|
||||||
IMULQ R13, val \
|
|
||||||
XORQ val, acc \
|
// mergeRound applies a merge round on the two registers acc and x.
|
||||||
IMULQ R13, acc \
|
// It assumes that prime1, prime2, and prime4 have been loaded.
|
||||||
ADDQ DI, acc
|
#define mergeRound(acc, x) \
|
||||||
|
round0(x) \
|
||||||
|
XORQ x, acc \
|
||||||
|
IMULQ prime1, acc \
|
||||||
|
ADDQ prime4, acc
|
||||||
|
|
||||||
|
// blockLoop processes as many 32-byte blocks as possible,
|
||||||
|
// updating v1, v2, v3, and v4. It assumes that there is at least one block
|
||||||
|
// to process.
|
||||||
|
#define blockLoop() \
|
||||||
|
loop: \
|
||||||
|
MOVQ +0(p), x \
|
||||||
|
round(v1, x) \
|
||||||
|
MOVQ +8(p), x \
|
||||||
|
round(v2, x) \
|
||||||
|
MOVQ +16(p), x \
|
||||||
|
round(v3, x) \
|
||||||
|
MOVQ +24(p), x \
|
||||||
|
round(v4, x) \
|
||||||
|
ADDQ $32, p \
|
||||||
|
CMPQ p, end \
|
||||||
|
JLE loop
|
||||||
|
|
||||||
// func Sum64(b []byte) uint64
|
// func Sum64(b []byte) uint64
|
||||||
TEXT ·Sum64(SB), NOSPLIT, $0-32
|
TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32
|
||||||
// Load fixed primes.
|
// Load fixed primes.
|
||||||
MOVQ ·prime1v(SB), R13
|
MOVQ ·primes+0(SB), prime1
|
||||||
MOVQ ·prime2v(SB), R14
|
MOVQ ·primes+8(SB), prime2
|
||||||
MOVQ ·prime4v(SB), DI
|
MOVQ ·primes+24(SB), prime4
|
||||||
|
|
||||||
// Load slice.
|
// Load slice.
|
||||||
MOVQ b_base+0(FP), SI
|
MOVQ b_base+0(FP), p
|
||||||
MOVQ b_len+8(FP), DX
|
MOVQ b_len+8(FP), n
|
||||||
LEAQ (SI)(DX*1), BX
|
LEAQ (p)(n*1), end
|
||||||
|
|
||||||
// The first loop limit will be len(b)-32.
|
// The first loop limit will be len(b)-32.
|
||||||
SUBQ $32, BX
|
SUBQ $32, end
|
||||||
|
|
||||||
// Check whether we have at least one block.
|
// Check whether we have at least one block.
|
||||||
CMPQ DX, $32
|
CMPQ n, $32
|
||||||
JLT noBlocks
|
JLT noBlocks
|
||||||
|
|
||||||
// Set up initial state (v1, v2, v3, v4).
|
// Set up initial state (v1, v2, v3, v4).
|
||||||
MOVQ R13, R8
|
MOVQ prime1, v1
|
||||||
ADDQ R14, R8
|
ADDQ prime2, v1
|
||||||
MOVQ R14, R9
|
MOVQ prime2, v2
|
||||||
XORQ R10, R10
|
XORQ v3, v3
|
||||||
XORQ R11, R11
|
XORQ v4, v4
|
||||||
SUBQ R13, R11
|
SUBQ prime1, v4
|
||||||
|
|
||||||
// Loop until SI > BX.
|
blockLoop()
|
||||||
blockLoop:
|
|
||||||
round(R8)
|
|
||||||
round(R9)
|
|
||||||
round(R10)
|
|
||||||
round(R11)
|
|
||||||
|
|
||||||
CMPQ SI, BX
|
MOVQ v1, h
|
||||||
JLE blockLoop
|
ROLQ $1, h
|
||||||
|
MOVQ v2, x
|
||||||
|
ROLQ $7, x
|
||||||
|
ADDQ x, h
|
||||||
|
MOVQ v3, x
|
||||||
|
ROLQ $12, x
|
||||||
|
ADDQ x, h
|
||||||
|
MOVQ v4, x
|
||||||
|
ROLQ $18, x
|
||||||
|
ADDQ x, h
|
||||||
|
|
||||||
MOVQ R8, AX
|
mergeRound(h, v1)
|
||||||
ROLQ $1, AX
|
mergeRound(h, v2)
|
||||||
MOVQ R9, R12
|
mergeRound(h, v3)
|
||||||
ROLQ $7, R12
|
mergeRound(h, v4)
|
||||||
ADDQ R12, AX
|
|
||||||
MOVQ R10, R12
|
|
||||||
ROLQ $12, R12
|
|
||||||
ADDQ R12, AX
|
|
||||||
MOVQ R11, R12
|
|
||||||
ROLQ $18, R12
|
|
||||||
ADDQ R12, AX
|
|
||||||
|
|
||||||
mergeRound(AX, R8)
|
|
||||||
mergeRound(AX, R9)
|
|
||||||
mergeRound(AX, R10)
|
|
||||||
mergeRound(AX, R11)
|
|
||||||
|
|
||||||
JMP afterBlocks
|
JMP afterBlocks
|
||||||
|
|
||||||
noBlocks:
|
noBlocks:
|
||||||
MOVQ ·prime5v(SB), AX
|
MOVQ ·primes+32(SB), h
|
||||||
|
|
||||||
afterBlocks:
|
afterBlocks:
|
||||||
ADDQ DX, AX
|
ADDQ n, h
|
||||||
|
|
||||||
// Right now BX has len(b)-32, and we want to loop until SI > len(b)-8.
|
ADDQ $24, end
|
||||||
ADDQ $24, BX
|
CMPQ p, end
|
||||||
|
JG try4
|
||||||
|
|
||||||
CMPQ SI, BX
|
loop8:
|
||||||
JG fourByte
|
MOVQ (p), x
|
||||||
|
ADDQ $8, p
|
||||||
|
round0(x)
|
||||||
|
XORQ x, h
|
||||||
|
ROLQ $27, h
|
||||||
|
IMULQ prime1, h
|
||||||
|
ADDQ prime4, h
|
||||||
|
|
||||||
wordLoop:
|
CMPQ p, end
|
||||||
// Calculate k1.
|
JLE loop8
|
||||||
MOVQ (SI), R8
|
|
||||||
ADDQ $8, SI
|
|
||||||
IMULQ R14, R8
|
|
||||||
ROLQ $31, R8
|
|
||||||
IMULQ R13, R8
|
|
||||||
|
|
||||||
XORQ R8, AX
|
try4:
|
||||||
ROLQ $27, AX
|
ADDQ $4, end
|
||||||
IMULQ R13, AX
|
CMPQ p, end
|
||||||
ADDQ DI, AX
|
JG try1
|
||||||
|
|
||||||
CMPQ SI, BX
|
MOVL (p), x
|
||||||
JLE wordLoop
|
ADDQ $4, p
|
||||||
|
IMULQ prime1, x
|
||||||
|
XORQ x, h
|
||||||
|
|
||||||
fourByte:
|
ROLQ $23, h
|
||||||
ADDQ $4, BX
|
IMULQ prime2, h
|
||||||
CMPQ SI, BX
|
ADDQ ·primes+16(SB), h
|
||||||
JG singles
|
|
||||||
|
|
||||||
MOVL (SI), R8
|
try1:
|
||||||
ADDQ $4, SI
|
ADDQ $4, end
|
||||||
IMULQ R13, R8
|
CMPQ p, end
|
||||||
XORQ R8, AX
|
|
||||||
|
|
||||||
ROLQ $23, AX
|
|
||||||
IMULQ R14, AX
|
|
||||||
ADDQ ·prime3v(SB), AX
|
|
||||||
|
|
||||||
singles:
|
|
||||||
ADDQ $4, BX
|
|
||||||
CMPQ SI, BX
|
|
||||||
JGE finalize
|
JGE finalize
|
||||||
|
|
||||||
singlesLoop:
|
loop1:
|
||||||
MOVBQZX (SI), R12
|
MOVBQZX (p), x
|
||||||
ADDQ $1, SI
|
ADDQ $1, p
|
||||||
IMULQ ·prime5v(SB), R12
|
IMULQ ·primes+32(SB), x
|
||||||
XORQ R12, AX
|
XORQ x, h
|
||||||
|
ROLQ $11, h
|
||||||
|
IMULQ prime1, h
|
||||||
|
|
||||||
ROLQ $11, AX
|
CMPQ p, end
|
||||||
IMULQ R13, AX
|
JL loop1
|
||||||
|
|
||||||
CMPQ SI, BX
|
|
||||||
JL singlesLoop
|
|
||||||
|
|
||||||
finalize:
|
finalize:
|
||||||
MOVQ AX, R12
|
MOVQ h, x
|
||||||
SHRQ $33, R12
|
SHRQ $33, x
|
||||||
XORQ R12, AX
|
XORQ x, h
|
||||||
IMULQ R14, AX
|
IMULQ prime2, h
|
||||||
MOVQ AX, R12
|
MOVQ h, x
|
||||||
SHRQ $29, R12
|
SHRQ $29, x
|
||||||
XORQ R12, AX
|
XORQ x, h
|
||||||
IMULQ ·prime3v(SB), AX
|
IMULQ ·primes+16(SB), h
|
||||||
MOVQ AX, R12
|
MOVQ h, x
|
||||||
SHRQ $32, R12
|
SHRQ $32, x
|
||||||
XORQ R12, AX
|
XORQ x, h
|
||||||
|
|
||||||
MOVQ AX, ret+24(FP)
|
MOVQ h, ret+24(FP)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
// writeBlocks uses the same registers as above except that it uses AX to store
|
|
||||||
// the d pointer.
|
|
||||||
|
|
||||||
// func writeBlocks(d *Digest, b []byte) int
|
// func writeBlocks(d *Digest, b []byte) int
|
||||||
TEXT ·writeBlocks(SB), NOSPLIT, $0-40
|
TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40
|
||||||
// Load fixed primes needed for round.
|
// Load fixed primes needed for round.
|
||||||
MOVQ ·prime1v(SB), R13
|
MOVQ ·primes+0(SB), prime1
|
||||||
MOVQ ·prime2v(SB), R14
|
MOVQ ·primes+8(SB), prime2
|
||||||
|
|
||||||
// Load slice.
|
// Load slice.
|
||||||
MOVQ b_base+8(FP), SI
|
MOVQ b_base+8(FP), p
|
||||||
MOVQ b_len+16(FP), DX
|
MOVQ b_len+16(FP), n
|
||||||
LEAQ (SI)(DX*1), BX
|
LEAQ (p)(n*1), end
|
||||||
SUBQ $32, BX
|
SUBQ $32, end
|
||||||
|
|
||||||
// Load vN from d.
|
// Load vN from d.
|
||||||
MOVQ d+0(FP), AX
|
MOVQ s+0(FP), d
|
||||||
MOVQ 0(AX), R8 // v1
|
MOVQ 0(d), v1
|
||||||
MOVQ 8(AX), R9 // v2
|
MOVQ 8(d), v2
|
||||||
MOVQ 16(AX), R10 // v3
|
MOVQ 16(d), v3
|
||||||
MOVQ 24(AX), R11 // v4
|
MOVQ 24(d), v4
|
||||||
|
|
||||||
// We don't need to check the loop condition here; this function is
|
// We don't need to check the loop condition here; this function is
|
||||||
// always called with at least one block of data to process.
|
// always called with at least one block of data to process.
|
||||||
blockLoop:
|
blockLoop()
|
||||||
round(R8)
|
|
||||||
round(R9)
|
|
||||||
round(R10)
|
|
||||||
round(R11)
|
|
||||||
|
|
||||||
CMPQ SI, BX
|
|
||||||
JLE blockLoop
|
|
||||||
|
|
||||||
// Copy vN back to d.
|
// Copy vN back to d.
|
||||||
MOVQ R8, 0(AX)
|
MOVQ v1, 0(d)
|
||||||
MOVQ R9, 8(AX)
|
MOVQ v2, 8(d)
|
||||||
MOVQ R10, 16(AX)
|
MOVQ v3, 16(d)
|
||||||
MOVQ R11, 24(AX)
|
MOVQ v4, 24(d)
|
||||||
|
|
||||||
// The number of bytes written is SI minus the old base pointer.
|
// The number of bytes written is p minus the old base pointer.
|
||||||
SUBQ b_base+8(FP), SI
|
SUBQ b_base+8(FP), p
|
||||||
MOVQ SI, ret+32(FP)
|
MOVQ p, ret+32(FP)
|
||||||
|
|
||||||
RET
|
RET
|
||||||
|
|
|
@ -0,0 +1,183 @@
|
||||||
|
//go:build !appengine && gc && !purego
|
||||||
|
// +build !appengine
|
||||||
|
// +build gc
|
||||||
|
// +build !purego
|
||||||
|
|
||||||
|
#include "textflag.h"
|
||||||
|
|
||||||
|
// Registers:
|
||||||
|
#define digest R1
|
||||||
|
#define h R2 // return value
|
||||||
|
#define p R3 // input pointer
|
||||||
|
#define n R4 // input length
|
||||||
|
#define nblocks R5 // n / 32
|
||||||
|
#define prime1 R7
|
||||||
|
#define prime2 R8
|
||||||
|
#define prime3 R9
|
||||||
|
#define prime4 R10
|
||||||
|
#define prime5 R11
|
||||||
|
#define v1 R12
|
||||||
|
#define v2 R13
|
||||||
|
#define v3 R14
|
||||||
|
#define v4 R15
|
||||||
|
#define x1 R20
|
||||||
|
#define x2 R21
|
||||||
|
#define x3 R22
|
||||||
|
#define x4 R23
|
||||||
|
|
||||||
|
#define round(acc, x) \
|
||||||
|
MADD prime2, acc, x, acc \
|
||||||
|
ROR $64-31, acc \
|
||||||
|
MUL prime1, acc
|
||||||
|
|
||||||
|
// round0 performs the operation x = round(0, x).
|
||||||
|
#define round0(x) \
|
||||||
|
MUL prime2, x \
|
||||||
|
ROR $64-31, x \
|
||||||
|
MUL prime1, x
|
||||||
|
|
||||||
|
#define mergeRound(acc, x) \
|
||||||
|
round0(x) \
|
||||||
|
EOR x, acc \
|
||||||
|
MADD acc, prime4, prime1, acc
|
||||||
|
|
||||||
|
// blockLoop processes as many 32-byte blocks as possible,
|
||||||
|
// updating v1, v2, v3, and v4. It assumes that n >= 32.
|
||||||
|
#define blockLoop() \
|
||||||
|
LSR $5, n, nblocks \
|
||||||
|
PCALIGN $16 \
|
||||||
|
loop: \
|
||||||
|
LDP.P 16(p), (x1, x2) \
|
||||||
|
LDP.P 16(p), (x3, x4) \
|
||||||
|
round(v1, x1) \
|
||||||
|
round(v2, x2) \
|
||||||
|
round(v3, x3) \
|
||||||
|
round(v4, x4) \
|
||||||
|
SUB $1, nblocks \
|
||||||
|
CBNZ nblocks, loop
|
||||||
|
|
||||||
|
// func Sum64(b []byte) uint64
|
||||||
|
TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32
|
||||||
|
LDP b_base+0(FP), (p, n)
|
||||||
|
|
||||||
|
LDP ·primes+0(SB), (prime1, prime2)
|
||||||
|
LDP ·primes+16(SB), (prime3, prime4)
|
||||||
|
MOVD ·primes+32(SB), prime5
|
||||||
|
|
||||||
|
CMP $32, n
|
||||||
|
CSEL LT, prime5, ZR, h // if n < 32 { h = prime5 } else { h = 0 }
|
||||||
|
BLT afterLoop
|
||||||
|
|
||||||
|
ADD prime1, prime2, v1
|
||||||
|
MOVD prime2, v2
|
||||||
|
MOVD $0, v3
|
||||||
|
NEG prime1, v4
|
||||||
|
|
||||||
|
blockLoop()
|
||||||
|
|
||||||
|
ROR $64-1, v1, x1
|
||||||
|
ROR $64-7, v2, x2
|
||||||
|
ADD x1, x2
|
||||||
|
ROR $64-12, v3, x3
|
||||||
|
ROR $64-18, v4, x4
|
||||||
|
ADD x3, x4
|
||||||
|
ADD x2, x4, h
|
||||||
|
|
||||||
|
mergeRound(h, v1)
|
||||||
|
mergeRound(h, v2)
|
||||||
|
mergeRound(h, v3)
|
||||||
|
mergeRound(h, v4)
|
||||||
|
|
||||||
|
afterLoop:
|
||||||
|
ADD n, h
|
||||||
|
|
||||||
|
TBZ $4, n, try8
|
||||||
|
LDP.P 16(p), (x1, x2)
|
||||||
|
|
||||||
|
round0(x1)
|
||||||
|
|
||||||
|
// NOTE: here and below, sequencing the EOR after the ROR (using a
|
||||||
|
// rotated register) is worth a small but measurable speedup for small
|
||||||
|
// inputs.
|
||||||
|
ROR $64-27, h
|
||||||
|
EOR x1 @> 64-27, h, h
|
||||||
|
MADD h, prime4, prime1, h
|
||||||
|
|
||||||
|
round0(x2)
|
||||||
|
ROR $64-27, h
|
||||||
|
EOR x2 @> 64-27, h, h
|
||||||
|
MADD h, prime4, prime1, h
|
||||||
|
|
||||||
|
try8:
|
||||||
|
TBZ $3, n, try4
|
||||||
|
MOVD.P 8(p), x1
|
||||||
|
|
||||||
|
round0(x1)
|
||||||
|
ROR $64-27, h
|
||||||
|
EOR x1 @> 64-27, h, h
|
||||||
|
MADD h, prime4, prime1, h
|
||||||
|
|
||||||
|
try4:
|
||||||
|
TBZ $2, n, try2
|
||||||
|
MOVWU.P 4(p), x2
|
||||||
|
|
||||||
|
MUL prime1, x2
|
||||||
|
ROR $64-23, h
|
||||||
|
EOR x2 @> 64-23, h, h
|
||||||
|
MADD h, prime3, prime2, h
|
||||||
|
|
||||||
|
try2:
|
||||||
|
TBZ $1, n, try1
|
||||||
|
MOVHU.P 2(p), x3
|
||||||
|
AND $255, x3, x1
|
||||||
|
LSR $8, x3, x2
|
||||||
|
|
||||||
|
MUL prime5, x1
|
||||||
|
ROR $64-11, h
|
||||||
|
EOR x1 @> 64-11, h, h
|
||||||
|
MUL prime1, h
|
||||||
|
|
||||||
|
MUL prime5, x2
|
||||||
|
ROR $64-11, h
|
||||||
|
EOR x2 @> 64-11, h, h
|
||||||
|
MUL prime1, h
|
||||||
|
|
||||||
|
try1:
|
||||||
|
TBZ $0, n, finalize
|
||||||
|
MOVBU (p), x4
|
||||||
|
|
||||||
|
MUL prime5, x4
|
||||||
|
ROR $64-11, h
|
||||||
|
EOR x4 @> 64-11, h, h
|
||||||
|
MUL prime1, h
|
||||||
|
|
||||||
|
finalize:
|
||||||
|
EOR h >> 33, h
|
||||||
|
MUL prime2, h
|
||||||
|
EOR h >> 29, h
|
||||||
|
MUL prime3, h
|
||||||
|
EOR h >> 32, h
|
||||||
|
|
||||||
|
MOVD h, ret+24(FP)
|
||||||
|
RET
|
||||||
|
|
||||||
|
// func writeBlocks(d *Digest, b []byte) int
|
||||||
|
TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40
|
||||||
|
LDP ·primes+0(SB), (prime1, prime2)
|
||||||
|
|
||||||
|
// Load state. Assume v[1-4] are stored contiguously.
|
||||||
|
MOVD d+0(FP), digest
|
||||||
|
LDP 0(digest), (v1, v2)
|
||||||
|
LDP 16(digest), (v3, v4)
|
||||||
|
|
||||||
|
LDP b_base+8(FP), (p, n)
|
||||||
|
|
||||||
|
blockLoop()
|
||||||
|
|
||||||
|
// Store updated state.
|
||||||
|
STP (v1, v2), 0(digest)
|
||||||
|
STP (v3, v4), 16(digest)
|
||||||
|
|
||||||
|
BIC $31, n
|
||||||
|
MOVD n, ret+32(FP)
|
||||||
|
RET
|
|
@ -1,3 +1,5 @@
|
||||||
|
//go:build (amd64 || arm64) && !appengine && gc && !purego
|
||||||
|
// +build amd64 arm64
|
||||||
// +build !appengine
|
// +build !appengine
|
||||||
// +build gc
|
// +build gc
|
||||||
// +build !purego
|
// +build !purego
|
|
@ -1,4 +1,5 @@
|
||||||
// +build !amd64 appengine !gc purego
|
//go:build (!amd64 && !arm64) || appengine || !gc || purego
|
||||||
|
// +build !amd64,!arm64 appengine !gc purego
|
||||||
|
|
||||||
package xxhash
|
package xxhash
|
||||||
|
|
||||||
|
@ -14,10 +15,10 @@ func Sum64(b []byte) uint64 {
|
||||||
var h uint64
|
var h uint64
|
||||||
|
|
||||||
if n >= 32 {
|
if n >= 32 {
|
||||||
v1 := prime1v + prime2
|
v1 := primes[0] + prime2
|
||||||
v2 := prime2
|
v2 := prime2
|
||||||
v3 := uint64(0)
|
v3 := uint64(0)
|
||||||
v4 := -prime1v
|
v4 := -primes[0]
|
||||||
for len(b) >= 32 {
|
for len(b) >= 32 {
|
||||||
v1 = round(v1, u64(b[0:8:len(b)]))
|
v1 = round(v1, u64(b[0:8:len(b)]))
|
||||||
v2 = round(v2, u64(b[8:16:len(b)]))
|
v2 = round(v2, u64(b[8:16:len(b)]))
|
||||||
|
@ -36,19 +37,18 @@ func Sum64(b []byte) uint64 {
|
||||||
|
|
||||||
h += uint64(n)
|
h += uint64(n)
|
||||||
|
|
||||||
i, end := 0, len(b)
|
for ; len(b) >= 8; b = b[8:] {
|
||||||
for ; i+8 <= end; i += 8 {
|
k1 := round(0, u64(b[:8]))
|
||||||
k1 := round(0, u64(b[i:i+8:len(b)]))
|
|
||||||
h ^= k1
|
h ^= k1
|
||||||
h = rol27(h)*prime1 + prime4
|
h = rol27(h)*prime1 + prime4
|
||||||
}
|
}
|
||||||
if i+4 <= end {
|
if len(b) >= 4 {
|
||||||
h ^= uint64(u32(b[i:i+4:len(b)])) * prime1
|
h ^= uint64(u32(b[:4])) * prime1
|
||||||
h = rol23(h)*prime2 + prime3
|
h = rol23(h)*prime2 + prime3
|
||||||
i += 4
|
b = b[4:]
|
||||||
}
|
}
|
||||||
for ; i < end; i++ {
|
for ; len(b) > 0; b = b[1:] {
|
||||||
h ^= uint64(b[i]) * prime5
|
h ^= uint64(b[0]) * prime5
|
||||||
h = rol11(h) * prime1
|
h = rol11(h) * prime1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
//go:build appengine
|
||||||
// +build appengine
|
// +build appengine
|
||||||
|
|
||||||
// This file contains the safe implementations of otherwise unsafe-using code.
|
// This file contains the safe implementations of otherwise unsafe-using code.
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
//go:build !appengine
|
||||||
// +build !appengine
|
// +build !appengine
|
||||||
|
|
||||||
// This file encapsulates usage of unsafe.
|
// This file encapsulates usage of unsafe.
|
||||||
|
@ -11,7 +12,7 @@ import (
|
||||||
|
|
||||||
// In the future it's possible that compiler optimizations will make these
|
// In the future it's possible that compiler optimizations will make these
|
||||||
// XxxString functions unnecessary by realizing that calls such as
|
// XxxString functions unnecessary by realizing that calls such as
|
||||||
// Sum64([]byte(s)) don't need to copy s. See https://golang.org/issue/2205.
|
// Sum64([]byte(s)) don't need to copy s. See https://go.dev/issue/2205.
|
||||||
// If that happens, even if we keep these functions they can be replaced with
|
// If that happens, even if we keep these functions they can be replaced with
|
||||||
// the trivial safe code.
|
// the trivial safe code.
|
||||||
|
|
||||||
|
|
|
@ -12,7 +12,7 @@ github.com/Microsoft/go-winio/pkg/guid
|
||||||
# github.com/beorn7/perks v1.0.1
|
# github.com/beorn7/perks v1.0.1
|
||||||
## explicit; go 1.11
|
## explicit; go 1.11
|
||||||
github.com/beorn7/perks/quantile
|
github.com/beorn7/perks/quantile
|
||||||
# github.com/cespare/xxhash/v2 v2.1.2
|
# github.com/cespare/xxhash/v2 v2.2.0
|
||||||
## explicit; go 1.11
|
## explicit; go 1.11
|
||||||
github.com/cespare/xxhash/v2
|
github.com/cespare/xxhash/v2
|
||||||
# github.com/container-orchestrated-devices/container-device-interface v0.6.0
|
# github.com/container-orchestrated-devices/container-device-interface v0.6.0
|
||||||
|
|
Loading…
Reference in New Issue