From 5b138189b9345feecb70d782844dfd65d7b5168a Mon Sep 17 00:00:00 2001 From: Sebastiaan van Stijn Date: Thu, 20 Jul 2023 00:21:37 +0200 Subject: [PATCH] vendor: github.com/cespare/xxhash/v2 v2.2.0 full diff: https://github.com/cespare/xxhash/compare/v2.1.2...v2.2.0 Signed-off-by: Sebastiaan van Stijn --- vendor.mod | 2 +- vendor.sum | 3 +- vendor/github.com/cespare/xxhash/v2/README.md | 31 +- .../github.com/cespare/xxhash/v2/testall.sh | 10 + vendor/github.com/cespare/xxhash/v2/xxhash.go | 47 ++- .../cespare/xxhash/v2/xxhash_amd64.s | 308 +++++++++--------- .../cespare/xxhash/v2/xxhash_arm64.s | 183 +++++++++++ .../v2/{xxhash_amd64.go => xxhash_asm.go} | 2 + .../cespare/xxhash/v2/xxhash_other.go | 22 +- .../cespare/xxhash/v2/xxhash_safe.go | 1 + .../cespare/xxhash/v2/xxhash_unsafe.go | 3 +- vendor/modules.txt | 2 +- 12 files changed, 401 insertions(+), 213 deletions(-) create mode 100644 vendor/github.com/cespare/xxhash/v2/testall.sh create mode 100644 vendor/github.com/cespare/xxhash/v2/xxhash_arm64.s rename vendor/github.com/cespare/xxhash/v2/{xxhash_amd64.go => xxhash_asm.go} (73%) diff --git a/vendor.mod b/vendor.mod index e74b242038..917d01b5cc 100644 --- a/vendor.mod +++ b/vendor.mod @@ -50,7 +50,7 @@ require ( github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 // indirect github.com/Microsoft/go-winio v0.5.2 // indirect github.com/beorn7/perks v1.0.1 // indirect - github.com/cespare/xxhash/v2 v2.1.2 // indirect + github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/docker/go v1.5.1-1.0.20160303222718-d30aec9fd63c // indirect github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c // indirect github.com/docker/go-metrics v0.0.1 // indirect diff --git a/vendor.sum b/vendor.sum index ceae47f4a9..4ae44c794d 100644 --- a/vendor.sum +++ b/vendor.sum @@ -67,8 +67,9 @@ github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA github.com/certifi/gocertifi v0.0.0-20191021191039-0944d244cd40/go.mod h1:sGbDF6GwGcLpkNXPUTkMRoywsNa/ol15pxFe6ERfguA= github.com/certifi/gocertifi v0.0.0-20200922220541-2c3bb06c6054/go.mod h1:sGbDF6GwGcLpkNXPUTkMRoywsNa/ol15pxFe6ERfguA= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/cespare/xxhash/v2 v2.1.2 h1:YRXhKfTDauu4ajMg1TPgFO5jnlC2HCbmLXMcTG5cbYE= github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= +github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= diff --git a/vendor/github.com/cespare/xxhash/v2/README.md b/vendor/github.com/cespare/xxhash/v2/README.md index 792b4a60b3..8bf0e5b781 100644 --- a/vendor/github.com/cespare/xxhash/v2/README.md +++ b/vendor/github.com/cespare/xxhash/v2/README.md @@ -3,8 +3,7 @@ [![Go Reference](https://pkg.go.dev/badge/github.com/cespare/xxhash/v2.svg)](https://pkg.go.dev/github.com/cespare/xxhash/v2) [![Test](https://github.com/cespare/xxhash/actions/workflows/test.yml/badge.svg)](https://github.com/cespare/xxhash/actions/workflows/test.yml) -xxhash is a Go implementation of the 64-bit -[xxHash](http://cyan4973.github.io/xxHash/) algorithm, XXH64. This is a +xxhash is a Go implementation of the 64-bit [xxHash] algorithm, XXH64. This is a high-quality hashing algorithm that is much faster than anything in the Go standard library. @@ -25,8 +24,11 @@ func (*Digest) WriteString(string) (int, error) func (*Digest) Sum64() uint64 ``` -This implementation provides a fast pure-Go implementation and an even faster -assembly implementation for amd64. +The package is written with optimized pure Go and also contains even faster +assembly implementations for amd64 and arm64. If desired, the `purego` build tag +opts into using the Go code even on those architectures. + +[xxHash]: http://cyan4973.github.io/xxHash/ ## Compatibility @@ -45,19 +47,20 @@ I recommend using the latest release of Go. Here are some quick benchmarks comparing the pure-Go and assembly implementations of Sum64. -| input size | purego | asm | -| --- | --- | --- | -| 5 B | 979.66 MB/s | 1291.17 MB/s | -| 100 B | 7475.26 MB/s | 7973.40 MB/s | -| 4 KB | 17573.46 MB/s | 17602.65 MB/s | -| 10 MB | 17131.46 MB/s | 17142.16 MB/s | +| input size | purego | asm | +| ---------- | --------- | --------- | +| 4 B | 1.3 GB/s | 1.2 GB/s | +| 16 B | 2.9 GB/s | 3.5 GB/s | +| 100 B | 6.9 GB/s | 8.1 GB/s | +| 4 KB | 11.7 GB/s | 16.7 GB/s | +| 10 MB | 12.0 GB/s | 17.3 GB/s | -These numbers were generated on Ubuntu 18.04 with an Intel i7-8700K CPU using -the following commands under Go 1.11.2: +These numbers were generated on Ubuntu 20.04 with an Intel Xeon Platinum 8252C +CPU using the following commands under Go 1.19.2: ``` -$ go test -tags purego -benchtime 10s -bench '/xxhash,direct,bytes' -$ go test -benchtime 10s -bench '/xxhash,direct,bytes' +benchstat <(go test -tags purego -benchtime 500ms -count 15 -bench 'Sum64$') +benchstat <(go test -benchtime 500ms -count 15 -bench 'Sum64$') ``` ## Projects using this package diff --git a/vendor/github.com/cespare/xxhash/v2/testall.sh b/vendor/github.com/cespare/xxhash/v2/testall.sh new file mode 100644 index 0000000000..94b9c44398 --- /dev/null +++ b/vendor/github.com/cespare/xxhash/v2/testall.sh @@ -0,0 +1,10 @@ +#!/bin/bash +set -eu -o pipefail + +# Small convenience script for running the tests with various combinations of +# arch/tags. This assumes we're running on amd64 and have qemu available. + +go test ./... +go test -tags purego ./... +GOARCH=arm64 go test +GOARCH=arm64 go test -tags purego diff --git a/vendor/github.com/cespare/xxhash/v2/xxhash.go b/vendor/github.com/cespare/xxhash/v2/xxhash.go index 15c835d541..a9e0d45c9d 100644 --- a/vendor/github.com/cespare/xxhash/v2/xxhash.go +++ b/vendor/github.com/cespare/xxhash/v2/xxhash.go @@ -16,19 +16,11 @@ const ( prime5 uint64 = 2870177450012600261 ) -// NOTE(caleb): I'm using both consts and vars of the primes. Using consts where -// possible in the Go code is worth a small (but measurable) performance boost -// by avoiding some MOVQs. Vars are needed for the asm and also are useful for -// convenience in the Go code in a few places where we need to intentionally -// avoid constant arithmetic (e.g., v1 := prime1 + prime2 fails because the -// result overflows a uint64). -var ( - prime1v = prime1 - prime2v = prime2 - prime3v = prime3 - prime4v = prime4 - prime5v = prime5 -) +// Store the primes in an array as well. +// +// The consts are used when possible in Go code to avoid MOVs but we need a +// contiguous array of the assembly code. +var primes = [...]uint64{prime1, prime2, prime3, prime4, prime5} // Digest implements hash.Hash64. type Digest struct { @@ -50,10 +42,10 @@ func New() *Digest { // Reset clears the Digest's state so that it can be reused. func (d *Digest) Reset() { - d.v1 = prime1v + prime2 + d.v1 = primes[0] + prime2 d.v2 = prime2 d.v3 = 0 - d.v4 = -prime1v + d.v4 = -primes[0] d.total = 0 d.n = 0 } @@ -69,21 +61,23 @@ func (d *Digest) Write(b []byte) (n int, err error) { n = len(b) d.total += uint64(n) + memleft := d.mem[d.n&(len(d.mem)-1):] + if d.n+n < 32 { // This new data doesn't even fill the current block. - copy(d.mem[d.n:], b) + copy(memleft, b) d.n += n return } if d.n > 0 { // Finish off the partial block. - copy(d.mem[d.n:], b) + c := copy(memleft, b) d.v1 = round(d.v1, u64(d.mem[0:8])) d.v2 = round(d.v2, u64(d.mem[8:16])) d.v3 = round(d.v3, u64(d.mem[16:24])) d.v4 = round(d.v4, u64(d.mem[24:32])) - b = b[32-d.n:] + b = b[c:] d.n = 0 } @@ -133,21 +127,20 @@ func (d *Digest) Sum64() uint64 { h += d.total - i, end := 0, d.n - for ; i+8 <= end; i += 8 { - k1 := round(0, u64(d.mem[i:i+8])) + b := d.mem[:d.n&(len(d.mem)-1)] + for ; len(b) >= 8; b = b[8:] { + k1 := round(0, u64(b[:8])) h ^= k1 h = rol27(h)*prime1 + prime4 } - if i+4 <= end { - h ^= uint64(u32(d.mem[i:i+4])) * prime1 + if len(b) >= 4 { + h ^= uint64(u32(b[:4])) * prime1 h = rol23(h)*prime2 + prime3 - i += 4 + b = b[4:] } - for i < end { - h ^= uint64(d.mem[i]) * prime5 + for ; len(b) > 0; b = b[1:] { + h ^= uint64(b[0]) * prime5 h = rol11(h) * prime1 - i++ } h ^= h >> 33 diff --git a/vendor/github.com/cespare/xxhash/v2/xxhash_amd64.s b/vendor/github.com/cespare/xxhash/v2/xxhash_amd64.s index be8db5bf79..3e8b132579 100644 --- a/vendor/github.com/cespare/xxhash/v2/xxhash_amd64.s +++ b/vendor/github.com/cespare/xxhash/v2/xxhash_amd64.s @@ -1,215 +1,209 @@ +//go:build !appengine && gc && !purego // +build !appengine // +build gc // +build !purego #include "textflag.h" -// Register allocation: -// AX h -// SI pointer to advance through b -// DX n -// BX loop end -// R8 v1, k1 -// R9 v2 -// R10 v3 -// R11 v4 -// R12 tmp -// R13 prime1v -// R14 prime2v -// DI prime4v +// Registers: +#define h AX +#define d AX +#define p SI // pointer to advance through b +#define n DX +#define end BX // loop end +#define v1 R8 +#define v2 R9 +#define v3 R10 +#define v4 R11 +#define x R12 +#define prime1 R13 +#define prime2 R14 +#define prime4 DI -// round reads from and advances the buffer pointer in SI. -// It assumes that R13 has prime1v and R14 has prime2v. -#define round(r) \ - MOVQ (SI), R12 \ - ADDQ $8, SI \ - IMULQ R14, R12 \ - ADDQ R12, r \ - ROLQ $31, r \ - IMULQ R13, r +#define round(acc, x) \ + IMULQ prime2, x \ + ADDQ x, acc \ + ROLQ $31, acc \ + IMULQ prime1, acc -// mergeRound applies a merge round on the two registers acc and val. -// It assumes that R13 has prime1v, R14 has prime2v, and DI has prime4v. -#define mergeRound(acc, val) \ - IMULQ R14, val \ - ROLQ $31, val \ - IMULQ R13, val \ - XORQ val, acc \ - IMULQ R13, acc \ - ADDQ DI, acc +// round0 performs the operation x = round(0, x). +#define round0(x) \ + IMULQ prime2, x \ + ROLQ $31, x \ + IMULQ prime1, x + +// mergeRound applies a merge round on the two registers acc and x. +// It assumes that prime1, prime2, and prime4 have been loaded. +#define mergeRound(acc, x) \ + round0(x) \ + XORQ x, acc \ + IMULQ prime1, acc \ + ADDQ prime4, acc + +// blockLoop processes as many 32-byte blocks as possible, +// updating v1, v2, v3, and v4. It assumes that there is at least one block +// to process. +#define blockLoop() \ +loop: \ + MOVQ +0(p), x \ + round(v1, x) \ + MOVQ +8(p), x \ + round(v2, x) \ + MOVQ +16(p), x \ + round(v3, x) \ + MOVQ +24(p), x \ + round(v4, x) \ + ADDQ $32, p \ + CMPQ p, end \ + JLE loop // func Sum64(b []byte) uint64 -TEXT ·Sum64(SB), NOSPLIT, $0-32 +TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32 // Load fixed primes. - MOVQ ·prime1v(SB), R13 - MOVQ ·prime2v(SB), R14 - MOVQ ·prime4v(SB), DI + MOVQ ·primes+0(SB), prime1 + MOVQ ·primes+8(SB), prime2 + MOVQ ·primes+24(SB), prime4 // Load slice. - MOVQ b_base+0(FP), SI - MOVQ b_len+8(FP), DX - LEAQ (SI)(DX*1), BX + MOVQ b_base+0(FP), p + MOVQ b_len+8(FP), n + LEAQ (p)(n*1), end // The first loop limit will be len(b)-32. - SUBQ $32, BX + SUBQ $32, end // Check whether we have at least one block. - CMPQ DX, $32 + CMPQ n, $32 JLT noBlocks // Set up initial state (v1, v2, v3, v4). - MOVQ R13, R8 - ADDQ R14, R8 - MOVQ R14, R9 - XORQ R10, R10 - XORQ R11, R11 - SUBQ R13, R11 + MOVQ prime1, v1 + ADDQ prime2, v1 + MOVQ prime2, v2 + XORQ v3, v3 + XORQ v4, v4 + SUBQ prime1, v4 - // Loop until SI > BX. -blockLoop: - round(R8) - round(R9) - round(R10) - round(R11) + blockLoop() - CMPQ SI, BX - JLE blockLoop + MOVQ v1, h + ROLQ $1, h + MOVQ v2, x + ROLQ $7, x + ADDQ x, h + MOVQ v3, x + ROLQ $12, x + ADDQ x, h + MOVQ v4, x + ROLQ $18, x + ADDQ x, h - MOVQ R8, AX - ROLQ $1, AX - MOVQ R9, R12 - ROLQ $7, R12 - ADDQ R12, AX - MOVQ R10, R12 - ROLQ $12, R12 - ADDQ R12, AX - MOVQ R11, R12 - ROLQ $18, R12 - ADDQ R12, AX - - mergeRound(AX, R8) - mergeRound(AX, R9) - mergeRound(AX, R10) - mergeRound(AX, R11) + mergeRound(h, v1) + mergeRound(h, v2) + mergeRound(h, v3) + mergeRound(h, v4) JMP afterBlocks noBlocks: - MOVQ ·prime5v(SB), AX + MOVQ ·primes+32(SB), h afterBlocks: - ADDQ DX, AX + ADDQ n, h - // Right now BX has len(b)-32, and we want to loop until SI > len(b)-8. - ADDQ $24, BX + ADDQ $24, end + CMPQ p, end + JG try4 - CMPQ SI, BX - JG fourByte +loop8: + MOVQ (p), x + ADDQ $8, p + round0(x) + XORQ x, h + ROLQ $27, h + IMULQ prime1, h + ADDQ prime4, h -wordLoop: - // Calculate k1. - MOVQ (SI), R8 - ADDQ $8, SI - IMULQ R14, R8 - ROLQ $31, R8 - IMULQ R13, R8 + CMPQ p, end + JLE loop8 - XORQ R8, AX - ROLQ $27, AX - IMULQ R13, AX - ADDQ DI, AX +try4: + ADDQ $4, end + CMPQ p, end + JG try1 - CMPQ SI, BX - JLE wordLoop + MOVL (p), x + ADDQ $4, p + IMULQ prime1, x + XORQ x, h -fourByte: - ADDQ $4, BX - CMPQ SI, BX - JG singles + ROLQ $23, h + IMULQ prime2, h + ADDQ ·primes+16(SB), h - MOVL (SI), R8 - ADDQ $4, SI - IMULQ R13, R8 - XORQ R8, AX - - ROLQ $23, AX - IMULQ R14, AX - ADDQ ·prime3v(SB), AX - -singles: - ADDQ $4, BX - CMPQ SI, BX +try1: + ADDQ $4, end + CMPQ p, end JGE finalize -singlesLoop: - MOVBQZX (SI), R12 - ADDQ $1, SI - IMULQ ·prime5v(SB), R12 - XORQ R12, AX +loop1: + MOVBQZX (p), x + ADDQ $1, p + IMULQ ·primes+32(SB), x + XORQ x, h + ROLQ $11, h + IMULQ prime1, h - ROLQ $11, AX - IMULQ R13, AX - - CMPQ SI, BX - JL singlesLoop + CMPQ p, end + JL loop1 finalize: - MOVQ AX, R12 - SHRQ $33, R12 - XORQ R12, AX - IMULQ R14, AX - MOVQ AX, R12 - SHRQ $29, R12 - XORQ R12, AX - IMULQ ·prime3v(SB), AX - MOVQ AX, R12 - SHRQ $32, R12 - XORQ R12, AX + MOVQ h, x + SHRQ $33, x + XORQ x, h + IMULQ prime2, h + MOVQ h, x + SHRQ $29, x + XORQ x, h + IMULQ ·primes+16(SB), h + MOVQ h, x + SHRQ $32, x + XORQ x, h - MOVQ AX, ret+24(FP) + MOVQ h, ret+24(FP) RET -// writeBlocks uses the same registers as above except that it uses AX to store -// the d pointer. - // func writeBlocks(d *Digest, b []byte) int -TEXT ·writeBlocks(SB), NOSPLIT, $0-40 +TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40 // Load fixed primes needed for round. - MOVQ ·prime1v(SB), R13 - MOVQ ·prime2v(SB), R14 + MOVQ ·primes+0(SB), prime1 + MOVQ ·primes+8(SB), prime2 // Load slice. - MOVQ b_base+8(FP), SI - MOVQ b_len+16(FP), DX - LEAQ (SI)(DX*1), BX - SUBQ $32, BX + MOVQ b_base+8(FP), p + MOVQ b_len+16(FP), n + LEAQ (p)(n*1), end + SUBQ $32, end // Load vN from d. - MOVQ d+0(FP), AX - MOVQ 0(AX), R8 // v1 - MOVQ 8(AX), R9 // v2 - MOVQ 16(AX), R10 // v3 - MOVQ 24(AX), R11 // v4 + MOVQ s+0(FP), d + MOVQ 0(d), v1 + MOVQ 8(d), v2 + MOVQ 16(d), v3 + MOVQ 24(d), v4 // We don't need to check the loop condition here; this function is // always called with at least one block of data to process. -blockLoop: - round(R8) - round(R9) - round(R10) - round(R11) - - CMPQ SI, BX - JLE blockLoop + blockLoop() // Copy vN back to d. - MOVQ R8, 0(AX) - MOVQ R9, 8(AX) - MOVQ R10, 16(AX) - MOVQ R11, 24(AX) + MOVQ v1, 0(d) + MOVQ v2, 8(d) + MOVQ v3, 16(d) + MOVQ v4, 24(d) - // The number of bytes written is SI minus the old base pointer. - SUBQ b_base+8(FP), SI - MOVQ SI, ret+32(FP) + // The number of bytes written is p minus the old base pointer. + SUBQ b_base+8(FP), p + MOVQ p, ret+32(FP) RET diff --git a/vendor/github.com/cespare/xxhash/v2/xxhash_arm64.s b/vendor/github.com/cespare/xxhash/v2/xxhash_arm64.s new file mode 100644 index 0000000000..7e3145a221 --- /dev/null +++ b/vendor/github.com/cespare/xxhash/v2/xxhash_arm64.s @@ -0,0 +1,183 @@ +//go:build !appengine && gc && !purego +// +build !appengine +// +build gc +// +build !purego + +#include "textflag.h" + +// Registers: +#define digest R1 +#define h R2 // return value +#define p R3 // input pointer +#define n R4 // input length +#define nblocks R5 // n / 32 +#define prime1 R7 +#define prime2 R8 +#define prime3 R9 +#define prime4 R10 +#define prime5 R11 +#define v1 R12 +#define v2 R13 +#define v3 R14 +#define v4 R15 +#define x1 R20 +#define x2 R21 +#define x3 R22 +#define x4 R23 + +#define round(acc, x) \ + MADD prime2, acc, x, acc \ + ROR $64-31, acc \ + MUL prime1, acc + +// round0 performs the operation x = round(0, x). +#define round0(x) \ + MUL prime2, x \ + ROR $64-31, x \ + MUL prime1, x + +#define mergeRound(acc, x) \ + round0(x) \ + EOR x, acc \ + MADD acc, prime4, prime1, acc + +// blockLoop processes as many 32-byte blocks as possible, +// updating v1, v2, v3, and v4. It assumes that n >= 32. +#define blockLoop() \ + LSR $5, n, nblocks \ + PCALIGN $16 \ + loop: \ + LDP.P 16(p), (x1, x2) \ + LDP.P 16(p), (x3, x4) \ + round(v1, x1) \ + round(v2, x2) \ + round(v3, x3) \ + round(v4, x4) \ + SUB $1, nblocks \ + CBNZ nblocks, loop + +// func Sum64(b []byte) uint64 +TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32 + LDP b_base+0(FP), (p, n) + + LDP ·primes+0(SB), (prime1, prime2) + LDP ·primes+16(SB), (prime3, prime4) + MOVD ·primes+32(SB), prime5 + + CMP $32, n + CSEL LT, prime5, ZR, h // if n < 32 { h = prime5 } else { h = 0 } + BLT afterLoop + + ADD prime1, prime2, v1 + MOVD prime2, v2 + MOVD $0, v3 + NEG prime1, v4 + + blockLoop() + + ROR $64-1, v1, x1 + ROR $64-7, v2, x2 + ADD x1, x2 + ROR $64-12, v3, x3 + ROR $64-18, v4, x4 + ADD x3, x4 + ADD x2, x4, h + + mergeRound(h, v1) + mergeRound(h, v2) + mergeRound(h, v3) + mergeRound(h, v4) + +afterLoop: + ADD n, h + + TBZ $4, n, try8 + LDP.P 16(p), (x1, x2) + + round0(x1) + + // NOTE: here and below, sequencing the EOR after the ROR (using a + // rotated register) is worth a small but measurable speedup for small + // inputs. + ROR $64-27, h + EOR x1 @> 64-27, h, h + MADD h, prime4, prime1, h + + round0(x2) + ROR $64-27, h + EOR x2 @> 64-27, h, h + MADD h, prime4, prime1, h + +try8: + TBZ $3, n, try4 + MOVD.P 8(p), x1 + + round0(x1) + ROR $64-27, h + EOR x1 @> 64-27, h, h + MADD h, prime4, prime1, h + +try4: + TBZ $2, n, try2 + MOVWU.P 4(p), x2 + + MUL prime1, x2 + ROR $64-23, h + EOR x2 @> 64-23, h, h + MADD h, prime3, prime2, h + +try2: + TBZ $1, n, try1 + MOVHU.P 2(p), x3 + AND $255, x3, x1 + LSR $8, x3, x2 + + MUL prime5, x1 + ROR $64-11, h + EOR x1 @> 64-11, h, h + MUL prime1, h + + MUL prime5, x2 + ROR $64-11, h + EOR x2 @> 64-11, h, h + MUL prime1, h + +try1: + TBZ $0, n, finalize + MOVBU (p), x4 + + MUL prime5, x4 + ROR $64-11, h + EOR x4 @> 64-11, h, h + MUL prime1, h + +finalize: + EOR h >> 33, h + MUL prime2, h + EOR h >> 29, h + MUL prime3, h + EOR h >> 32, h + + MOVD h, ret+24(FP) + RET + +// func writeBlocks(d *Digest, b []byte) int +TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40 + LDP ·primes+0(SB), (prime1, prime2) + + // Load state. Assume v[1-4] are stored contiguously. + MOVD d+0(FP), digest + LDP 0(digest), (v1, v2) + LDP 16(digest), (v3, v4) + + LDP b_base+8(FP), (p, n) + + blockLoop() + + // Store updated state. + STP (v1, v2), 0(digest) + STP (v3, v4), 16(digest) + + BIC $31, n + MOVD n, ret+32(FP) + RET diff --git a/vendor/github.com/cespare/xxhash/v2/xxhash_amd64.go b/vendor/github.com/cespare/xxhash/v2/xxhash_asm.go similarity index 73% rename from vendor/github.com/cespare/xxhash/v2/xxhash_amd64.go rename to vendor/github.com/cespare/xxhash/v2/xxhash_asm.go index ad14b807f4..9216e0a40c 100644 --- a/vendor/github.com/cespare/xxhash/v2/xxhash_amd64.go +++ b/vendor/github.com/cespare/xxhash/v2/xxhash_asm.go @@ -1,3 +1,5 @@ +//go:build (amd64 || arm64) && !appengine && gc && !purego +// +build amd64 arm64 // +build !appengine // +build gc // +build !purego diff --git a/vendor/github.com/cespare/xxhash/v2/xxhash_other.go b/vendor/github.com/cespare/xxhash/v2/xxhash_other.go index 4a5a821603..26df13bba4 100644 --- a/vendor/github.com/cespare/xxhash/v2/xxhash_other.go +++ b/vendor/github.com/cespare/xxhash/v2/xxhash_other.go @@ -1,4 +1,5 @@ -// +build !amd64 appengine !gc purego +//go:build (!amd64 && !arm64) || appengine || !gc || purego +// +build !amd64,!arm64 appengine !gc purego package xxhash @@ -14,10 +15,10 @@ func Sum64(b []byte) uint64 { var h uint64 if n >= 32 { - v1 := prime1v + prime2 + v1 := primes[0] + prime2 v2 := prime2 v3 := uint64(0) - v4 := -prime1v + v4 := -primes[0] for len(b) >= 32 { v1 = round(v1, u64(b[0:8:len(b)])) v2 = round(v2, u64(b[8:16:len(b)])) @@ -36,19 +37,18 @@ func Sum64(b []byte) uint64 { h += uint64(n) - i, end := 0, len(b) - for ; i+8 <= end; i += 8 { - k1 := round(0, u64(b[i:i+8:len(b)])) + for ; len(b) >= 8; b = b[8:] { + k1 := round(0, u64(b[:8])) h ^= k1 h = rol27(h)*prime1 + prime4 } - if i+4 <= end { - h ^= uint64(u32(b[i:i+4:len(b)])) * prime1 + if len(b) >= 4 { + h ^= uint64(u32(b[:4])) * prime1 h = rol23(h)*prime2 + prime3 - i += 4 + b = b[4:] } - for ; i < end; i++ { - h ^= uint64(b[i]) * prime5 + for ; len(b) > 0; b = b[1:] { + h ^= uint64(b[0]) * prime5 h = rol11(h) * prime1 } diff --git a/vendor/github.com/cespare/xxhash/v2/xxhash_safe.go b/vendor/github.com/cespare/xxhash/v2/xxhash_safe.go index fc9bea7a31..e86f1b5fd8 100644 --- a/vendor/github.com/cespare/xxhash/v2/xxhash_safe.go +++ b/vendor/github.com/cespare/xxhash/v2/xxhash_safe.go @@ -1,3 +1,4 @@ +//go:build appengine // +build appengine // This file contains the safe implementations of otherwise unsafe-using code. diff --git a/vendor/github.com/cespare/xxhash/v2/xxhash_unsafe.go b/vendor/github.com/cespare/xxhash/v2/xxhash_unsafe.go index 376e0ca2e4..1c1638fd88 100644 --- a/vendor/github.com/cespare/xxhash/v2/xxhash_unsafe.go +++ b/vendor/github.com/cespare/xxhash/v2/xxhash_unsafe.go @@ -1,3 +1,4 @@ +//go:build !appengine // +build !appengine // This file encapsulates usage of unsafe. @@ -11,7 +12,7 @@ import ( // In the future it's possible that compiler optimizations will make these // XxxString functions unnecessary by realizing that calls such as -// Sum64([]byte(s)) don't need to copy s. See https://golang.org/issue/2205. +// Sum64([]byte(s)) don't need to copy s. See https://go.dev/issue/2205. // If that happens, even if we keep these functions they can be replaced with // the trivial safe code. diff --git a/vendor/modules.txt b/vendor/modules.txt index b63a6a97ff..d122f22bdb 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -12,7 +12,7 @@ github.com/Microsoft/go-winio/pkg/guid # github.com/beorn7/perks v1.0.1 ## explicit; go 1.11 github.com/beorn7/perks/quantile -# github.com/cespare/xxhash/v2 v2.1.2 +# github.com/cespare/xxhash/v2 v2.2.0 ## explicit; go 1.11 github.com/cespare/xxhash/v2 # github.com/container-orchestrated-devices/container-device-interface v0.6.0