vendor: github.com/cespare/xxhash/v2 v2.2.0

full diff: https://github.com/cespare/xxhash/compare/v2.1.2...v2.2.0

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
2023-07-20 00:21:37 +02:00 · 2023-07-20 00:21:37 +02:00 · 5b138189b9
parent c1d0657029
commit 5b138189b9
12 changed files with 401 additions and 213 deletions
--- a/vendor.mod
+++ b/vendor.mod
@ -50,7 +50,7 @@ require (
 	github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 // indirect
 	github.com/Microsoft/go-winio v0.5.2 // indirect
 	github.com/beorn7/perks v1.0.1 // indirect
-	github.com/cespare/xxhash/v2 v2.1.2 // indirect
+	github.com/cespare/xxhash/v2 v2.2.0 // indirect
 	github.com/docker/go v1.5.1-1.0.20160303222718-d30aec9fd63c // indirect
 	github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c // indirect
 	github.com/docker/go-metrics v0.0.1 // indirect
--- a/vendor.sum
+++ b/vendor.sum
@ -67,8 +67,9 @@ github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA
 github.com/certifi/gocertifi v0.0.0-20191021191039-0944d244cd40/go.mod h1:sGbDF6GwGcLpkNXPUTkMRoywsNa/ol15pxFe6ERfguA=
 github.com/certifi/gocertifi v0.0.0-20200922220541-2c3bb06c6054/go.mod h1:sGbDF6GwGcLpkNXPUTkMRoywsNa/ol15pxFe6ERfguA=
 github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
 github.com/cespare/xxhash/v2 v2.1.2 h1:YRXhKfTDauu4ajMg1TPgFO5jnlC2HCbmLXMcTG5cbYE=
 github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
 github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
 github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
 github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
 github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
 github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
--- a/vendor/github.com/cespare/xxhash/v2/README.md
+++ b/vendor/github.com/cespare/xxhash/v2/README.md
@ -3,8 +3,7 @@
 [![Go Reference](https://pkg.go.dev/badge/github.com/cespare/xxhash/v2.svg)](https://pkg.go.dev/github.com/cespare/xxhash/v2)
 [![Test](https://github.com/cespare/xxhash/actions/workflows/test.yml/badge.svg)](https://github.com/cespare/xxhash/actions/workflows/test.yml)
-xxhash is a Go implementation of the 64-bit
+xxhash is a Go implementation of the 64-bit [xxHash] algorithm, XXH64. This is a
 [xxHash](http://cyan4973.github.io/xxHash/) algorithm, XXH64. This is a
 high-quality hashing algorithm that is much faster than anything in the Go
 standard library.
@ -25,8 +24,11 @@ func (*Digest) WriteString(string) (int, error)
 func (*Digest) Sum64() uint64
 ```
-This implementation provides a fast pure-Go implementation and an even faster
+The package is written with optimized pure Go and also contains even faster
-assembly implementation for amd64.
+assembly implementations for amd64 and arm64. If desired, the `purego` build tag
 opts into using the Go code even on those architectures.
 [xxHash]: http://cyan4973.github.io/xxHash/
 ## Compatibility
@ -45,19 +47,20 @@ I recommend using the latest release of Go.
 Here are some quick benchmarks comparing the pure-Go and assembly
 implementations of Sum64.
-| input size | purego | asm |
+| input size | purego    | asm       |
-| --- | --- | --- |
+| ---------- | --------- | --------- |
-| 5 B   |  979.66 MB/s |  1291.17 MB/s  |
+| 4 B        |  1.3 GB/s |  1.2 GB/s |
-| 100 B | 7475.26 MB/s | 7973.40 MB/s  |
+| 16 B       |  2.9 GB/s |  3.5 GB/s |
-| 4 KB  | 17573.46 MB/s | 17602.65 MB/s |
+| 100 B      |  6.9 GB/s |  8.1 GB/s |
-| 10 MB | 17131.46 MB/s | 17142.16 MB/s |
+| 4 KB       | 11.7 GB/s | 16.7 GB/s |
 | 10 MB      | 12.0 GB/s | 17.3 GB/s |
-These numbers were generated on Ubuntu 18.04 with an Intel i7-8700K CPU using
+These numbers were generated on Ubuntu 20.04 with an Intel Xeon Platinum 8252C
-the following commands under Go 1.11.2:
+CPU using the following commands under Go 1.19.2:
 ```
-$ go test -tags purego -benchtime 10s -bench '/xxhash,direct,bytes'
+benchstat <(go test -tags purego -benchtime 500ms -count 15 -bench 'Sum64$')
-$ go test -benchtime 10s -bench '/xxhash,direct,bytes'
+benchstat <(go test -benchtime 500ms -count 15 -bench 'Sum64$')
 ```
 ## Projects using this package
--- a/vendor/github.com/cespare/xxhash/v2/testall.sh
+++ b/vendor/github.com/cespare/xxhash/v2/testall.sh
@ -0,0 +1,10 @@
 #!/bin/bash
 set -eu -o pipefail
 # Small convenience script for running the tests with various combinations of
 # arch/tags. This assumes we're running on amd64 and have qemu available.
 go test ./...
 go test -tags purego ./...
 GOARCH=arm64 go test
 GOARCH=arm64 go test -tags purego
--- a/vendor/github.com/cespare/xxhash/v2/xxhash.go
+++ b/vendor/github.com/cespare/xxhash/v2/xxhash.go
@ -16,19 +16,11 @@ const (
 	prime5 uint64 = 2870177450012600261
 )
-// NOTE(caleb): I'm using both consts and vars of the primes. Using consts where
+// Store the primes in an array as well.
-// possible in the Go code is worth a small (but measurable) performance boost
+//
-// by avoiding some MOVQs. Vars are needed for the asm and also are useful for
+// The consts are used when possible in Go code to avoid MOVs but we need a
-// convenience in the Go code in a few places where we need to intentionally
+// contiguous array of the assembly code.
-// avoid constant arithmetic (e.g., v1 := prime1 + prime2 fails because the
+var primes = [...]uint64{prime1, prime2, prime3, prime4, prime5}
 // result overflows a uint64).
 var (
 	prime1v = prime1
 	prime2v = prime2
 	prime3v = prime3
 	prime4v = prime4
 	prime5v = prime5
 )
 // Digest implements hash.Hash64.
 type Digest struct {
@ -50,10 +42,10 @@ func New() *Digest {
 // Reset clears the Digest's state so that it can be reused.
 func (d *Digest) Reset() {
-	d.v1 = prime1v + prime2
+	d.v1 = primes[0] + prime2
 	d.v2 = prime2
 	d.v3 = 0
-	d.v4 = -prime1v
+	d.v4 = -primes[0]
 	d.total = 0
 	d.n = 0
 }
@ -69,21 +61,23 @@ func (d *Digest) Write(b []byte) (n int, err error) {
 	n = len(b)
 	d.total += uint64(n)
 	memleft := d.mem[d.n&(len(d.mem)-1):]
 	if d.n+n < 32 {
 		// This new data doesn't even fill the current block.
-		copy(d.mem[d.n:], b)
+		copy(memleft, b)
 		d.n += n
 		return
 	}
 	if d.n > 0 {
 		// Finish off the partial block.
-		copy(d.mem[d.n:], b)
+		c := copy(memleft, b)
 		d.v1 = round(d.v1, u64(d.mem[0:8]))
 		d.v2 = round(d.v2, u64(d.mem[8:16]))
 		d.v3 = round(d.v3, u64(d.mem[16:24]))
 		d.v4 = round(d.v4, u64(d.mem[24:32]))
-		b = b[32-d.n:]
+		b = b[c:]
 		d.n = 0
 	}
@ -133,21 +127,20 @@ func (d *Digest) Sum64() uint64 {
 	h += d.total
-	i, end := 0, d.n
+	b := d.mem[:d.n&(len(d.mem)-1)]
-	for ; i+8 <= end; i += 8 {
+	for ; len(b) >= 8; b = b[8:] {
-		k1 := round(0, u64(d.mem[i:i+8]))
+		k1 := round(0, u64(b[:8]))
 		h ^= k1
 		h = rol27(h)*prime1 + prime4
 	}
-	if i+4 <= end {
+	if len(b) >= 4 {
-		h ^= uint64(u32(d.mem[i:i+4])) * prime1
+		h ^= uint64(u32(b[:4])) * prime1
 		h = rol23(h)*prime2 + prime3
-		i += 4
+		b = b[4:]
 	}
-	for i < end {
+	for ; len(b) > 0; b = b[1:] {
-		h ^= uint64(d.mem[i]) * prime5
+		h ^= uint64(b[0]) * prime5
 		h = rol11(h) * prime1
 		i++
 	}
 	h ^= h >> 33
--- a/vendor/github.com/cespare/xxhash/v2/xxhash_amd64.s
+++ b/vendor/github.com/cespare/xxhash/v2/xxhash_amd64.s
@ -1,215 +1,209 @@
 //go:build !appengine && gc && !purego
 // +build !appengine
 // +build gc
 // +build !purego
 #include "textflag.h"
-// Register allocation:
+// Registers:
-// AX	h
+#define h      AX
-// SI	pointer to advance through b
+#define d      AX
-// DX	n
+#define p      SI // pointer to advance through b
-// BX	loop end
+#define n      DX
-// R8	v1, k1
+#define end    BX // loop end
-// R9	v2
+#define v1     R8
-// R10	v3
+#define v2     R9
-// R11	v4
+#define v3     R10
-// R12	tmp
+#define v4     R11
-// R13	prime1v
+#define x      R12
-// R14	prime2v
+#define prime1 R13
-// DI	prime4v
+#define prime2 R14
 #define prime4 DI
-// round reads from and advances the buffer pointer in SI.
+#define round(acc, x) \
-// It assumes that R13 has prime1v and R14 has prime2v.
+	IMULQ prime2, x   \
-#define round(r) \
+	ADDQ  x, acc      \
-	MOVQ  (SI), R12 \
+	ROLQ  $31, acc    \
-	ADDQ  $8, SI    \
+	IMULQ prime1, acc
 	IMULQ R14, R12  \
 	ADDQ  R12, r    \
 	ROLQ  $31, r    \
 	IMULQ R13, r
-// mergeRound applies a merge round on the two registers acc and val.
+// round0 performs the operation x = round(0, x).
-// It assumes that R13 has prime1v, R14 has prime2v, and DI has prime4v.
+#define round0(x) \
-#define mergeRound(acc, val) \
+	IMULQ prime2, x \
-	IMULQ R14, val \
+	ROLQ  $31, x    \
-	ROLQ  $31, val \
+	IMULQ prime1, x
-	IMULQ R13, val \
+
-	XORQ  val, acc \
+// mergeRound applies a merge round on the two registers acc and x.
-	IMULQ R13, acc \
+// It assumes that prime1, prime2, and prime4 have been loaded.
-	ADDQ  DI, acc
+#define mergeRound(acc, x) \
 	round0(x)         \
 	XORQ  x, acc      \
 	IMULQ prime1, acc \
 	ADDQ  prime4, acc
 // blockLoop processes as many 32-byte blocks as possible,
 // updating v1, v2, v3, and v4. It assumes that there is at least one block
 // to process.
 #define blockLoop() \
 loop:  \
 	MOVQ +0(p), x  \
 	round(v1, x)   \
 	MOVQ +8(p), x  \
 	round(v2, x)   \
 	MOVQ +16(p), x \
 	round(v3, x)   \
 	MOVQ +24(p), x \
 	round(v4, x)   \
 	ADDQ $32, p    \
 	CMPQ p, end    \
 	JLE  loop
 // func Sum64(b []byte) uint64
-TEXT ·Sum64(SB), NOSPLIT, $0-32
+TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32
 	// Load fixed primes.
-	MOVQ ·prime1v(SB), R13
+	MOVQ ·primes+0(SB), prime1
-	MOVQ ·prime2v(SB), R14
+	MOVQ ·primes+8(SB), prime2
-	MOVQ ·prime4v(SB), DI
+	MOVQ ·primes+24(SB), prime4
 	// Load slice.
-	MOVQ b_base+0(FP), SI
+	MOVQ b_base+0(FP), p
-	MOVQ b_len+8(FP), DX
+	MOVQ b_len+8(FP), n
-	LEAQ (SI)(DX*1), BX
+	LEAQ (p)(n*1), end
 	// The first loop limit will be len(b)-32.
-	SUBQ $32, BX
+	SUBQ $32, end
 	// Check whether we have at least one block.
-	CMPQ DX, $32
+	CMPQ n, $32
 	JLT  noBlocks
 	// Set up initial state (v1, v2, v3, v4).
-	MOVQ R13, R8
+	MOVQ prime1, v1
-	ADDQ R14, R8
+	ADDQ prime2, v1
-	MOVQ R14, R9
+	MOVQ prime2, v2
-	XORQ R10, R10
+	XORQ v3, v3
-	XORQ R11, R11
+	XORQ v4, v4
-	SUBQ R13, R11
+	SUBQ prime1, v4
-	// Loop until SI > BX.
+	blockLoop()
 blockLoop:
 	round(R8)
 	round(R9)
 	round(R10)
 	round(R11)
-	CMPQ SI, BX
+	MOVQ v1, h
-	JLE  blockLoop
+	ROLQ $1, h
 	MOVQ v2, x
 	ROLQ $7, x
 	ADDQ x, h
 	MOVQ v3, x
 	ROLQ $12, x
 	ADDQ x, h
 	MOVQ v4, x
 	ROLQ $18, x
 	ADDQ x, h
-	MOVQ R8, AX
+	mergeRound(h, v1)
-	ROLQ $1, AX
+	mergeRound(h, v2)
-	MOVQ R9, R12
+	mergeRound(h, v3)
-	ROLQ $7, R12
+	mergeRound(h, v4)
 	ADDQ R12, AX
 	MOVQ R10, R12
 	ROLQ $12, R12
 	ADDQ R12, AX
 	MOVQ R11, R12
 	ROLQ $18, R12
 	ADDQ R12, AX
 	mergeRound(AX, R8)
 	mergeRound(AX, R9)
 	mergeRound(AX, R10)
 	mergeRound(AX, R11)
 	JMP afterBlocks
 noBlocks:
-	MOVQ ·prime5v(SB), AX
+	MOVQ ·primes+32(SB), h
 afterBlocks:
-	ADDQ DX, AX
+	ADDQ n, h
-	// Right now BX has len(b)-32, and we want to loop until SI > len(b)-8.
+	ADDQ $24, end
-	ADDQ $24, BX
+	CMPQ p, end
 	JG   try4
-	CMPQ SI, BX
+loop8:
-	JG   fourByte
+	MOVQ  (p), x
 	ADDQ  $8, p
 	round0(x)
 	XORQ  x, h
 	ROLQ  $27, h
 	IMULQ prime1, h
 	ADDQ  prime4, h
-wordLoop:
+	CMPQ p, end
-	// Calculate k1.
+	JLE  loop8
 	MOVQ  (SI), R8
 	ADDQ  $8, SI
 	IMULQ R14, R8
 	ROLQ  $31, R8
 	IMULQ R13, R8
-	XORQ  R8, AX
+try4:
-	ROLQ  $27, AX
+	ADDQ $4, end
-	IMULQ R13, AX
+	CMPQ p, end
-	ADDQ  DI, AX
+	JG   try1
-	CMPQ SI, BX
+	MOVL  (p), x
-	JLE  wordLoop
+	ADDQ  $4, p
 	IMULQ prime1, x
 	XORQ  x, h
-fourByte:
+	ROLQ  $23, h
-	ADDQ $4, BX
+	IMULQ prime2, h
-	CMPQ SI, BX
+	ADDQ  ·primes+16(SB), h
 	JG   singles
-	MOVL  (SI), R8
+try1:
-	ADDQ  $4, SI
+	ADDQ $4, end
-	IMULQ R13, R8
+	CMPQ p, end
 	XORQ  R8, AX
 	ROLQ  $23, AX
 	IMULQ R14, AX
 	ADDQ  ·prime3v(SB), AX
 singles:
 	ADDQ $4, BX
 	CMPQ SI, BX
 	JGE  finalize
-singlesLoop:
+loop1:
-	MOVBQZX (SI), R12
+	MOVBQZX (p), x
-	ADDQ    $1, SI
+	ADDQ    $1, p
-	IMULQ   ·prime5v(SB), R12
+	IMULQ   ·primes+32(SB), x
-	XORQ    R12, AX
+	XORQ    x, h
 	ROLQ    $11, h
 	IMULQ   prime1, h
-	ROLQ  $11, AX
+	CMPQ p, end
-	IMULQ R13, AX
+	JL   loop1
 	CMPQ SI, BX
 	JL   singlesLoop
 finalize:
-	MOVQ  AX, R12
+	MOVQ  h, x
-	SHRQ  $33, R12
+	SHRQ  $33, x
-	XORQ  R12, AX
+	XORQ  x, h
-	IMULQ R14, AX
+	IMULQ prime2, h
-	MOVQ  AX, R12
+	MOVQ  h, x
-	SHRQ  $29, R12
+	SHRQ  $29, x
-	XORQ  R12, AX
+	XORQ  x, h
-	IMULQ ·prime3v(SB), AX
+	IMULQ ·primes+16(SB), h
-	MOVQ  AX, R12
+	MOVQ  h, x
-	SHRQ  $32, R12
+	SHRQ  $32, x
-	XORQ  R12, AX
+	XORQ  x, h
-	MOVQ AX, ret+24(FP)
+	MOVQ h, ret+24(FP)
 	RET
 // writeBlocks uses the same registers as above except that it uses AX to store
 // the d pointer.
 // func writeBlocks(d *Digest, b []byte) int
-TEXT ·writeBlocks(SB), NOSPLIT, $0-40
+TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40
 	// Load fixed primes needed for round.
-	MOVQ ·prime1v(SB), R13
+	MOVQ ·primes+0(SB), prime1
-	MOVQ ·prime2v(SB), R14
+	MOVQ ·primes+8(SB), prime2
 	// Load slice.
-	MOVQ b_base+8(FP), SI
+	MOVQ b_base+8(FP), p
-	MOVQ b_len+16(FP), DX
+	MOVQ b_len+16(FP), n
-	LEAQ (SI)(DX*1), BX
+	LEAQ (p)(n*1), end
-	SUBQ $32, BX
+	SUBQ $32, end
 	// Load vN from d.
-	MOVQ d+0(FP), AX
+	MOVQ s+0(FP), d
-	MOVQ 0(AX), R8   // v1
+	MOVQ 0(d), v1
-	MOVQ 8(AX), R9   // v2
+	MOVQ 8(d), v2
-	MOVQ 16(AX), R10 // v3
+	MOVQ 16(d), v3
-	MOVQ 24(AX), R11 // v4
+	MOVQ 24(d), v4
 	// We don't need to check the loop condition here; this function is
 	// always called with at least one block of data to process.
-blockLoop:
+	blockLoop()
 	round(R8)
 	round(R9)
 	round(R10)
 	round(R11)
 	CMPQ SI, BX
 	JLE  blockLoop
 	// Copy vN back to d.
-	MOVQ R8, 0(AX)
+	MOVQ v1, 0(d)
-	MOVQ R9, 8(AX)
+	MOVQ v2, 8(d)
-	MOVQ R10, 16(AX)
+	MOVQ v3, 16(d)
-	MOVQ R11, 24(AX)
+	MOVQ v4, 24(d)
-	// The number of bytes written is SI minus the old base pointer.
+	// The number of bytes written is p minus the old base pointer.
-	SUBQ b_base+8(FP), SI
+	SUBQ b_base+8(FP), p
-	MOVQ SI, ret+32(FP)
+	MOVQ p, ret+32(FP)
 	RET
--- a/vendor/github.com/cespare/xxhash/v2/xxhash_arm64.s
+++ b/vendor/github.com/cespare/xxhash/v2/xxhash_arm64.s
@ -0,0 +1,183 @@
 //go:build !appengine && gc && !purego
 // +build !appengine
 // +build gc
 // +build !purego
 #include "textflag.h"
 // Registers:
 #define digest	R1
 #define h	R2 // return value
 #define p	R3 // input pointer
 #define n	R4 // input length
 #define nblocks	R5 // n / 32
 #define prime1	R7
 #define prime2	R8
 #define prime3	R9
 #define prime4	R10
 #define prime5	R11
 #define v1	R12
 #define v2	R13
 #define v3	R14
 #define v4	R15
 #define x1	R20
 #define x2	R21
 #define x3	R22
 #define x4	R23
 #define round(acc, x) \
 	MADD prime2, acc, x, acc \
 	ROR  $64-31, acc         \
 	MUL  prime1, acc
 // round0 performs the operation x = round(0, x).
 #define round0(x) \
 	MUL prime2, x \
 	ROR $64-31, x \
 	MUL prime1, x
 #define mergeRound(acc, x) \
 	round0(x)                     \
 	EOR  x, acc                   \
 	MADD acc, prime4, prime1, acc
 // blockLoop processes as many 32-byte blocks as possible,
 // updating v1, v2, v3, and v4. It assumes that n >= 32.
 #define blockLoop() \
 	LSR     $5, n, nblocks  \
 	PCALIGN $16             \
 	loop:                   \
 	LDP.P   16(p), (x1, x2) \
 	LDP.P   16(p), (x3, x4) \
 	round(v1, x1)           \
 	round(v2, x2)           \
 	round(v3, x3)           \
 	round(v4, x4)           \
 	SUB     $1, nblocks     \
 	CBNZ    nblocks, loop
 // func Sum64(b []byte) uint64
 TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32
 	LDP b_base+0(FP), (p, n)
 	LDP  ·primes+0(SB), (prime1, prime2)
 	LDP  ·primes+16(SB), (prime3, prime4)
 	MOVD ·primes+32(SB), prime5
 	CMP  $32, n
 	CSEL LT, prime5, ZR, h // if n < 32 { h = prime5 } else { h = 0 }
 	BLT  afterLoop
 	ADD  prime1, prime2, v1
 	MOVD prime2, v2
 	MOVD $0, v3
 	NEG  prime1, v4
 	blockLoop()
 	ROR $64-1, v1, x1
 	ROR $64-7, v2, x2
 	ADD x1, x2
 	ROR $64-12, v3, x3
 	ROR $64-18, v4, x4
 	ADD x3, x4
 	ADD x2, x4, h
 	mergeRound(h, v1)
 	mergeRound(h, v2)
 	mergeRound(h, v3)
 	mergeRound(h, v4)
 afterLoop:
 	ADD n, h
 	TBZ   $4, n, try8
 	LDP.P 16(p), (x1, x2)
 	round0(x1)
 	// NOTE: here and below, sequencing the EOR after the ROR (using a
 	// rotated register) is worth a small but measurable speedup for small
 	// inputs.
 	ROR  $64-27, h
 	EOR  x1 @> 64-27, h, h
 	MADD h, prime4, prime1, h
 	round0(x2)
 	ROR  $64-27, h
 	EOR  x2 @> 64-27, h, h
 	MADD h, prime4, prime1, h
 try8:
 	TBZ    $3, n, try4
 	MOVD.P 8(p), x1
 	round0(x1)
 	ROR  $64-27, h
 	EOR  x1 @> 64-27, h, h
 	MADD h, prime4, prime1, h
 try4:
 	TBZ     $2, n, try2
 	MOVWU.P 4(p), x2
 	MUL  prime1, x2
 	ROR  $64-23, h
 	EOR  x2 @> 64-23, h, h
 	MADD h, prime3, prime2, h
 try2:
 	TBZ     $1, n, try1
 	MOVHU.P 2(p), x3
 	AND     $255, x3, x1
 	LSR     $8, x3, x2
 	MUL prime5, x1
 	ROR $64-11, h
 	EOR x1 @> 64-11, h, h
 	MUL prime1, h
 	MUL prime5, x2
 	ROR $64-11, h
 	EOR x2 @> 64-11, h, h
 	MUL prime1, h
 try1:
 	TBZ   $0, n, finalize
 	MOVBU (p), x4
 	MUL prime5, x4
 	ROR $64-11, h
 	EOR x4 @> 64-11, h, h
 	MUL prime1, h
 finalize:
 	EOR h >> 33, h
 	MUL prime2, h
 	EOR h >> 29, h
 	MUL prime3, h
 	EOR h >> 32, h
 	MOVD h, ret+24(FP)
 	RET
 // func writeBlocks(d *Digest, b []byte) int
 TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40
 	LDP ·primes+0(SB), (prime1, prime2)
 	// Load state. Assume v[1-4] are stored contiguously.
 	MOVD d+0(FP), digest
 	LDP  0(digest), (v1, v2)
 	LDP  16(digest), (v3, v4)
 	LDP b_base+8(FP), (p, n)
 	blockLoop()
 	// Store updated state.
 	STP (v1, v2), 0(digest)
 	STP (v3, v4), 16(digest)
 	BIC  $31, n
 	MOVD n, ret+32(FP)
 	RET
--- a/vendor/github.com/cespare/xxhash/v2/xxhash_amd64.go
+++ b/vendor/github.com/cespare/xxhash/v2/xxhash_amd64.go
@ -1,3 +1,5 @@
 //go:build (amd64 || arm64) && !appengine && gc && !purego
 // +build amd64 arm64
 // +build !appengine
 // +build gc
 // +build !purego
--- a/vendor/github.com/cespare/xxhash/v2/xxhash_other.go
+++ b/vendor/github.com/cespare/xxhash/v2/xxhash_other.go
@ -1,4 +1,5 @@
-// +build !amd64 appengine !gc purego
+//go:build (!amd64 && !arm64) || appengine || !gc || purego
 // +build !amd64,!arm64 appengine !gc purego
 package xxhash
@ -14,10 +15,10 @@ func Sum64(b []byte) uint64 {
 	var h uint64
 	if n >= 32 {
-		v1 := prime1v + prime2
+		v1 := primes[0] + prime2
 		v2 := prime2
 		v3 := uint64(0)
-		v4 := -prime1v
+		v4 := -primes[0]
 		for len(b) >= 32 {
 			v1 = round(v1, u64(b[0:8:len(b)]))
 			v2 = round(v2, u64(b[8:16:len(b)]))
@ -36,19 +37,18 @@ func Sum64(b []byte) uint64 {
 	h += uint64(n)
-	i, end := 0, len(b)
+	for ; len(b) >= 8; b = b[8:] {
-	for ; i+8 <= end; i += 8 {
+		k1 := round(0, u64(b[:8]))
 		k1 := round(0, u64(b[i:i+8:len(b)]))
 		h ^= k1
 		h = rol27(h)*prime1 + prime4
 	}
-	if i+4 <= end {
+	if len(b) >= 4 {
-		h ^= uint64(u32(b[i:i+4:len(b)])) * prime1
+		h ^= uint64(u32(b[:4])) * prime1
 		h = rol23(h)*prime2 + prime3
-		i += 4
+		b = b[4:]
 	}
-	for ; i < end; i++ {
+	for ; len(b) > 0; b = b[1:] {
-		h ^= uint64(b[i]) * prime5
+		h ^= uint64(b[0]) * prime5
 		h = rol11(h) * prime1
 	}
--- a/vendor/github.com/cespare/xxhash/v2/xxhash_safe.go
+++ b/vendor/github.com/cespare/xxhash/v2/xxhash_safe.go
@ -1,3 +1,4 @@
 //go:build appengine
 // +build appengine
 // This file contains the safe implementations of otherwise unsafe-using code.
--- a/vendor/github.com/cespare/xxhash/v2/xxhash_unsafe.go
+++ b/vendor/github.com/cespare/xxhash/v2/xxhash_unsafe.go
@ -1,3 +1,4 @@
 //go:build !appengine
 // +build !appengine
 // This file encapsulates usage of unsafe.
@ -11,7 +12,7 @@ import (
 // In the future it's possible that compiler optimizations will make these
 // XxxString functions unnecessary by realizing that calls such as
-// Sum64([]byte(s)) don't need to copy s. See https://golang.org/issue/2205.
+// Sum64([]byte(s)) don't need to copy s. See https://go.dev/issue/2205.
 // If that happens, even if we keep these functions they can be replaced with
 // the trivial safe code.
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@ -12,7 +12,7 @@ github.com/Microsoft/go-winio/pkg/guid
 # github.com/beorn7/perks v1.0.1
 ## explicit; go 1.11
 github.com/beorn7/perks/quantile
-# github.com/cespare/xxhash/v2 v2.1.2
+# github.com/cespare/xxhash/v2 v2.2.0
 ## explicit; go 1.11
 github.com/cespare/xxhash/v2
 # github.com/container-orchestrated-devices/container-device-interface v0.6.0