vendor: update golang.org/x/crypto 69ecbb4d6d5dab05e49161c6e77ea40a030884e1 (CVE-2020-7919)

Includes 69ecbb4d6d (forward-port of 8b5121be2f), which fixes CVE-2020-7919: - Panic in crypto/x509 certificate parsing and golang.org/x/crypto/cryptobyte On 32-bit architectures, a malformed input to crypto/x509 or the ASN.1 parsing functions of golang.org/x/crypto/cryptobyte can lead to a panic. The malformed certificate can be delivered via a crypto/tls connection to a client, or to a server that accepts client certificates. net/http clients can be made to crash by an HTTPS server, while net/http servers that accept client certificates will recover the panic and are unaffected. Thanks to Project Wycheproof for providing the test cases that led to the discovery of this issue. The issue is CVE-2020-7919 and Go issue golang.org/issue/36837. Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
2020-01-29 11:25:09 +01:00 · 2020-01-29 11:25:09 +01:00 · 27d9aa2d9f
parent 19fd390c36
commit 27d9aa2d9f
54 changed files with 3989 additions and 2464 deletions
--- a/vendor.conf
+++ b/vendor.conf
@ -71,7 +71,7 @@ github.com/tonistiigi/units                         6950e57a87eaf136bbe44ef2ec8e
 github.com/xeipuuv/gojsonpointer                    02993c407bfbf5f6dae44c4f4b1cf6a39b5fc5bb
 github.com/xeipuuv/gojsonreference                  bd5ef7bd5415a7ac448318e64f11a24cd21e594b
 github.com/xeipuuv/gojsonschema                     f971f3cd73b2899de6923801c147f075263e0c50 # v1.1.0
-golang.org/x/crypto                                 88737f569e3a9c7ab309cdc09a07fe7fc87233c3
+golang.org/x/crypto                                 69ecbb4d6d5dab05e49161c6e77ea40a030884e1
 golang.org/x/net                                    f3200d17e092c607f615320ecaad13d87ad9a2b3
 golang.org/x/oauth2                                 ef147856a6ddbb60760db74283d2424e98c87bff
 golang.org/x/sync                                   e225da77a7e68af35c70ccbf71af2b83e6acac3c
--- a/vendor/golang.org/x/crypto/chacha20/chacha_arm64.go
+++ b/vendor/golang.org/x/crypto/chacha20/chacha_arm64.go
@ -0,0 +1,17 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build go1.11
+// +build !gccgo,!appengine
+
+package chacha20
+
+const bufSize = 256
+
+//go:noescape
+func xorKeyStreamVX(dst, src []byte, key *[8]uint32, nonce *[3]uint32, counter *uint32)
+
+func (c *Cipher) xorKeyStreamBlocks(dst, src []byte) {
+	xorKeyStreamVX(dst, src, &c.key, &c.nonce, &c.counter)
+}
--- a/vendor/golang.org/x/crypto/internal/chacha20/asm_arm64.s
+++ b/vendor/golang.org/x/crypto/internal/chacha20/asm_arm64.s
--- a/vendor/golang.org/x/crypto/chacha20/chacha_generic.go
+++ b/vendor/golang.org/x/crypto/chacha20/chacha_generic.go
@ -0,0 +1,364 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package chacha20 implements the ChaCha20 and XChaCha20 encryption algorithms
+// as specified in RFC 8439 and draft-irtf-cfrg-xchacha-01.
+package chacha20
+
+import (
+	"crypto/cipher"
+	"encoding/binary"
+	"errors"
+	"math/bits"
+
+	"golang.org/x/crypto/internal/subtle"
+)
+
+const (
+	// KeySize is the size of the key used by this cipher, in bytes.
+	KeySize = 32
+
+	// NonceSize is the size of the nonce used with the standard variant of this
+	// cipher, in bytes.
+	//
+	// Note that this is too short to be safely generated at random if the same
+	// key is reused more than 2³² times.
+	NonceSize = 12
+
+	// NonceSizeX is the size of the nonce used with the XChaCha20 variant of
+	// this cipher, in bytes.
+	NonceSizeX = 24
+)
+
+// Cipher is a stateful instance of ChaCha20 or XChaCha20 using a particular key
+// and nonce. A *Cipher implements the cipher.Stream interface.
+type Cipher struct {
+	// The ChaCha20 state is 16 words: 4 constant, 8 of key, 1 of counter
+	// (incremented after each block), and 3 of nonce.
+	key     [8]uint32
+	counter uint32
+	nonce   [3]uint32
+
+	// The last len bytes of buf are leftover key stream bytes from the previous
+	// XORKeyStream invocation. The size of buf depends on how many blocks are
+	// computed at a time.
+	buf [bufSize]byte
+	len int
+
+	// The counter-independent results of the first round are cached after they
+	// are computed the first time.
+	precompDone      bool
+	p1, p5, p9, p13  uint32
+	p2, p6, p10, p14 uint32
+	p3, p7, p11, p15 uint32
+}
+
+var _ cipher.Stream = (*Cipher)(nil)
+
+// NewUnauthenticatedCipher creates a new ChaCha20 stream cipher with the given
+// 32 bytes key and a 12 or 24 bytes nonce. If a nonce of 24 bytes is provided,
+// the XChaCha20 construction will be used. It returns an error if key or nonce
+// have any other length.
+//
+// Note that ChaCha20, like all stream ciphers, is not authenticated and allows
+// attackers to silently tamper with the plaintext. For this reason, it is more
+// appropriate as a building block than as a standalone encryption mechanism.
+// Instead, consider using package golang.org/x/crypto/chacha20poly1305.
+func NewUnauthenticatedCipher(key, nonce []byte) (*Cipher, error) {
+	// This function is split into a wrapper so that the Cipher allocation will
+	// be inlined, and depending on how the caller uses the return value, won't
+	// escape to the heap.
+	c := &Cipher{}
+	return newUnauthenticatedCipher(c, key, nonce)
+}
+
+func newUnauthenticatedCipher(c *Cipher, key, nonce []byte) (*Cipher, error) {
+	if len(key) != KeySize {
+		return nil, errors.New("chacha20: wrong key size")
+	}
+	if len(nonce) == NonceSizeX {
+		// XChaCha20 uses the ChaCha20 core to mix 16 bytes of the nonce into a
+		// derived key, allowing it to operate on a nonce of 24 bytes. See
+		// draft-irtf-cfrg-xchacha-01, Section 2.3.
+		key, _ = HChaCha20(key, nonce[0:16])
+		cNonce := make([]byte, NonceSize)
+		copy(cNonce[4:12], nonce[16:24])
+		nonce = cNonce
+	} else if len(nonce) != NonceSize {
+		return nil, errors.New("chacha20: wrong nonce size")
+	}
+
+	c.key = [8]uint32{
+		binary.LittleEndian.Uint32(key[0:4]),
+		binary.LittleEndian.Uint32(key[4:8]),
+		binary.LittleEndian.Uint32(key[8:12]),
+		binary.LittleEndian.Uint32(key[12:16]),
+		binary.LittleEndian.Uint32(key[16:20]),
+		binary.LittleEndian.Uint32(key[20:24]),
+		binary.LittleEndian.Uint32(key[24:28]),
+		binary.LittleEndian.Uint32(key[28:32]),
+	}
+	c.nonce = [3]uint32{
+		binary.LittleEndian.Uint32(nonce[0:4]),
+		binary.LittleEndian.Uint32(nonce[4:8]),
+		binary.LittleEndian.Uint32(nonce[8:12]),
+	}
+	return c, nil
+}
+
+// The constant first 4 words of the ChaCha20 state.
+const (
+	j0 uint32 = 0x61707865 // expa
+	j1 uint32 = 0x3320646e // nd 3
+	j2 uint32 = 0x79622d32 // 2-by
+	j3 uint32 = 0x6b206574 // te k
+)
+
+const blockSize = 64
+
+// quarterRound is the core of ChaCha20. It shuffles the bits of 4 state words.
+// It's executed 4 times for each of the 20 ChaCha20 rounds, operating on all 16
+// words each round, in columnar or diagonal groups of 4 at a time.
+func quarterRound(a, b, c, d uint32) (uint32, uint32, uint32, uint32) {
+	a += b
+	d ^= a
+	d = bits.RotateLeft32(d, 16)
+	c += d
+	b ^= c
+	b = bits.RotateLeft32(b, 12)
+	a += b
+	d ^= a
+	d = bits.RotateLeft32(d, 8)
+	c += d
+	b ^= c
+	b = bits.RotateLeft32(b, 7)
+	return a, b, c, d
+}
+
+// XORKeyStream XORs each byte in the given slice with a byte from the
+// cipher's key stream. Dst and src must overlap entirely or not at all.
+//
+// If len(dst) < len(src), XORKeyStream will panic. It is acceptable
+// to pass a dst bigger than src, and in that case, XORKeyStream will
+// only update dst[:len(src)] and will not touch the rest of dst.
+//
+// Multiple calls to XORKeyStream behave as if the concatenation of
+// the src buffers was passed in a single run. That is, Cipher
+// maintains state and does not reset at each XORKeyStream call.
+func (s *Cipher) XORKeyStream(dst, src []byte) {
+	if len(src) == 0 {
+		return
+	}
+	if len(dst) < len(src) {
+		panic("chacha20: output smaller than input")
+	}
+	dst = dst[:len(src)]
+	if subtle.InexactOverlap(dst, src) {
+		panic("chacha20: invalid buffer overlap")
+	}
+
+	// First, drain any remaining key stream from a previous XORKeyStream.
+	if s.len != 0 {
+		keyStream := s.buf[bufSize-s.len:]
+		if len(src) < len(keyStream) {
+			keyStream = keyStream[:len(src)]
+		}
+		_ = src[len(keyStream)-1] // bounds check elimination hint
+		for i, b := range keyStream {
+			dst[i] = src[i] ^ b
+		}
+		s.len -= len(keyStream)
+		src = src[len(keyStream):]
+		dst = dst[len(keyStream):]
+	}
+
+	const blocksPerBuf = bufSize / blockSize
+	numBufs := (uint64(len(src)) + bufSize - 1) / bufSize
+	if uint64(s.counter)+numBufs*blocksPerBuf >= 1<<32 {
+		panic("chacha20: counter overflow")
+	}
+
+	// xorKeyStreamBlocks implementations expect input lengths that are a
+	// multiple of bufSize. Platform-specific ones process multiple blocks at a
+	// time, so have bufSizes that are a multiple of blockSize.
+
+	rem := len(src) % bufSize
+	full := len(src) - rem
+
+	if full > 0 {
+		s.xorKeyStreamBlocks(dst[:full], src[:full])
+	}
+
+	// If we have a partial (multi-)block, pad it for xorKeyStreamBlocks, and
+	// keep the leftover keystream for the next XORKeyStream invocation.
+	if rem > 0 {
+		s.buf = [bufSize]byte{}
+		copy(s.buf[:], src[full:])
+		s.xorKeyStreamBlocks(s.buf[:], s.buf[:])
+		s.len = bufSize - copy(dst[full:], s.buf[:])
+	}
+}
+
+func (s *Cipher) xorKeyStreamBlocksGeneric(dst, src []byte) {
+	if len(dst) != len(src) || len(dst)%blockSize != 0 {
+		panic("chacha20: internal error: wrong dst and/or src length")
+	}
+
+	// To generate each block of key stream, the initial cipher state
+	// (represented below) is passed through 20 rounds of shuffling,
+	// alternatively applying quarterRounds by columns (like 1, 5, 9, 13)
+	// or by diagonals (like 1, 6, 11, 12).
+	//
+	//      0:cccccccc   1:cccccccc   2:cccccccc   3:cccccccc
+	//      4:kkkkkkkk   5:kkkkkkkk   6:kkkkkkkk   7:kkkkkkkk
+	//      8:kkkkkkkk   9:kkkkkkkk  10:kkkkkkkk  11:kkkkkkkk
+	//     12:bbbbbbbb  13:nnnnnnnn  14:nnnnnnnn  15:nnnnnnnn
+	//
+	//            c=constant k=key b=blockcount n=nonce
+	var (
+		c0, c1, c2, c3   = j0, j1, j2, j3
+		c4, c5, c6, c7   = s.key[0], s.key[1], s.key[2], s.key[3]
+		c8, c9, c10, c11 = s.key[4], s.key[5], s.key[6], s.key[7]
+		_, c13, c14, c15 = s.counter, s.nonce[0], s.nonce[1], s.nonce[2]
+	)
+
+	// Three quarters of the first round don't depend on the counter, so we can
+	// calculate them here, and reuse them for multiple blocks in the loop, and
+	// for future XORKeyStream invocations.
+	if !s.precompDone {
+		s.p1, s.p5, s.p9, s.p13 = quarterRound(c1, c5, c9, c13)
+		s.p2, s.p6, s.p10, s.p14 = quarterRound(c2, c6, c10, c14)
+		s.p3, s.p7, s.p11, s.p15 = quarterRound(c3, c7, c11, c15)
+		s.precompDone = true
+	}
+
+	for i := 0; i < len(src); i += blockSize {
+		// The remainder of the first column round.
+		fcr0, fcr4, fcr8, fcr12 := quarterRound(c0, c4, c8, s.counter)
+
+		// The second diagonal round.
+		x0, x5, x10, x15 := quarterRound(fcr0, s.p5, s.p10, s.p15)
+		x1, x6, x11, x12 := quarterRound(s.p1, s.p6, s.p11, fcr12)
+		x2, x7, x8, x13 := quarterRound(s.p2, s.p7, fcr8, s.p13)
+		x3, x4, x9, x14 := quarterRound(s.p3, fcr4, s.p9, s.p14)
+
+		// The remaining 18 rounds.
+		for i := 0; i < 9; i++ {
+			// Column round.
+			x0, x4, x8, x12 = quarterRound(x0, x4, x8, x12)
+			x1, x5, x9, x13 = quarterRound(x1, x5, x9, x13)
+			x2, x6, x10, x14 = quarterRound(x2, x6, x10, x14)
+			x3, x7, x11, x15 = quarterRound(x3, x7, x11, x15)
+
+			// Diagonal round.
+			x0, x5, x10, x15 = quarterRound(x0, x5, x10, x15)
+			x1, x6, x11, x12 = quarterRound(x1, x6, x11, x12)
+			x2, x7, x8, x13 = quarterRound(x2, x7, x8, x13)
+			x3, x4, x9, x14 = quarterRound(x3, x4, x9, x14)
+		}
+
+		// Finally, add back the initial state to generate the key stream.
+		x0 += c0
+		x1 += c1
+		x2 += c2
+		x3 += c3
+		x4 += c4
+		x5 += c5
+		x6 += c6
+		x7 += c7
+		x8 += c8
+		x9 += c9
+		x10 += c10
+		x11 += c11
+		x12 += s.counter
+		x13 += c13
+		x14 += c14
+		x15 += c15
+
+		s.counter += 1
+		if s.counter == 0 {
+			panic("chacha20: internal error: counter overflow")
+		}
+
+		in, out := src[i:], dst[i:]
+		in, out = in[:blockSize], out[:blockSize] // bounds check elimination hint
+
+		// XOR the key stream with the source and write out the result.
+		xor(out[0:], in[0:], x0)
+		xor(out[4:], in[4:], x1)
+		xor(out[8:], in[8:], x2)
+		xor(out[12:], in[12:], x3)
+		xor(out[16:], in[16:], x4)
+		xor(out[20:], in[20:], x5)
+		xor(out[24:], in[24:], x6)
+		xor(out[28:], in[28:], x7)
+		xor(out[32:], in[32:], x8)
+		xor(out[36:], in[36:], x9)
+		xor(out[40:], in[40:], x10)
+		xor(out[44:], in[44:], x11)
+		xor(out[48:], in[48:], x12)
+		xor(out[52:], in[52:], x13)
+		xor(out[56:], in[56:], x14)
+		xor(out[60:], in[60:], x15)
+	}
+}
+
+// HChaCha20 uses the ChaCha20 core to generate a derived key from a 32 bytes
+// key and a 16 bytes nonce. It returns an error if key or nonce have any other
+// length. It is used as part of the XChaCha20 construction.
+func HChaCha20(key, nonce []byte) ([]byte, error) {
+	// This function is split into a wrapper so that the slice allocation will
+	// be inlined, and depending on how the caller uses the return value, won't
+	// escape to the heap.
+	out := make([]byte, 32)
+	return hChaCha20(out, key, nonce)
+}
+
+func hChaCha20(out, key, nonce []byte) ([]byte, error) {
+	if len(key) != KeySize {
+		return nil, errors.New("chacha20: wrong HChaCha20 key size")
+	}
+	if len(nonce) != 16 {
+		return nil, errors.New("chacha20: wrong HChaCha20 nonce size")
+	}
+
+	x0, x1, x2, x3 := j0, j1, j2, j3
+	x4 := binary.LittleEndian.Uint32(key[0:4])
+	x5 := binary.LittleEndian.Uint32(key[4:8])
+	x6 := binary.LittleEndian.Uint32(key[8:12])
+	x7 := binary.LittleEndian.Uint32(key[12:16])
+	x8 := binary.LittleEndian.Uint32(key[16:20])
+	x9 := binary.LittleEndian.Uint32(key[20:24])
+	x10 := binary.LittleEndian.Uint32(key[24:28])
+	x11 := binary.LittleEndian.Uint32(key[28:32])
+	x12 := binary.LittleEndian.Uint32(nonce[0:4])
+	x13 := binary.LittleEndian.Uint32(nonce[4:8])
+	x14 := binary.LittleEndian.Uint32(nonce[8:12])
+	x15 := binary.LittleEndian.Uint32(nonce[12:16])
+
+	for i := 0; i < 10; i++ {
+		// Diagonal round.
+		x0, x4, x8, x12 = quarterRound(x0, x4, x8, x12)
+		x1, x5, x9, x13 = quarterRound(x1, x5, x9, x13)
+		x2, x6, x10, x14 = quarterRound(x2, x6, x10, x14)
+		x3, x7, x11, x15 = quarterRound(x3, x7, x11, x15)
+
+		// Column round.
+		x0, x5, x10, x15 = quarterRound(x0, x5, x10, x15)
+		x1, x6, x11, x12 = quarterRound(x1, x6, x11, x12)
+		x2, x7, x8, x13 = quarterRound(x2, x7, x8, x13)
+		x3, x4, x9, x14 = quarterRound(x3, x4, x9, x14)
+	}
+
+	_ = out[31] // bounds check elimination hint
+	binary.LittleEndian.PutUint32(out[0:4], x0)
+	binary.LittleEndian.PutUint32(out[4:8], x1)
+	binary.LittleEndian.PutUint32(out[8:12], x2)
+	binary.LittleEndian.PutUint32(out[12:16], x3)
+	binary.LittleEndian.PutUint32(out[16:20], x12)
+	binary.LittleEndian.PutUint32(out[20:24], x13)
+	binary.LittleEndian.PutUint32(out[24:28], x14)
+	binary.LittleEndian.PutUint32(out[28:32], x15)
+	return out, nil
+}
--- a/vendor/golang.org/x/crypto/chacha20/chacha_noasm.go
+++ b/vendor/golang.org/x/crypto/chacha20/chacha_noasm.go
@ -0,0 +1,13 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !arm64,!s390x,!ppc64le arm64,!go1.11 gccgo appengine
+
+package chacha20
+
+const bufSize = blockSize
+
+func (s *Cipher) xorKeyStreamBlocks(dst, src []byte) {
+	s.xorKeyStreamBlocksGeneric(dst, src)
+}
--- a/vendor/golang.org/x/crypto/chacha20/chacha_ppc64le.go
+++ b/vendor/golang.org/x/crypto/chacha20/chacha_ppc64le.go
@ -0,0 +1,16 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !gccgo,!appengine
+
+package chacha20
+
+const bufSize = 256
+
+//go:noescape
+func chaCha20_ctr32_vsx(out, inp *byte, len int, key *[8]uint32, counter *uint32)
+
+func (c *Cipher) xorKeyStreamBlocks(dst, src []byte) {
+	chaCha20_ctr32_vsx(&dst[0], &src[0], len(src), &c.key, &c.counter)
+}
--- a/vendor/golang.org/x/crypto/chacha20/chacha_ppc64le.s
+++ b/vendor/golang.org/x/crypto/chacha20/chacha_ppc64le.s
@ -0,0 +1,449 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Based on CRYPTOGAMS code with the following comment:
+// # ====================================================================
+// # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+// # project. The module is, however, dual licensed under OpenSSL and
+// # CRYPTOGAMS licenses depending on where you obtain it. For further
+// # details see http://www.openssl.org/~appro/cryptogams/.
+// # ====================================================================
+
+// Code for the perl script that generates the ppc64 assembler
+// can be found in the cryptogams repository at the link below. It is based on
+// the original from openssl.
+
+// https://github.com/dot-asm/cryptogams/commit/a60f5b50ed908e91
+
+// The differences in this and the original implementation are
+// due to the calling conventions and initialization of constants.
+
+// +build !gccgo,!appengine
+
+#include "textflag.h"
+
+#define OUT  R3
+#define INP  R4
+#define LEN  R5
+#define KEY  R6
+#define CNT  R7
+#define TMP  R15
+
+#define CONSTBASE  R16
+#define BLOCKS R17
+
+DATA consts<>+0x00(SB)/8, $0x3320646e61707865
+DATA consts<>+0x08(SB)/8, $0x6b20657479622d32
+DATA consts<>+0x10(SB)/8, $0x0000000000000001
+DATA consts<>+0x18(SB)/8, $0x0000000000000000
+DATA consts<>+0x20(SB)/8, $0x0000000000000004
+DATA consts<>+0x28(SB)/8, $0x0000000000000000
+DATA consts<>+0x30(SB)/8, $0x0a0b08090e0f0c0d
+DATA consts<>+0x38(SB)/8, $0x0203000106070405
+DATA consts<>+0x40(SB)/8, $0x090a0b080d0e0f0c
+DATA consts<>+0x48(SB)/8, $0x0102030005060704
+DATA consts<>+0x50(SB)/8, $0x6170786561707865
+DATA consts<>+0x58(SB)/8, $0x6170786561707865
+DATA consts<>+0x60(SB)/8, $0x3320646e3320646e
+DATA consts<>+0x68(SB)/8, $0x3320646e3320646e
+DATA consts<>+0x70(SB)/8, $0x79622d3279622d32
+DATA consts<>+0x78(SB)/8, $0x79622d3279622d32
+DATA consts<>+0x80(SB)/8, $0x6b2065746b206574
+DATA consts<>+0x88(SB)/8, $0x6b2065746b206574
+DATA consts<>+0x90(SB)/8, $0x0000000100000000
+DATA consts<>+0x98(SB)/8, $0x0000000300000002
+GLOBL consts<>(SB), RODATA, $0xa0
+
+//func chaCha20_ctr32_vsx(out, inp *byte, len int, key *[8]uint32, counter *uint32)
+TEXT ·chaCha20_ctr32_vsx(SB),NOSPLIT,$64-40
+	MOVD out+0(FP), OUT
+	MOVD inp+8(FP), INP
+	MOVD len+16(FP), LEN
+	MOVD key+24(FP), KEY
+	MOVD counter+32(FP), CNT
+
+	// Addressing for constants
+	MOVD $consts<>+0x00(SB), CONSTBASE
+	MOVD $16, R8
+	MOVD $32, R9
+	MOVD $48, R10
+	MOVD $64, R11
+	SRD $6, LEN, BLOCKS
+	// V16
+	LXVW4X (CONSTBASE)(R0), VS48
+	ADD $80,CONSTBASE
+
+	// Load key into V17,V18
+	LXVW4X (KEY)(R0), VS49
+	LXVW4X (KEY)(R8), VS50
+
+	// Load CNT, NONCE into V19
+	LXVW4X (CNT)(R0), VS51
+
+	// Clear V27
+	VXOR V27, V27, V27
+
+	// V28
+	LXVW4X (CONSTBASE)(R11), VS60
+
+	// splat slot from V19 -> V26
+	VSPLTW $0, V19, V26
+
+	VSLDOI $4, V19, V27, V19
+	VSLDOI $12, V27, V19, V19
+
+	VADDUWM V26, V28, V26
+
+	MOVD $10, R14
+	MOVD R14, CTR
+
+loop_outer_vsx:
+	// V0, V1, V2, V3
+	LXVW4X (R0)(CONSTBASE), VS32
+	LXVW4X (R8)(CONSTBASE), VS33
+	LXVW4X (R9)(CONSTBASE), VS34
+	LXVW4X (R10)(CONSTBASE), VS35
+
+	// splat values from V17, V18 into V4-V11
+	VSPLTW $0, V17, V4
+	VSPLTW $1, V17, V5
+	VSPLTW $2, V17, V6
+	VSPLTW $3, V17, V7
+	VSPLTW $0, V18, V8
+	VSPLTW $1, V18, V9
+	VSPLTW $2, V18, V10
+	VSPLTW $3, V18, V11
+
+	// VOR
+	VOR V26, V26, V12
+
+	// splat values from V19 -> V13, V14, V15
+	VSPLTW $1, V19, V13
+	VSPLTW $2, V19, V14
+	VSPLTW $3, V19, V15
+
+	// splat   const values
+	VSPLTISW $-16, V27
+	VSPLTISW $12, V28
+	VSPLTISW $8, V29
+	VSPLTISW $7, V30
+
+loop_vsx:
+	VADDUWM V0, V4, V0
+	VADDUWM V1, V5, V1
+	VADDUWM V2, V6, V2
+	VADDUWM V3, V7, V3
+
+	VXOR V12, V0, V12
+	VXOR V13, V1, V13
+	VXOR V14, V2, V14
+	VXOR V15, V3, V15
+
+	VRLW V12, V27, V12
+	VRLW V13, V27, V13
+	VRLW V14, V27, V14
+	VRLW V15, V27, V15
+
+	VADDUWM V8, V12, V8
+	VADDUWM V9, V13, V9
+	VADDUWM V10, V14, V10
+	VADDUWM V11, V15, V11
+
+	VXOR V4, V8, V4
+	VXOR V5, V9, V5
+	VXOR V6, V10, V6
+	VXOR V7, V11, V7
+
+	VRLW V4, V28, V4
+	VRLW V5, V28, V5
+	VRLW V6, V28, V6
+	VRLW V7, V28, V7
+
+	VADDUWM V0, V4, V0
+	VADDUWM V1, V5, V1
+	VADDUWM V2, V6, V2
+	VADDUWM V3, V7, V3
+
+	VXOR V12, V0, V12
+	VXOR V13, V1, V13
+	VXOR V14, V2, V14
+	VXOR V15, V3, V15
+
+	VRLW V12, V29, V12
+	VRLW V13, V29, V13
+	VRLW V14, V29, V14
+	VRLW V15, V29, V15
+
+	VADDUWM V8, V12, V8
+	VADDUWM V9, V13, V9
+	VADDUWM V10, V14, V10
+	VADDUWM V11, V15, V11
+
+	VXOR V4, V8, V4
+	VXOR V5, V9, V5
+	VXOR V6, V10, V6
+	VXOR V7, V11, V7
+
+	VRLW V4, V30, V4
+	VRLW V5, V30, V5
+	VRLW V6, V30, V6
+	VRLW V7, V30, V7
+
+	VADDUWM V0, V5, V0
+	VADDUWM V1, V6, V1
+	VADDUWM V2, V7, V2
+	VADDUWM V3, V4, V3
+
+	VXOR V15, V0, V15
+	VXOR V12, V1, V12
+	VXOR V13, V2, V13
+	VXOR V14, V3, V14
+
+	VRLW V15, V27, V15
+	VRLW V12, V27, V12
+	VRLW V13, V27, V13
+	VRLW V14, V27, V14
+
+	VADDUWM V10, V15, V10
+	VADDUWM V11, V12, V11
+	VADDUWM V8, V13, V8
+	VADDUWM V9, V14, V9
+
+	VXOR V5, V10, V5
+	VXOR V6, V11, V6
+	VXOR V7, V8, V7
+	VXOR V4, V9, V4
+
+	VRLW V5, V28, V5
+	VRLW V6, V28, V6
+	VRLW V7, V28, V7
+	VRLW V4, V28, V4
+
+	VADDUWM V0, V5, V0
+	VADDUWM V1, V6, V1
+	VADDUWM V2, V7, V2
+	VADDUWM V3, V4, V3
+
+	VXOR V15, V0, V15
+	VXOR V12, V1, V12
+	VXOR V13, V2, V13
+	VXOR V14, V3, V14
+
+	VRLW V15, V29, V15
+	VRLW V12, V29, V12
+	VRLW V13, V29, V13
+	VRLW V14, V29, V14
+
+	VADDUWM V10, V15, V10
+	VADDUWM V11, V12, V11
+	VADDUWM V8, V13, V8
+	VADDUWM V9, V14, V9
+
+	VXOR V5, V10, V5
+	VXOR V6, V11, V6
+	VXOR V7, V8, V7
+	VXOR V4, V9, V4
+
+	VRLW V5, V30, V5
+	VRLW V6, V30, V6
+	VRLW V7, V30, V7
+	VRLW V4, V30, V4
+	BC   16, LT, loop_vsx
+
+	VADDUWM V12, V26, V12
+
+	WORD $0x13600F8C		// VMRGEW V0, V1, V27
+	WORD $0x13821F8C		// VMRGEW V2, V3, V28
+
+	WORD $0x10000E8C		// VMRGOW V0, V1, V0
+	WORD $0x10421E8C		// VMRGOW V2, V3, V2
+
+	WORD $0x13A42F8C		// VMRGEW V4, V5, V29
+	WORD $0x13C63F8C		// VMRGEW V6, V7, V30
+
+	XXPERMDI VS32, VS34, $0, VS33
+	XXPERMDI VS32, VS34, $3, VS35
+	XXPERMDI VS59, VS60, $0, VS32
+	XXPERMDI VS59, VS60, $3, VS34
+
+	WORD $0x10842E8C		// VMRGOW V4, V5, V4
+	WORD $0x10C63E8C		// VMRGOW V6, V7, V6
+
+	WORD $0x13684F8C		// VMRGEW V8, V9, V27
+	WORD $0x138A5F8C		// VMRGEW V10, V11, V28
+
+	XXPERMDI VS36, VS38, $0, VS37
+	XXPERMDI VS36, VS38, $3, VS39
+	XXPERMDI VS61, VS62, $0, VS36
+	XXPERMDI VS61, VS62, $3, VS38
+
+	WORD $0x11084E8C		// VMRGOW V8, V9, V8
+	WORD $0x114A5E8C		// VMRGOW V10, V11, V10
+
+	WORD $0x13AC6F8C		// VMRGEW V12, V13, V29
+	WORD $0x13CE7F8C		// VMRGEW V14, V15, V30
+
+	XXPERMDI VS40, VS42, $0, VS41
+	XXPERMDI VS40, VS42, $3, VS43
+	XXPERMDI VS59, VS60, $0, VS40
+	XXPERMDI VS59, VS60, $3, VS42
+
+	WORD $0x118C6E8C		// VMRGOW V12, V13, V12
+	WORD $0x11CE7E8C		// VMRGOW V14, V15, V14
+
+	VSPLTISW $4, V27
+	VADDUWM V26, V27, V26
+
+	XXPERMDI VS44, VS46, $0, VS45
+	XXPERMDI VS44, VS46, $3, VS47
+	XXPERMDI VS61, VS62, $0, VS44
+	XXPERMDI VS61, VS62, $3, VS46
+
+	VADDUWM V0, V16, V0
+	VADDUWM V4, V17, V4
+	VADDUWM V8, V18, V8
+	VADDUWM V12, V19, V12
+
+	CMPU LEN, $64
+	BLT tail_vsx
+
+	// Bottom of loop
+	LXVW4X (INP)(R0), VS59
+	LXVW4X (INP)(R8), VS60
+	LXVW4X (INP)(R9), VS61
+	LXVW4X (INP)(R10), VS62
+
+	VXOR V27, V0, V27
+	VXOR V28, V4, V28
+	VXOR V29, V8, V29
+	VXOR V30, V12, V30
+
+	STXVW4X VS59, (OUT)(R0)
+	STXVW4X VS60, (OUT)(R8)
+	ADD     $64, INP
+	STXVW4X VS61, (OUT)(R9)
+	ADD     $-64, LEN
+	STXVW4X VS62, (OUT)(R10)
+	ADD     $64, OUT
+	BEQ     done_vsx
+
+	VADDUWM V1, V16, V0
+	VADDUWM V5, V17, V4
+	VADDUWM V9, V18, V8
+	VADDUWM V13, V19, V12
+
+	CMPU  LEN, $64
+	BLT   tail_vsx
+
+	LXVW4X (INP)(R0), VS59
+	LXVW4X (INP)(R8), VS60
+	LXVW4X (INP)(R9), VS61
+	LXVW4X (INP)(R10), VS62
+	VXOR   V27, V0, V27
+
+	VXOR V28, V4, V28
+	VXOR V29, V8, V29
+	VXOR V30, V12, V30
+
+	STXVW4X VS59, (OUT)(R0)
+	STXVW4X VS60, (OUT)(R8)
+	ADD     $64, INP
+	STXVW4X VS61, (OUT)(R9)
+	ADD     $-64, LEN
+	STXVW4X VS62, (OUT)(V10)
+	ADD     $64, OUT
+	BEQ     done_vsx
+
+	VADDUWM V2, V16, V0
+	VADDUWM V6, V17, V4
+	VADDUWM V10, V18, V8
+	VADDUWM V14, V19, V12
+
+	CMPU LEN, $64
+	BLT  tail_vsx
+
+	LXVW4X (INP)(R0), VS59
+	LXVW4X (INP)(R8), VS60
+	LXVW4X (INP)(R9), VS61
+	LXVW4X (INP)(R10), VS62
+
+	VXOR V27, V0, V27
+	VXOR V28, V4, V28
+	VXOR V29, V8, V29
+	VXOR V30, V12, V30
+
+	STXVW4X VS59, (OUT)(R0)
+	STXVW4X VS60, (OUT)(R8)
+	ADD     $64, INP
+	STXVW4X VS61, (OUT)(R9)
+	ADD     $-64, LEN
+	STXVW4X VS62, (OUT)(R10)
+	ADD     $64, OUT
+	BEQ     done_vsx
+
+	VADDUWM V3, V16, V0
+	VADDUWM V7, V17, V4
+	VADDUWM V11, V18, V8
+	VADDUWM V15, V19, V12
+
+	CMPU  LEN, $64
+	BLT   tail_vsx
+
+	LXVW4X (INP)(R0), VS59
+	LXVW4X (INP)(R8), VS60
+	LXVW4X (INP)(R9), VS61
+	LXVW4X (INP)(R10), VS62
+
+	VXOR V27, V0, V27
+	VXOR V28, V4, V28
+	VXOR V29, V8, V29
+	VXOR V30, V12, V30
+
+	STXVW4X VS59, (OUT)(R0)
+	STXVW4X VS60, (OUT)(R8)
+	ADD     $64, INP
+	STXVW4X VS61, (OUT)(R9)
+	ADD     $-64, LEN
+	STXVW4X VS62, (OUT)(R10)
+	ADD     $64, OUT
+
+	MOVD $10, R14
+	MOVD R14, CTR
+	BNE  loop_outer_vsx
+
+done_vsx:
+	// Increment counter by number of 64 byte blocks
+	MOVD (CNT), R14
+	ADD  BLOCKS, R14
+	MOVD R14, (CNT)
+	RET
+
+tail_vsx:
+	ADD  $32, R1, R11
+	MOVD LEN, CTR
+
+	// Save values on stack to copy from
+	STXVW4X VS32, (R11)(R0)
+	STXVW4X VS36, (R11)(R8)
+	STXVW4X VS40, (R11)(R9)
+	STXVW4X VS44, (R11)(R10)
+	ADD $-1, R11, R12
+	ADD $-1, INP
+	ADD $-1, OUT
+
+looptail_vsx:
+	// Copying the result to OUT
+	// in bytes.
+	MOVBZU 1(R12), KEY
+	MOVBZU 1(INP), TMP
+	XOR    KEY, TMP, KEY
+	MOVBU  KEY, 1(OUT)
+	BC     16, LT, looptail_vsx
+
+	// Clear the stack values
+	STXVW4X VS48, (R11)(R0)
+	STXVW4X VS48, (R11)(R8)
+	STXVW4X VS48, (R11)(R9)
+	STXVW4X VS48, (R11)(R10)
+	BR      done_vsx
--- a/vendor/golang.org/x/crypto/chacha20/chacha_s390x.go
+++ b/vendor/golang.org/x/crypto/chacha20/chacha_s390x.go
@ -0,0 +1,26 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !gccgo,!appengine
+
+package chacha20
+
+import "golang.org/x/sys/cpu"
+
+var haveAsm = cpu.S390X.HasVX
+
+const bufSize = 256
+
+// xorKeyStreamVX is an assembly implementation of XORKeyStream. It must only
+// be called when the vector facility is available. Implementation in asm_s390x.s.
+//go:noescape
+func xorKeyStreamVX(dst, src []byte, key *[8]uint32, nonce *[3]uint32, counter *uint32)
+
+func (c *Cipher) xorKeyStreamBlocks(dst, src []byte) {
+	if cpu.S390X.HasVX {
+		xorKeyStreamVX(dst, src, &c.key, &c.nonce, &c.counter)
+	} else {
+		c.xorKeyStreamBlocksGeneric(dst, src)
+	}
+}
--- a/vendor/golang.org/x/crypto/internal/chacha20/chacha_s390x.s
+++ b/vendor/golang.org/x/crypto/internal/chacha20/chacha_s390x.s
@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-// +build s390x,!gccgo,!appengine
+// +build !gccgo,!appengine

 #include "go_asm.h"
 #include "textflag.h"
@ -24,15 +24,6 @@ DATA ·constants<>+0x14(SB)/4, $0x3320646e
 DATA ·constants<>+0x18(SB)/4, $0x79622d32
 DATA ·constants<>+0x1c(SB)/4, $0x6b206574

-// EXRL targets:
-TEXT ·mvcSrcToBuf(SB), NOFRAME|NOSPLIT, $0
-	MVC $1, (R1), (R8)
-	RET
-
-TEXT ·mvcBufToDst(SB), NOFRAME|NOSPLIT, $0
-	MVC $1, (R8), (R9)
-	RET
-
 #define BSWAP V5
 #define J0    V6
 #define KEY0  V7
@ -144,7 +135,7 @@ TEXT ·mvcBufToDst(SB), NOFRAME|NOSPLIT, $0
 	VMRHF v, w, c \ // c = {a[2], b[2], c[2], d[2]}
 	VMRLF v, w, d // d = {a[3], b[3], c[3], d[3]}

-// func xorKeyStreamVX(dst, src []byte, key *[8]uint32, nonce *[3]uint32, counter *uint32, buf *[256]byte, len *int)
+// func xorKeyStreamVX(dst, src []byte, key *[8]uint32, nonce *[3]uint32, counter *uint32)
 TEXT ·xorKeyStreamVX(SB), NOSPLIT, $0
 	MOVD $·constants<>(SB), R1
 	MOVD dst+0(FP), R2         // R2=&dst[0]
@ -152,25 +143,10 @@ TEXT ·xorKeyStreamVX(SB), NOSPLIT, $0
 	MOVD key+48(FP), R5        // R5=key
 	MOVD nonce+56(FP), R6      // R6=nonce
 	MOVD counter+64(FP), R7    // R7=counter
-	MOVD buf+72(FP), R8        // R8=buf
-	MOVD len+80(FP), R9        // R9=len

 	// load BSWAP and J0
 	VLM (R1), BSWAP, J0

-	// set up tail buffer
-	ADD     $-1, R4, R12
-	MOVBZ   R12, R12
-	CMPUBEQ R12, $255, aligned
-	MOVD    R4, R1
-	AND     $~255, R1
-	MOVD    $(R3)(R1*1), R1
-	EXRL    $·mvcSrcToBuf(SB), R12
-	MOVD    $255, R0
-	SUB     R12, R0
-	MOVD    R0, (R9)               // update len
-
-aligned:
 	// setup
 	MOVD  $95, R0
 	VLM   (R5), KEY0, KEY1
@ -217,9 +193,7 @@ loop:

 	// decrement length
 	ADD $-256, R4
-	BLT tail

-continue:
 	// rearrange vectors
 	SHUFFLE(X0, X1, X2, X3, M0, M1, M2, M3)
 	ADDV(J0, X0, X1, X2, X3)
@ -245,16 +219,6 @@ continue:
 	MOVD $256(R3), R3

 	CMPBNE  R4, $0, chacha
-	CMPUBEQ R12, $255, return
-	EXRL    $·mvcBufToDst(SB), R12 // len was updated during setup

-return:
 	VSTEF $0, CTR, (R7)
 	RET
-
-tail:
-	MOVD R2, R9
-	MOVD R8, R2
-	MOVD R8, R3
-	MOVD $0, R4
-	JMP  continue
--- a/vendor/golang.org/x/crypto/internal/chacha20/xor.go
+++ b/vendor/golang.org/x/crypto/internal/chacha20/xor.go
@ -4,9 +4,7 @@

 package chacha20

-import (
-	"runtime"
-)
+import "runtime"

 // Platforms that have fast unaligned 32-bit little endian accesses.
 const unaligned = runtime.GOARCH == "386" ||
--- a/vendor/golang.org/x/crypto/curve25519/const_amd64.h
+++ b/vendor/golang.org/x/crypto/curve25519/const_amd64.h
@ -1,8 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This code was translated into a form compatible with 6a from the public
-// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
-
-#define REDMASK51     0x0007FFFFFFFFFFFF
--- a/vendor/golang.org/x/crypto/curve25519/const_amd64.s
+++ b/vendor/golang.org/x/crypto/curve25519/const_amd64.s
@ -1,20 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This code was translated into a form compatible with 6a from the public
-// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
-
-// +build amd64,!gccgo,!appengine
-
-// These constants cannot be encoded in non-MOVQ immediates.
-// We access them directly from memory instead.
-
-DATA ·_121666_213(SB)/8, $996687872
-GLOBL ·_121666_213(SB), 8, $8
-
-DATA ·_2P0(SB)/8, $0xFFFFFFFFFFFDA
-GLOBL ·_2P0(SB), 8, $8
-
-DATA ·_2P1234(SB)/8, $0xFFFFFFFFFFFFE
-GLOBL ·_2P1234(SB), 8, $8
--- a/vendor/golang.org/x/crypto/curve25519/cswap_amd64.s
+++ b/vendor/golang.org/x/crypto/curve25519/cswap_amd64.s
@ -1,65 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build amd64,!gccgo,!appengine
-
-// func cswap(inout *[4][5]uint64, v uint64)
-TEXT ·cswap(SB),7,$0
-	MOVQ inout+0(FP),DI
-	MOVQ v+8(FP),SI
-
-	SUBQ $1, SI
-	NOTQ SI
-	MOVQ SI, X15
-	PSHUFD $0x44, X15, X15
-
-	MOVOU 0(DI), X0
-	MOVOU 16(DI), X2
-	MOVOU 32(DI), X4
-	MOVOU 48(DI), X6
-	MOVOU 64(DI), X8
-	MOVOU 80(DI), X1
-	MOVOU 96(DI), X3
-	MOVOU 112(DI), X5
-	MOVOU 128(DI), X7
-	MOVOU 144(DI), X9
-
-	MOVO X1, X10
-	MOVO X3, X11
-	MOVO X5, X12
-	MOVO X7, X13
-	MOVO X9, X14
-
-	PXOR X0, X10
-	PXOR X2, X11
-	PXOR X4, X12
-	PXOR X6, X13
-	PXOR X8, X14
-	PAND X15, X10
-	PAND X15, X11
-	PAND X15, X12
-	PAND X15, X13
-	PAND X15, X14
-	PXOR X10, X0
-	PXOR X10, X1
-	PXOR X11, X2
-	PXOR X11, X3
-	PXOR X12, X4
-	PXOR X12, X5
-	PXOR X13, X6
-	PXOR X13, X7
-	PXOR X14, X8
-	PXOR X14, X9
-
-	MOVOU X0, 0(DI)
-	MOVOU X2, 16(DI)
-	MOVOU X4, 32(DI)
-	MOVOU X6, 48(DI)
-	MOVOU X8, 64(DI)
-	MOVOU X1, 80(DI)
-	MOVOU X3, 96(DI)
-	MOVOU X5, 112(DI)
-	MOVOU X7, 128(DI)
-	MOVOU X9, 144(DI)
-	RET
--- a/vendor/golang.org/x/crypto/curve25519/curve25519.go
+++ b/vendor/golang.org/x/crypto/curve25519/curve25519.go
@ -1,834 +1,95 @@
-// Copyright 2013 The Go Authors. All rights reserved.
+// Copyright 2019 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-// We have an implementation in amd64 assembly so this code is only run on
-// non-amd64 platforms. The amd64 assembly does not support gccgo.
-// +build !amd64 gccgo appengine
-
-package curve25519
+// Package curve25519 provides an implementation of the X25519 function, which
+// performs scalar multiplication on the elliptic curve known as Curve25519.
+// See RFC 7748.
+package curve25519 // import "golang.org/x/crypto/curve25519"

 import (
-	"encoding/binary"
+	"crypto/subtle"
+	"fmt"
 )

-// This code is a port of the public domain, "ref10" implementation of
-// curve25519 from SUPERCOP 20130419 by D. J. Bernstein.
+// ScalarMult sets dst to the product scalar * point.
+//
+// Deprecated: when provided a low-order point, ScalarMult will set dst to all
+// zeroes, irrespective of the scalar. Instead, use the X25519 function, which
+// will return an error.
+func ScalarMult(dst, scalar, point *[32]byte) {
+	scalarMult(dst, scalar, point)
+}

-// fieldElement represents an element of the field GF(2^255 - 19). An element
-// t, entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
-// t[3]+2^102 t[4]+...+2^230 t[9]. Bounds on each t[i] vary depending on
-// context.
-type fieldElement [10]int32
+// ScalarBaseMult sets dst to the product scalar * base where base is the
+// standard generator.
+//
+// It is recommended to use the X25519 function with Basepoint instead, as
+// copying into fixed size arrays can lead to unexpected bugs.
+func ScalarBaseMult(dst, scalar *[32]byte) {
+	ScalarMult(dst, scalar, &basePoint)
+}

-func feZero(fe *fieldElement) {
-	for i := range fe {
-		fe[i] = 0
+const (
+	// ScalarSize is the size of the scalar input to X25519.
+	ScalarSize = 32
+	// PointSize is the size of the point input to X25519.
+	PointSize = 32
+)
+
+// Basepoint is the canonical Curve25519 generator.
+var Basepoint []byte
+
+var basePoint = [32]byte{9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+
+func init() { Basepoint = basePoint[:] }
+
+func checkBasepoint() {
+	if subtle.ConstantTimeCompare(Basepoint, []byte{
+		0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	}) != 1 {
+		panic("curve25519: global Basepoint value was modified")
 	}
 }

-func feOne(fe *fieldElement) {
-	feZero(fe)
-	fe[0] = 1
+// X25519 returns the result of the scalar multiplication (scalar * point),
+// according to RFC 7748, Section 5. scalar, point and the return value are
+// slices of 32 bytes.
+//
+// scalar can be generated at random, for example with crypto/rand. point should
+// be either Basepoint or the output of another X25519 call.
+//
+// If point is Basepoint (but not if it's a different slice with the same
+// contents) a precomputed implementation might be used for performance.
+func X25519(scalar, point []byte) ([]byte, error) {
+	// Outline the body of function, to let the allocation be inlined in the
+	// caller, and possibly avoid escaping to the heap.
+	var dst [32]byte
+	return x25519(&dst, scalar, point)
 }

-func feAdd(dst, a, b *fieldElement) {
-	for i := range dst {
-		dst[i] = a[i] + b[i]
+func x25519(dst *[32]byte, scalar, point []byte) ([]byte, error) {
+	var in [32]byte
+	if l := len(scalar); l != 32 {
+		return nil, fmt.Errorf("bad scalar length: %d, expected %d", l, 32)
 	}
-}
-
-func feSub(dst, a, b *fieldElement) {
-	for i := range dst {
-		dst[i] = a[i] - b[i]
-	}
-}
-
-func feCopy(dst, src *fieldElement) {
-	for i := range dst {
-		dst[i] = src[i]
-	}
-}
-
-// feCSwap replaces (f,g) with (g,f) if b == 1; replaces (f,g) with (f,g) if b == 0.
-//
-// Preconditions: b in {0,1}.
-func feCSwap(f, g *fieldElement, b int32) {
-	b = -b
-	for i := range f {
-		t := b & (f[i] ^ g[i])
-		f[i] ^= t
-		g[i] ^= t
-	}
-}
-
-// load3 reads a 24-bit, little-endian value from in.
-func load3(in []byte) int64 {
-	var r int64
-	r = int64(in[0])
-	r |= int64(in[1]) << 8
-	r |= int64(in[2]) << 16
-	return r
-}
-
-// load4 reads a 32-bit, little-endian value from in.
-func load4(in []byte) int64 {
-	return int64(binary.LittleEndian.Uint32(in))
-}
-
-func feFromBytes(dst *fieldElement, src *[32]byte) {
-	h0 := load4(src[:])
-	h1 := load3(src[4:]) << 6
-	h2 := load3(src[7:]) << 5
-	h3 := load3(src[10:]) << 3
-	h4 := load3(src[13:]) << 2
-	h5 := load4(src[16:])
-	h6 := load3(src[20:]) << 7
-	h7 := load3(src[23:]) << 5
-	h8 := load3(src[26:]) << 4
-	h9 := (load3(src[29:]) & 0x7fffff) << 2
-
-	var carry [10]int64
-	carry[9] = (h9 + 1<<24) >> 25
-	h0 += carry[9] * 19
-	h9 -= carry[9] << 25
-	carry[1] = (h1 + 1<<24) >> 25
-	h2 += carry[1]
-	h1 -= carry[1] << 25
-	carry[3] = (h3 + 1<<24) >> 25
-	h4 += carry[3]
-	h3 -= carry[3] << 25
-	carry[5] = (h5 + 1<<24) >> 25
-	h6 += carry[5]
-	h5 -= carry[5] << 25
-	carry[7] = (h7 + 1<<24) >> 25
-	h8 += carry[7]
-	h7 -= carry[7] << 25
-
-	carry[0] = (h0 + 1<<25) >> 26
-	h1 += carry[0]
-	h0 -= carry[0] << 26
-	carry[2] = (h2 + 1<<25) >> 26
-	h3 += carry[2]
-	h2 -= carry[2] << 26
-	carry[4] = (h4 + 1<<25) >> 26
-	h5 += carry[4]
-	h4 -= carry[4] << 26
-	carry[6] = (h6 + 1<<25) >> 26
-	h7 += carry[6]
-	h6 -= carry[6] << 26
-	carry[8] = (h8 + 1<<25) >> 26
-	h9 += carry[8]
-	h8 -= carry[8] << 26
-
-	dst[0] = int32(h0)
-	dst[1] = int32(h1)
-	dst[2] = int32(h2)
-	dst[3] = int32(h3)
-	dst[4] = int32(h4)
-	dst[5] = int32(h5)
-	dst[6] = int32(h6)
-	dst[7] = int32(h7)
-	dst[8] = int32(h8)
-	dst[9] = int32(h9)
-}
-
-// feToBytes marshals h to s.
-// Preconditions:
-//   |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
-//
-// Write p=2^255-19; q=floor(h/p).
-// Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
-//
-// Proof:
-//   Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
-//   Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4.
-//
-//   Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
-//   Then 0<y<1.
-//
-//   Write r=h-pq.
-//   Have 0<=r<=p-1=2^255-20.
-//   Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
-//
-//   Write x=r+19(2^-255)r+y.
-//   Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
-//
-//   Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
-//   so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
-func feToBytes(s *[32]byte, h *fieldElement) {
-	var carry [10]int32
-
-	q := (19*h[9] + (1 << 24)) >> 25
-	q = (h[0] + q) >> 26
-	q = (h[1] + q) >> 25
-	q = (h[2] + q) >> 26
-	q = (h[3] + q) >> 25
-	q = (h[4] + q) >> 26
-	q = (h[5] + q) >> 25
-	q = (h[6] + q) >> 26
-	q = (h[7] + q) >> 25
-	q = (h[8] + q) >> 26
-	q = (h[9] + q) >> 25
-
-	// Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20.
-	h[0] += 19 * q
-	// Goal: Output h-2^255 q, which is between 0 and 2^255-20.
-
-	carry[0] = h[0] >> 26
-	h[1] += carry[0]
-	h[0] -= carry[0] << 26
-	carry[1] = h[1] >> 25
-	h[2] += carry[1]
-	h[1] -= carry[1] << 25
-	carry[2] = h[2] >> 26
-	h[3] += carry[2]
-	h[2] -= carry[2] << 26
-	carry[3] = h[3] >> 25
-	h[4] += carry[3]
-	h[3] -= carry[3] << 25
-	carry[4] = h[4] >> 26
-	h[5] += carry[4]
-	h[4] -= carry[4] << 26
-	carry[5] = h[5] >> 25
-	h[6] += carry[5]
-	h[5] -= carry[5] << 25
-	carry[6] = h[6] >> 26
-	h[7] += carry[6]
-	h[6] -= carry[6] << 26
-	carry[7] = h[7] >> 25
-	h[8] += carry[7]
-	h[7] -= carry[7] << 25
-	carry[8] = h[8] >> 26
-	h[9] += carry[8]
-	h[8] -= carry[8] << 26
-	carry[9] = h[9] >> 25
-	h[9] -= carry[9] << 25
-	// h10 = carry9
-
-	// Goal: Output h[0]+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
-	// Have h[0]+...+2^230 h[9] between 0 and 2^255-1;
-	// evidently 2^255 h10-2^255 q = 0.
-	// Goal: Output h[0]+...+2^230 h[9].
-
-	s[0] = byte(h[0] >> 0)
-	s[1] = byte(h[0] >> 8)
-	s[2] = byte(h[0] >> 16)
-	s[3] = byte((h[0] >> 24) | (h[1] << 2))
-	s[4] = byte(h[1] >> 6)
-	s[5] = byte(h[1] >> 14)
-	s[6] = byte((h[1] >> 22) | (h[2] << 3))
-	s[7] = byte(h[2] >> 5)
-	s[8] = byte(h[2] >> 13)
-	s[9] = byte((h[2] >> 21) | (h[3] << 5))
-	s[10] = byte(h[3] >> 3)
-	s[11] = byte(h[3] >> 11)
-	s[12] = byte((h[3] >> 19) | (h[4] << 6))
-	s[13] = byte(h[4] >> 2)
-	s[14] = byte(h[4] >> 10)
-	s[15] = byte(h[4] >> 18)
-	s[16] = byte(h[5] >> 0)
-	s[17] = byte(h[5] >> 8)
-	s[18] = byte(h[5] >> 16)
-	s[19] = byte((h[5] >> 24) | (h[6] << 1))
-	s[20] = byte(h[6] >> 7)
-	s[21] = byte(h[6] >> 15)
-	s[22] = byte((h[6] >> 23) | (h[7] << 3))
-	s[23] = byte(h[7] >> 5)
-	s[24] = byte(h[7] >> 13)
-	s[25] = byte((h[7] >> 21) | (h[8] << 4))
-	s[26] = byte(h[8] >> 4)
-	s[27] = byte(h[8] >> 12)
-	s[28] = byte((h[8] >> 20) | (h[9] << 6))
-	s[29] = byte(h[9] >> 2)
-	s[30] = byte(h[9] >> 10)
-	s[31] = byte(h[9] >> 18)
-}
-
-// feMul calculates h = f * g
-// Can overlap h with f or g.
-//
-// Preconditions:
-//    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
-//    |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
-//
-// Postconditions:
-//    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
-//
-// Notes on implementation strategy:
-//
-// Using schoolbook multiplication.
-// Karatsuba would save a little in some cost models.
-//
-// Most multiplications by 2 and 19 are 32-bit precomputations;
-// cheaper than 64-bit postcomputations.
-//
-// There is one remaining multiplication by 19 in the carry chain;
-// one *19 precomputation can be merged into this,
-// but the resulting data flow is considerably less clean.
-//
-// There are 12 carries below.
-// 10 of them are 2-way parallelizable and vectorizable.
-// Can get away with 11 carries, but then data flow is much deeper.
-//
-// With tighter constraints on inputs can squeeze carries into int32.
-func feMul(h, f, g *fieldElement) {
-	f0 := f[0]
-	f1 := f[1]
-	f2 := f[2]
-	f3 := f[3]
-	f4 := f[4]
-	f5 := f[5]
-	f6 := f[6]
-	f7 := f[7]
-	f8 := f[8]
-	f9 := f[9]
-	g0 := g[0]
-	g1 := g[1]
-	g2 := g[2]
-	g3 := g[3]
-	g4 := g[4]
-	g5 := g[5]
-	g6 := g[6]
-	g7 := g[7]
-	g8 := g[8]
-	g9 := g[9]
-	g1_19 := 19 * g1 // 1.4*2^29
-	g2_19 := 19 * g2 // 1.4*2^30; still ok
-	g3_19 := 19 * g3
-	g4_19 := 19 * g4
-	g5_19 := 19 * g5
-	g6_19 := 19 * g6
-	g7_19 := 19 * g7
-	g8_19 := 19 * g8
-	g9_19 := 19 * g9
-	f1_2 := 2 * f1
-	f3_2 := 2 * f3
-	f5_2 := 2 * f5
-	f7_2 := 2 * f7
-	f9_2 := 2 * f9
-	f0g0 := int64(f0) * int64(g0)
-	f0g1 := int64(f0) * int64(g1)
-	f0g2 := int64(f0) * int64(g2)
-	f0g3 := int64(f0) * int64(g3)
-	f0g4 := int64(f0) * int64(g4)
-	f0g5 := int64(f0) * int64(g5)
-	f0g6 := int64(f0) * int64(g6)
-	f0g7 := int64(f0) * int64(g7)
-	f0g8 := int64(f0) * int64(g8)
-	f0g9 := int64(f0) * int64(g9)
-	f1g0 := int64(f1) * int64(g0)
-	f1g1_2 := int64(f1_2) * int64(g1)
-	f1g2 := int64(f1) * int64(g2)
-	f1g3_2 := int64(f1_2) * int64(g3)
-	f1g4 := int64(f1) * int64(g4)
-	f1g5_2 := int64(f1_2) * int64(g5)
-	f1g6 := int64(f1) * int64(g6)
-	f1g7_2 := int64(f1_2) * int64(g7)
-	f1g8 := int64(f1) * int64(g8)
-	f1g9_38 := int64(f1_2) * int64(g9_19)
-	f2g0 := int64(f2) * int64(g0)
-	f2g1 := int64(f2) * int64(g1)
-	f2g2 := int64(f2) * int64(g2)
-	f2g3 := int64(f2) * int64(g3)
-	f2g4 := int64(f2) * int64(g4)
-	f2g5 := int64(f2) * int64(g5)
-	f2g6 := int64(f2) * int64(g6)
-	f2g7 := int64(f2) * int64(g7)
-	f2g8_19 := int64(f2) * int64(g8_19)
-	f2g9_19 := int64(f2) * int64(g9_19)
-	f3g0 := int64(f3) * int64(g0)
-	f3g1_2 := int64(f3_2) * int64(g1)
-	f3g2 := int64(f3) * int64(g2)
-	f3g3_2 := int64(f3_2) * int64(g3)
-	f3g4 := int64(f3) * int64(g4)
-	f3g5_2 := int64(f3_2) * int64(g5)
-	f3g6 := int64(f3) * int64(g6)
-	f3g7_38 := int64(f3_2) * int64(g7_19)
-	f3g8_19 := int64(f3) * int64(g8_19)
-	f3g9_38 := int64(f3_2) * int64(g9_19)
-	f4g0 := int64(f4) * int64(g0)
-	f4g1 := int64(f4) * int64(g1)
-	f4g2 := int64(f4) * int64(g2)
-	f4g3 := int64(f4) * int64(g3)
-	f4g4 := int64(f4) * int64(g4)
-	f4g5 := int64(f4) * int64(g5)
-	f4g6_19 := int64(f4) * int64(g6_19)
-	f4g7_19 := int64(f4) * int64(g7_19)
-	f4g8_19 := int64(f4) * int64(g8_19)
-	f4g9_19 := int64(f4) * int64(g9_19)
-	f5g0 := int64(f5) * int64(g0)
-	f5g1_2 := int64(f5_2) * int64(g1)
-	f5g2 := int64(f5) * int64(g2)
-	f5g3_2 := int64(f5_2) * int64(g3)
-	f5g4 := int64(f5) * int64(g4)
-	f5g5_38 := int64(f5_2) * int64(g5_19)
-	f5g6_19 := int64(f5) * int64(g6_19)
-	f5g7_38 := int64(f5_2) * int64(g7_19)
-	f5g8_19 := int64(f5) * int64(g8_19)
-	f5g9_38 := int64(f5_2) * int64(g9_19)
-	f6g0 := int64(f6) * int64(g0)
-	f6g1 := int64(f6) * int64(g1)
-	f6g2 := int64(f6) * int64(g2)
-	f6g3 := int64(f6) * int64(g3)
-	f6g4_19 := int64(f6) * int64(g4_19)
-	f6g5_19 := int64(f6) * int64(g5_19)
-	f6g6_19 := int64(f6) * int64(g6_19)
-	f6g7_19 := int64(f6) * int64(g7_19)
-	f6g8_19 := int64(f6) * int64(g8_19)
-	f6g9_19 := int64(f6) * int64(g9_19)
-	f7g0 := int64(f7) * int64(g0)
-	f7g1_2 := int64(f7_2) * int64(g1)
-	f7g2 := int64(f7) * int64(g2)
-	f7g3_38 := int64(f7_2) * int64(g3_19)
-	f7g4_19 := int64(f7) * int64(g4_19)
-	f7g5_38 := int64(f7_2) * int64(g5_19)
-	f7g6_19 := int64(f7) * int64(g6_19)
-	f7g7_38 := int64(f7_2) * int64(g7_19)
-	f7g8_19 := int64(f7) * int64(g8_19)
-	f7g9_38 := int64(f7_2) * int64(g9_19)
-	f8g0 := int64(f8) * int64(g0)
-	f8g1 := int64(f8) * int64(g1)
-	f8g2_19 := int64(f8) * int64(g2_19)
-	f8g3_19 := int64(f8) * int64(g3_19)
-	f8g4_19 := int64(f8) * int64(g4_19)
-	f8g5_19 := int64(f8) * int64(g5_19)
-	f8g6_19 := int64(f8) * int64(g6_19)
-	f8g7_19 := int64(f8) * int64(g7_19)
-	f8g8_19 := int64(f8) * int64(g8_19)
-	f8g9_19 := int64(f8) * int64(g9_19)
-	f9g0 := int64(f9) * int64(g0)
-	f9g1_38 := int64(f9_2) * int64(g1_19)
-	f9g2_19 := int64(f9) * int64(g2_19)
-	f9g3_38 := int64(f9_2) * int64(g3_19)
-	f9g4_19 := int64(f9) * int64(g4_19)
-	f9g5_38 := int64(f9_2) * int64(g5_19)
-	f9g6_19 := int64(f9) * int64(g6_19)
-	f9g7_38 := int64(f9_2) * int64(g7_19)
-	f9g8_19 := int64(f9) * int64(g8_19)
-	f9g9_38 := int64(f9_2) * int64(g9_19)
-	h0 := f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38
-	h1 := f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19
-	h2 := f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38
-	h3 := f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19
-	h4 := f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38
-	h5 := f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19
-	h6 := f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38
-	h7 := f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19
-	h8 := f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38
-	h9 := f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0
-	var carry [10]int64
-
-	// |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38))
-	//   i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8
-	// |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19))
-	//   i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9
-
-	carry[0] = (h0 + (1 << 25)) >> 26
-	h1 += carry[0]
-	h0 -= carry[0] << 26
-	carry[4] = (h4 + (1 << 25)) >> 26
-	h5 += carry[4]
-	h4 -= carry[4] << 26
-	// |h0| <= 2^25
-	// |h4| <= 2^25
-	// |h1| <= 1.51*2^58
-	// |h5| <= 1.51*2^58
-
-	carry[1] = (h1 + (1 << 24)) >> 25
-	h2 += carry[1]
-	h1 -= carry[1] << 25
-	carry[5] = (h5 + (1 << 24)) >> 25
-	h6 += carry[5]
-	h5 -= carry[5] << 25
-	// |h1| <= 2^24; from now on fits into int32
-	// |h5| <= 2^24; from now on fits into int32
-	// |h2| <= 1.21*2^59
-	// |h6| <= 1.21*2^59
-
-	carry[2] = (h2 + (1 << 25)) >> 26
-	h3 += carry[2]
-	h2 -= carry[2] << 26
-	carry[6] = (h6 + (1 << 25)) >> 26
-	h7 += carry[6]
-	h6 -= carry[6] << 26
-	// |h2| <= 2^25; from now on fits into int32 unchanged
-	// |h6| <= 2^25; from now on fits into int32 unchanged
-	// |h3| <= 1.51*2^58
-	// |h7| <= 1.51*2^58
-
-	carry[3] = (h3 + (1 << 24)) >> 25
-	h4 += carry[3]
-	h3 -= carry[3] << 25
-	carry[7] = (h7 + (1 << 24)) >> 25
-	h8 += carry[7]
-	h7 -= carry[7] << 25
-	// |h3| <= 2^24; from now on fits into int32 unchanged
-	// |h7| <= 2^24; from now on fits into int32 unchanged
-	// |h4| <= 1.52*2^33
-	// |h8| <= 1.52*2^33
-
-	carry[4] = (h4 + (1 << 25)) >> 26
-	h5 += carry[4]
-	h4 -= carry[4] << 26
-	carry[8] = (h8 + (1 << 25)) >> 26
-	h9 += carry[8]
-	h8 -= carry[8] << 26
-	// |h4| <= 2^25; from now on fits into int32 unchanged
-	// |h8| <= 2^25; from now on fits into int32 unchanged
-	// |h5| <= 1.01*2^24
-	// |h9| <= 1.51*2^58
-
-	carry[9] = (h9 + (1 << 24)) >> 25
-	h0 += carry[9] * 19
-	h9 -= carry[9] << 25
-	// |h9| <= 2^24; from now on fits into int32 unchanged
-	// |h0| <= 1.8*2^37
-
-	carry[0] = (h0 + (1 << 25)) >> 26
-	h1 += carry[0]
-	h0 -= carry[0] << 26
-	// |h0| <= 2^25; from now on fits into int32 unchanged
-	// |h1| <= 1.01*2^24
-
-	h[0] = int32(h0)
-	h[1] = int32(h1)
-	h[2] = int32(h2)
-	h[3] = int32(h3)
-	h[4] = int32(h4)
-	h[5] = int32(h5)
-	h[6] = int32(h6)
-	h[7] = int32(h7)
-	h[8] = int32(h8)
-	h[9] = int32(h9)
-}
-
-// feSquare calculates h = f*f. Can overlap h with f.
-//
-// Preconditions:
-//    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
-//
-// Postconditions:
-//    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
-func feSquare(h, f *fieldElement) {
-	f0 := f[0]
-	f1 := f[1]
-	f2 := f[2]
-	f3 := f[3]
-	f4 := f[4]
-	f5 := f[5]
-	f6 := f[6]
-	f7 := f[7]
-	f8 := f[8]
-	f9 := f[9]
-	f0_2 := 2 * f0
-	f1_2 := 2 * f1
-	f2_2 := 2 * f2
-	f3_2 := 2 * f3
-	f4_2 := 2 * f4
-	f5_2 := 2 * f5
-	f6_2 := 2 * f6
-	f7_2 := 2 * f7
-	f5_38 := 38 * f5 // 1.31*2^30
-	f6_19 := 19 * f6 // 1.31*2^30
-	f7_38 := 38 * f7 // 1.31*2^30
-	f8_19 := 19 * f8 // 1.31*2^30
-	f9_38 := 38 * f9 // 1.31*2^30
-	f0f0 := int64(f0) * int64(f0)
-	f0f1_2 := int64(f0_2) * int64(f1)
-	f0f2_2 := int64(f0_2) * int64(f2)
-	f0f3_2 := int64(f0_2) * int64(f3)
-	f0f4_2 := int64(f0_2) * int64(f4)
-	f0f5_2 := int64(f0_2) * int64(f5)
-	f0f6_2 := int64(f0_2) * int64(f6)
-	f0f7_2 := int64(f0_2) * int64(f7)
-	f0f8_2 := int64(f0_2) * int64(f8)
-	f0f9_2 := int64(f0_2) * int64(f9)
-	f1f1_2 := int64(f1_2) * int64(f1)
-	f1f2_2 := int64(f1_2) * int64(f2)
-	f1f3_4 := int64(f1_2) * int64(f3_2)
-	f1f4_2 := int64(f1_2) * int64(f4)
-	f1f5_4 := int64(f1_2) * int64(f5_2)
-	f1f6_2 := int64(f1_2) * int64(f6)
-	f1f7_4 := int64(f1_2) * int64(f7_2)
-	f1f8_2 := int64(f1_2) * int64(f8)
-	f1f9_76 := int64(f1_2) * int64(f9_38)
-	f2f2 := int64(f2) * int64(f2)
-	f2f3_2 := int64(f2_2) * int64(f3)
-	f2f4_2 := int64(f2_2) * int64(f4)
-	f2f5_2 := int64(f2_2) * int64(f5)
-	f2f6_2 := int64(f2_2) * int64(f6)
-	f2f7_2 := int64(f2_2) * int64(f7)
-	f2f8_38 := int64(f2_2) * int64(f8_19)
-	f2f9_38 := int64(f2) * int64(f9_38)
-	f3f3_2 := int64(f3_2) * int64(f3)
-	f3f4_2 := int64(f3_2) * int64(f4)
-	f3f5_4 := int64(f3_2) * int64(f5_2)
-	f3f6_2 := int64(f3_2) * int64(f6)
-	f3f7_76 := int64(f3_2) * int64(f7_38)
-	f3f8_38 := int64(f3_2) * int64(f8_19)
-	f3f9_76 := int64(f3_2) * int64(f9_38)
-	f4f4 := int64(f4) * int64(f4)
-	f4f5_2 := int64(f4_2) * int64(f5)
-	f4f6_38 := int64(f4_2) * int64(f6_19)
-	f4f7_38 := int64(f4) * int64(f7_38)
-	f4f8_38 := int64(f4_2) * int64(f8_19)
-	f4f9_38 := int64(f4) * int64(f9_38)
-	f5f5_38 := int64(f5) * int64(f5_38)
-	f5f6_38 := int64(f5_2) * int64(f6_19)
-	f5f7_76 := int64(f5_2) * int64(f7_38)
-	f5f8_38 := int64(f5_2) * int64(f8_19)
-	f5f9_76 := int64(f5_2) * int64(f9_38)
-	f6f6_19 := int64(f6) * int64(f6_19)
-	f6f7_38 := int64(f6) * int64(f7_38)
-	f6f8_38 := int64(f6_2) * int64(f8_19)
-	f6f9_38 := int64(f6) * int64(f9_38)
-	f7f7_38 := int64(f7) * int64(f7_38)
-	f7f8_38 := int64(f7_2) * int64(f8_19)
-	f7f9_76 := int64(f7_2) * int64(f9_38)
-	f8f8_19 := int64(f8) * int64(f8_19)
-	f8f9_38 := int64(f8) * int64(f9_38)
-	f9f9_38 := int64(f9) * int64(f9_38)
-	h0 := f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38
-	h1 := f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38
-	h2 := f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19
-	h3 := f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38
-	h4 := f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38
-	h5 := f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38
-	h6 := f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19
-	h7 := f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38
-	h8 := f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38
-	h9 := f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2
-	var carry [10]int64
-
-	carry[0] = (h0 + (1 << 25)) >> 26
-	h1 += carry[0]
-	h0 -= carry[0] << 26
-	carry[4] = (h4 + (1 << 25)) >> 26
-	h5 += carry[4]
-	h4 -= carry[4] << 26
-
-	carry[1] = (h1 + (1 << 24)) >> 25
-	h2 += carry[1]
-	h1 -= carry[1] << 25
-	carry[5] = (h5 + (1 << 24)) >> 25
-	h6 += carry[5]
-	h5 -= carry[5] << 25
-
-	carry[2] = (h2 + (1 << 25)) >> 26
-	h3 += carry[2]
-	h2 -= carry[2] << 26
-	carry[6] = (h6 + (1 << 25)) >> 26
-	h7 += carry[6]
-	h6 -= carry[6] << 26
-
-	carry[3] = (h3 + (1 << 24)) >> 25
-	h4 += carry[3]
-	h3 -= carry[3] << 25
-	carry[7] = (h7 + (1 << 24)) >> 25
-	h8 += carry[7]
-	h7 -= carry[7] << 25
-
-	carry[4] = (h4 + (1 << 25)) >> 26
-	h5 += carry[4]
-	h4 -= carry[4] << 26
-	carry[8] = (h8 + (1 << 25)) >> 26
-	h9 += carry[8]
-	h8 -= carry[8] << 26
-
-	carry[9] = (h9 + (1 << 24)) >> 25
-	h0 += carry[9] * 19
-	h9 -= carry[9] << 25
-
-	carry[0] = (h0 + (1 << 25)) >> 26
-	h1 += carry[0]
-	h0 -= carry[0] << 26
-
-	h[0] = int32(h0)
-	h[1] = int32(h1)
-	h[2] = int32(h2)
-	h[3] = int32(h3)
-	h[4] = int32(h4)
-	h[5] = int32(h5)
-	h[6] = int32(h6)
-	h[7] = int32(h7)
-	h[8] = int32(h8)
-	h[9] = int32(h9)
-}
-
-// feMul121666 calculates h = f * 121666. Can overlap h with f.
-//
-// Preconditions:
-//    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
-//
-// Postconditions:
-//    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
-func feMul121666(h, f *fieldElement) {
-	h0 := int64(f[0]) * 121666
-	h1 := int64(f[1]) * 121666
-	h2 := int64(f[2]) * 121666
-	h3 := int64(f[3]) * 121666
-	h4 := int64(f[4]) * 121666
-	h5 := int64(f[5]) * 121666
-	h6 := int64(f[6]) * 121666
-	h7 := int64(f[7]) * 121666
-	h8 := int64(f[8]) * 121666
-	h9 := int64(f[9]) * 121666
-	var carry [10]int64
-
-	carry[9] = (h9 + (1 << 24)) >> 25
-	h0 += carry[9] * 19
-	h9 -= carry[9] << 25
-	carry[1] = (h1 + (1 << 24)) >> 25
-	h2 += carry[1]
-	h1 -= carry[1] << 25
-	carry[3] = (h3 + (1 << 24)) >> 25
-	h4 += carry[3]
-	h3 -= carry[3] << 25
-	carry[5] = (h5 + (1 << 24)) >> 25
-	h6 += carry[5]
-	h5 -= carry[5] << 25
-	carry[7] = (h7 + (1 << 24)) >> 25
-	h8 += carry[7]
-	h7 -= carry[7] << 25
-
-	carry[0] = (h0 + (1 << 25)) >> 26
-	h1 += carry[0]
-	h0 -= carry[0] << 26
-	carry[2] = (h2 + (1 << 25)) >> 26
-	h3 += carry[2]
-	h2 -= carry[2] << 26
-	carry[4] = (h4 + (1 << 25)) >> 26
-	h5 += carry[4]
-	h4 -= carry[4] << 26
-	carry[6] = (h6 + (1 << 25)) >> 26
-	h7 += carry[6]
-	h6 -= carry[6] << 26
-	carry[8] = (h8 + (1 << 25)) >> 26
-	h9 += carry[8]
-	h8 -= carry[8] << 26
-
-	h[0] = int32(h0)
-	h[1] = int32(h1)
-	h[2] = int32(h2)
-	h[3] = int32(h3)
-	h[4] = int32(h4)
-	h[5] = int32(h5)
-	h[6] = int32(h6)
-	h[7] = int32(h7)
-	h[8] = int32(h8)
-	h[9] = int32(h9)
-}
-
-// feInvert sets out = z^-1.
-func feInvert(out, z *fieldElement) {
-	var t0, t1, t2, t3 fieldElement
-	var i int
-
-	feSquare(&t0, z)
-	for i = 1; i < 1; i++ {
-		feSquare(&t0, &t0)
-	}
-	feSquare(&t1, &t0)
-	for i = 1; i < 2; i++ {
-		feSquare(&t1, &t1)
-	}
-	feMul(&t1, z, &t1)
-	feMul(&t0, &t0, &t1)
-	feSquare(&t2, &t0)
-	for i = 1; i < 1; i++ {
-		feSquare(&t2, &t2)
-	}
-	feMul(&t1, &t1, &t2)
-	feSquare(&t2, &t1)
-	for i = 1; i < 5; i++ {
-		feSquare(&t2, &t2)
-	}
-	feMul(&t1, &t2, &t1)
-	feSquare(&t2, &t1)
-	for i = 1; i < 10; i++ {
-		feSquare(&t2, &t2)
-	}
-	feMul(&t2, &t2, &t1)
-	feSquare(&t3, &t2)
-	for i = 1; i < 20; i++ {
-		feSquare(&t3, &t3)
-	}
-	feMul(&t2, &t3, &t2)
-	feSquare(&t2, &t2)
-	for i = 1; i < 10; i++ {
-		feSquare(&t2, &t2)
-	}
-	feMul(&t1, &t2, &t1)
-	feSquare(&t2, &t1)
-	for i = 1; i < 50; i++ {
-		feSquare(&t2, &t2)
-	}
-	feMul(&t2, &t2, &t1)
-	feSquare(&t3, &t2)
-	for i = 1; i < 100; i++ {
-		feSquare(&t3, &t3)
-	}
-	feMul(&t2, &t3, &t2)
-	feSquare(&t2, &t2)
-	for i = 1; i < 50; i++ {
-		feSquare(&t2, &t2)
-	}
-	feMul(&t1, &t2, &t1)
-	feSquare(&t1, &t1)
-	for i = 1; i < 5; i++ {
-		feSquare(&t1, &t1)
-	}
-	feMul(out, &t1, &t0)
-}
-
-func scalarMult(out, in, base *[32]byte) {
-	var e [32]byte
-
-	copy(e[:], in[:])
-	e[0] &= 248
-	e[31] &= 127
-	e[31] |= 64
-
-	var x1, x2, z2, x3, z3, tmp0, tmp1 fieldElement
-	feFromBytes(&x1, base)
-	feOne(&x2)
-	feCopy(&x3, &x1)
-	feOne(&z3)
-
-	swap := int32(0)
-	for pos := 254; pos >= 0; pos-- {
-		b := e[pos/8] >> uint(pos&7)
-		b &= 1
-		swap ^= int32(b)
-		feCSwap(&x2, &x3, swap)
-		feCSwap(&z2, &z3, swap)
-		swap = int32(b)
-
-		feSub(&tmp0, &x3, &z3)
-		feSub(&tmp1, &x2, &z2)
-		feAdd(&x2, &x2, &z2)
-		feAdd(&z2, &x3, &z3)
-		feMul(&z3, &tmp0, &x2)
-		feMul(&z2, &z2, &tmp1)
-		feSquare(&tmp0, &tmp1)
-		feSquare(&tmp1, &x2)
-		feAdd(&x3, &z3, &z2)
-		feSub(&z2, &z3, &z2)
-		feMul(&x2, &tmp1, &tmp0)
-		feSub(&tmp1, &tmp1, &tmp0)
-		feSquare(&z2, &z2)
-		feMul121666(&z3, &tmp1)
-		feSquare(&x3, &x3)
-		feAdd(&tmp0, &tmp0, &z3)
-		feMul(&z3, &x1, &z2)
-		feMul(&z2, &tmp1, &tmp0)
-	}
-
-	feCSwap(&x2, &x3, swap)
-	feCSwap(&z2, &z3, swap)
-
-	feInvert(&z2, &z2)
-	feMul(&x2, &x2, &z2)
-	feToBytes(out, &x2)
+	if l := len(point); l != 32 {
+		return nil, fmt.Errorf("bad point length: %d, expected %d", l, 32)
+	}
+	copy(in[:], scalar)
+	if &point[0] == &Basepoint[0] {
+		checkBasepoint()
+		ScalarBaseMult(dst, &in)
+	} else {
+		var base, zero [32]byte
+		copy(base[:], point)
+		ScalarMult(dst, &in, &base)
+		if subtle.ConstantTimeCompare(dst[:], zero[:]) == 1 {
+			return nil, fmt.Errorf("bad input point: low order point")
+		}
+	}
+	return dst[:], nil
 }
--- a/vendor/golang.org/x/crypto/curve25519/curve25519_amd64.go
+++ b/vendor/golang.org/x/crypto/curve25519/curve25519_amd64.go
@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-// +build amd64,!gccgo,!appengine
+// +build amd64,!gccgo,!appengine,!purego

 package curve25519

--- a/vendor/golang.org/x/crypto/curve25519/curve25519_amd64.s
+++ b/vendor/golang.org/x/crypto/curve25519/curve25519_amd64.s
@ -5,9 +5,84 @@
 // This code was translated into a form compatible with 6a from the public
 // domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html

-// +build amd64,!gccgo,!appengine
+// +build amd64,!gccgo,!appengine,!purego

-#include "const_amd64.h"
+#define REDMASK51     0x0007FFFFFFFFFFFF
+
+// These constants cannot be encoded in non-MOVQ immediates.
+// We access them directly from memory instead.
+
+DATA ·_121666_213(SB)/8, $996687872
+GLOBL ·_121666_213(SB), 8, $8
+
+DATA ·_2P0(SB)/8, $0xFFFFFFFFFFFDA
+GLOBL ·_2P0(SB), 8, $8
+
+DATA ·_2P1234(SB)/8, $0xFFFFFFFFFFFFE
+GLOBL ·_2P1234(SB), 8, $8
+
+// func freeze(inout *[5]uint64)
+TEXT ·freeze(SB),7,$0-8
+	MOVQ inout+0(FP), DI
+
+	MOVQ 0(DI),SI
+	MOVQ 8(DI),DX
+	MOVQ 16(DI),CX
+	MOVQ 24(DI),R8
+	MOVQ 32(DI),R9
+	MOVQ $REDMASK51,AX
+	MOVQ AX,R10
+	SUBQ $18,R10
+	MOVQ $3,R11
+REDUCELOOP:
+	MOVQ SI,R12
+	SHRQ $51,R12
+	ANDQ AX,SI
+	ADDQ R12,DX
+	MOVQ DX,R12
+	SHRQ $51,R12
+	ANDQ AX,DX
+	ADDQ R12,CX
+	MOVQ CX,R12
+	SHRQ $51,R12
+	ANDQ AX,CX
+	ADDQ R12,R8
+	MOVQ R8,R12
+	SHRQ $51,R12
+	ANDQ AX,R8
+	ADDQ R12,R9
+	MOVQ R9,R12
+	SHRQ $51,R12
+	ANDQ AX,R9
+	IMUL3Q $19,R12,R12
+	ADDQ R12,SI
+	SUBQ $1,R11
+	JA REDUCELOOP
+	MOVQ $1,R12
+	CMPQ R10,SI
+	CMOVQLT R11,R12
+	CMPQ AX,DX
+	CMOVQNE R11,R12
+	CMPQ AX,CX
+	CMOVQNE R11,R12
+	CMPQ AX,R8
+	CMOVQNE R11,R12
+	CMPQ AX,R9
+	CMOVQNE R11,R12
+	NEGQ R12
+	ANDQ R12,AX
+	ANDQ R12,R10
+	SUBQ R10,SI
+	SUBQ AX,DX
+	SUBQ AX,CX
+	SUBQ AX,R8
+	SUBQ AX,R9
+	MOVQ SI,0(DI)
+	MOVQ DX,8(DI)
+	MOVQ CX,16(DI)
+	MOVQ R8,24(DI)
+	MOVQ R9,32(DI)
+	RET

 // func ladderstep(inout *[5][5]uint64)
 TEXT ·ladderstep(SB),0,$296-8
@ -121,18 +196,18 @@ TEXT ·ladderstep(SB),0,$296-8
 	ADDQ AX,R12
 	ADCQ DX,R13
 	MOVQ $REDMASK51,DX
-	SHLQ $13,CX:SI
+	SHLQ $13,SI,CX
 	ANDQ DX,SI
-	SHLQ $13,R9:R8
+	SHLQ $13,R8,R9
 	ANDQ DX,R8
 	ADDQ CX,R8
-	SHLQ $13,R11:R10
+	SHLQ $13,R10,R11
 	ANDQ DX,R10
 	ADDQ R9,R10
-	SHLQ $13,R13:R12
+	SHLQ $13,R12,R13
 	ANDQ DX,R12
 	ADDQ R11,R12
-	SHLQ $13,R15:R14
+	SHLQ $13,R14,R15
 	ANDQ DX,R14
 	ADDQ R13,R14
 	IMUL3Q $19,R15,CX
@ -236,18 +311,18 @@ TEXT ·ladderstep(SB),0,$296-8
 	ADDQ AX,R12
 	ADCQ DX,R13
 	MOVQ $REDMASK51,DX
-	SHLQ $13,CX:SI
+	SHLQ $13,SI,CX
 	ANDQ DX,SI
-	SHLQ $13,R9:R8
+	SHLQ $13,R8,R9
 	ANDQ DX,R8
 	ADDQ CX,R8
-	SHLQ $13,R11:R10
+	SHLQ $13,R10,R11
 	ANDQ DX,R10
 	ADDQ R9,R10
-	SHLQ $13,R13:R12
+	SHLQ $13,R12,R13
 	ANDQ DX,R12
 	ADDQ R11,R12
-	SHLQ $13,R15:R14
+	SHLQ $13,R14,R15
 	ANDQ DX,R14
 	ADDQ R13,R14
 	IMUL3Q $19,R15,CX
@ -441,18 +516,18 @@ TEXT ·ladderstep(SB),0,$296-8
 	ADDQ AX,R12
 	ADCQ DX,R13
 	MOVQ $REDMASK51,DX
-	SHLQ $13,CX:SI
+	SHLQ $13,SI,CX
 	ANDQ DX,SI
-	SHLQ $13,R9:R8
+	SHLQ $13,R8,R9
 	ANDQ DX,R8
 	ADDQ CX,R8
-	SHLQ $13,R11:R10
+	SHLQ $13,R10,R11
 	ANDQ DX,R10
 	ADDQ R9,R10
-	SHLQ $13,R13:R12
+	SHLQ $13,R12,R13
 	ANDQ DX,R12
 	ADDQ R11,R12
-	SHLQ $13,R15:R14
+	SHLQ $13,R14,R15
 	ANDQ DX,R14
 	ADDQ R13,R14
 	IMUL3Q $19,R15,CX
@ -591,18 +666,18 @@ TEXT ·ladderstep(SB),0,$296-8
 	ADDQ AX,R12
 	ADCQ DX,R13
 	MOVQ $REDMASK51,DX
-	SHLQ $13,CX:SI
+	SHLQ $13,SI,CX
 	ANDQ DX,SI
-	SHLQ $13,R9:R8
+	SHLQ $13,R8,R9
 	ANDQ DX,R8
 	ADDQ CX,R8
-	SHLQ $13,R11:R10
+	SHLQ $13,R10,R11
 	ANDQ DX,R10
 	ADDQ R9,R10
-	SHLQ $13,R13:R12
+	SHLQ $13,R12,R13
 	ANDQ DX,R12
 	ADDQ R11,R12
-	SHLQ $13,R15:R14
+	SHLQ $13,R14,R15
 	ANDQ DX,R14
 	ADDQ R13,R14
 	IMUL3Q $19,R15,CX
@ -731,18 +806,18 @@ TEXT ·ladderstep(SB),0,$296-8
 	ADDQ AX,R12
 	ADCQ DX,R13
 	MOVQ $REDMASK51,DX
-	SHLQ $13,CX:SI
+	SHLQ $13,SI,CX
 	ANDQ DX,SI
-	SHLQ $13,R9:R8
+	SHLQ $13,R8,R9
 	ANDQ DX,R8
 	ADDQ CX,R8
-	SHLQ $13,R11:R10
+	SHLQ $13,R10,R11
 	ANDQ DX,R10
 	ADDQ R9,R10
-	SHLQ $13,R13:R12
+	SHLQ $13,R12,R13
 	ANDQ DX,R12
 	ADDQ R11,R12
-	SHLQ $13,R15:R14
+	SHLQ $13,R14,R15
 	ANDQ DX,R14
 	ADDQ R13,R14
 	IMUL3Q $19,R15,CX
@ -846,18 +921,18 @@ TEXT ·ladderstep(SB),0,$296-8
 	ADDQ AX,R12
 	ADCQ DX,R13
 	MOVQ $REDMASK51,DX
-	SHLQ $13,CX:SI
+	SHLQ $13,SI,CX
 	ANDQ DX,SI
-	SHLQ $13,R9:R8
+	SHLQ $13,R8,R9
 	ANDQ DX,R8
 	ADDQ CX,R8
-	SHLQ $13,R11:R10
+	SHLQ $13,R10,R11
 	ANDQ DX,R10
 	ADDQ R9,R10
-	SHLQ $13,R13:R12
+	SHLQ $13,R12,R13
 	ANDQ DX,R12
 	ADDQ R11,R12
-	SHLQ $13,R15:R14
+	SHLQ $13,R14,R15
 	ANDQ DX,R14
 	ADDQ R13,R14
 	IMUL3Q $19,R15,CX
@ -996,18 +1071,18 @@ TEXT ·ladderstep(SB),0,$296-8
 	ADDQ AX,R12
 	ADCQ DX,R13
 	MOVQ $REDMASK51,DX
-	SHLQ $13,CX:SI
+	SHLQ $13,SI,CX
 	ANDQ DX,SI
-	SHLQ $13,R9:R8
+	SHLQ $13,R8,R9
 	ANDQ DX,R8
 	ADDQ CX,R8
-	SHLQ $13,R11:R10
+	SHLQ $13,R10,R11
 	ANDQ DX,R10
 	ADDQ R9,R10
-	SHLQ $13,R13:R12
+	SHLQ $13,R12,R13
 	ANDQ DX,R12
 	ADDQ R11,R12
-	SHLQ $13,R15:R14
+	SHLQ $13,R14,R15
 	ANDQ DX,R14
 	ADDQ R13,R14
 	IMUL3Q $19,R15,CX
@ -1146,18 +1221,18 @@ TEXT ·ladderstep(SB),0,$296-8
 	ADDQ AX,R12
 	ADCQ DX,R13
 	MOVQ $REDMASK51,DX
-	SHLQ $13,CX:SI
+	SHLQ $13,SI,CX
 	ANDQ DX,SI
-	SHLQ $13,R9:R8
+	SHLQ $13,R8,R9
 	ANDQ DX,R8
 	ADDQ CX,R8
-	SHLQ $13,R11:R10
+	SHLQ $13,R10,R11
 	ANDQ DX,R10
 	ADDQ R9,R10
-	SHLQ $13,R13:R12
+	SHLQ $13,R12,R13
 	ANDQ DX,R12
 	ADDQ R11,R12
-	SHLQ $13,R15:R14
+	SHLQ $13,R14,R15
 	ANDQ DX,R14
 	ADDQ R13,R14
 	IMUL3Q $19,R15,CX
@ -1332,18 +1407,18 @@ TEXT ·ladderstep(SB),0,$296-8
 	ADDQ AX,R12
 	ADCQ DX,R13
 	MOVQ $REDMASK51,DX
-	SHLQ $13,CX:SI
+	SHLQ $13,SI,CX
 	ANDQ DX,SI
-	SHLQ $13,R9:R8
+	SHLQ $13,R8,R9
 	ANDQ DX,R8
 	ADDQ CX,R8
-	SHLQ $13,R11:R10
+	SHLQ $13,R10,R11
 	ANDQ DX,R10
 	ADDQ R9,R10
-	SHLQ $13,R13:R12
+	SHLQ $13,R12,R13
 	ANDQ DX,R12
 	ADDQ R11,R12
-	SHLQ $13,R15:R14
+	SHLQ $13,R14,R15
 	ANDQ DX,R14
 	ADDQ R13,R14
 	IMUL3Q $19,R15,CX
@ -1375,3 +1450,344 @@ TEXT ·ladderstep(SB),0,$296-8
 	MOVQ AX,104(DI)
 	MOVQ R10,112(DI)
 	RET
+
+// func cswap(inout *[4][5]uint64, v uint64)
+TEXT ·cswap(SB),7,$0
+	MOVQ inout+0(FP),DI
+	MOVQ v+8(FP),SI
+
+	SUBQ $1, SI
+	NOTQ SI
+	MOVQ SI, X15
+	PSHUFD $0x44, X15, X15
+
+	MOVOU 0(DI), X0
+	MOVOU 16(DI), X2
+	MOVOU 32(DI), X4
+	MOVOU 48(DI), X6
+	MOVOU 64(DI), X8
+	MOVOU 80(DI), X1
+	MOVOU 96(DI), X3
+	MOVOU 112(DI), X5
+	MOVOU 128(DI), X7
+	MOVOU 144(DI), X9
+
+	MOVO X1, X10
+	MOVO X3, X11
+	MOVO X5, X12
+	MOVO X7, X13
+	MOVO X9, X14
+
+	PXOR X0, X10
+	PXOR X2, X11
+	PXOR X4, X12
+	PXOR X6, X13
+	PXOR X8, X14
+	PAND X15, X10
+	PAND X15, X11
+	PAND X15, X12
+	PAND X15, X13
+	PAND X15, X14
+	PXOR X10, X0
+	PXOR X10, X1
+	PXOR X11, X2
+	PXOR X11, X3
+	PXOR X12, X4
+	PXOR X12, X5
+	PXOR X13, X6
+	PXOR X13, X7
+	PXOR X14, X8
+	PXOR X14, X9
+
+	MOVOU X0, 0(DI)
+	MOVOU X2, 16(DI)
+	MOVOU X4, 32(DI)
+	MOVOU X6, 48(DI)
+	MOVOU X8, 64(DI)
+	MOVOU X1, 80(DI)
+	MOVOU X3, 96(DI)
+	MOVOU X5, 112(DI)
+	MOVOU X7, 128(DI)
+	MOVOU X9, 144(DI)
+	RET
+
+// func mul(dest, a, b *[5]uint64)
+TEXT ·mul(SB),0,$16-24
+	MOVQ dest+0(FP), DI
+	MOVQ a+8(FP), SI
+	MOVQ b+16(FP), DX
+
+	MOVQ DX,CX
+	MOVQ 24(SI),DX
+	IMUL3Q $19,DX,AX
+	MOVQ AX,0(SP)
+	MULQ 16(CX)
+	MOVQ AX,R8
+	MOVQ DX,R9
+	MOVQ 32(SI),DX
+	IMUL3Q $19,DX,AX
+	MOVQ AX,8(SP)
+	MULQ 8(CX)
+	ADDQ AX,R8
+	ADCQ DX,R9
+	MOVQ 0(SI),AX
+	MULQ 0(CX)
+	ADDQ AX,R8
+	ADCQ DX,R9
+	MOVQ 0(SI),AX
+	MULQ 8(CX)
+	MOVQ AX,R10
+	MOVQ DX,R11
+	MOVQ 0(SI),AX
+	MULQ 16(CX)
+	MOVQ AX,R12
+	MOVQ DX,R13
+	MOVQ 0(SI),AX
+	MULQ 24(CX)
+	MOVQ AX,R14
+	MOVQ DX,R15
+	MOVQ 0(SI),AX
+	MULQ 32(CX)
+	MOVQ AX,BX
+	MOVQ DX,BP
+	MOVQ 8(SI),AX
+	MULQ 0(CX)
+	ADDQ AX,R10
+	ADCQ DX,R11
+	MOVQ 8(SI),AX
+	MULQ 8(CX)
+	ADDQ AX,R12
+	ADCQ DX,R13
+	MOVQ 8(SI),AX
+	MULQ 16(CX)
+	ADDQ AX,R14
+	ADCQ DX,R15
+	MOVQ 8(SI),AX
+	MULQ 24(CX)
+	ADDQ AX,BX
+	ADCQ DX,BP
+	MOVQ 8(SI),DX
+	IMUL3Q $19,DX,AX
+	MULQ 32(CX)
+	ADDQ AX,R8
+	ADCQ DX,R9
+	MOVQ 16(SI),AX
+	MULQ 0(CX)
+	ADDQ AX,R12
+	ADCQ DX,R13
+	MOVQ 16(SI),AX
+	MULQ 8(CX)
+	ADDQ AX,R14
+	ADCQ DX,R15
+	MOVQ 16(SI),AX
+	MULQ 16(CX)
+	ADDQ AX,BX
+	ADCQ DX,BP
+	MOVQ 16(SI),DX
+	IMUL3Q $19,DX,AX
+	MULQ 24(CX)
+	ADDQ AX,R8
+	ADCQ DX,R9
+	MOVQ 16(SI),DX
+	IMUL3Q $19,DX,AX
+	MULQ 32(CX)
+	ADDQ AX,R10
+	ADCQ DX,R11
+	MOVQ 24(SI),AX
+	MULQ 0(CX)
+	ADDQ AX,R14
+	ADCQ DX,R15
+	MOVQ 24(SI),AX
+	MULQ 8(CX)
+	ADDQ AX,BX
+	ADCQ DX,BP
+	MOVQ 0(SP),AX
+	MULQ 24(CX)
+	ADDQ AX,R10
+	ADCQ DX,R11
+	MOVQ 0(SP),AX
+	MULQ 32(CX)
+	ADDQ AX,R12
+	ADCQ DX,R13
+	MOVQ 32(SI),AX
+	MULQ 0(CX)
+	ADDQ AX,BX
+	ADCQ DX,BP
+	MOVQ 8(SP),AX
+	MULQ 16(CX)
+	ADDQ AX,R10
+	ADCQ DX,R11
+	MOVQ 8(SP),AX
+	MULQ 24(CX)
+	ADDQ AX,R12
+	ADCQ DX,R13
+	MOVQ 8(SP),AX
+	MULQ 32(CX)
+	ADDQ AX,R14
+	ADCQ DX,R15
+	MOVQ $REDMASK51,SI
+	SHLQ $13,R8,R9
+	ANDQ SI,R8
+	SHLQ $13,R10,R11
+	ANDQ SI,R10
+	ADDQ R9,R10
+	SHLQ $13,R12,R13
+	ANDQ SI,R12
+	ADDQ R11,R12
+	SHLQ $13,R14,R15
+	ANDQ SI,R14
+	ADDQ R13,R14
+	SHLQ $13,BX,BP
+	ANDQ SI,BX
+	ADDQ R15,BX
+	IMUL3Q $19,BP,DX
+	ADDQ DX,R8
+	MOVQ R8,DX
+	SHRQ $51,DX
+	ADDQ R10,DX
+	MOVQ DX,CX
+	SHRQ $51,DX
+	ANDQ SI,R8
+	ADDQ R12,DX
+	MOVQ DX,R9
+	SHRQ $51,DX
+	ANDQ SI,CX
+	ADDQ R14,DX
+	MOVQ DX,AX
+	SHRQ $51,DX
+	ANDQ SI,R9
+	ADDQ BX,DX
+	MOVQ DX,R10
+	SHRQ $51,DX
+	ANDQ SI,AX
+	IMUL3Q $19,DX,DX
+	ADDQ DX,R8
+	ANDQ SI,R10
+	MOVQ R8,0(DI)
+	MOVQ CX,8(DI)
+	MOVQ R9,16(DI)
+	MOVQ AX,24(DI)
+	MOVQ R10,32(DI)
+	RET
+
+// func square(out, in *[5]uint64)
+TEXT ·square(SB),7,$0-16
+	MOVQ out+0(FP), DI
+	MOVQ in+8(FP), SI
+
+	MOVQ 0(SI),AX
+	MULQ 0(SI)
+	MOVQ AX,CX
+	MOVQ DX,R8
+	MOVQ 0(SI),AX
+	SHLQ $1,AX
+	MULQ 8(SI)
+	MOVQ AX,R9
+	MOVQ DX,R10
+	MOVQ 0(SI),AX
+	SHLQ $1,AX
+	MULQ 16(SI)
+	MOVQ AX,R11
+	MOVQ DX,R12
+	MOVQ 0(SI),AX
+	SHLQ $1,AX
+	MULQ 24(SI)
+	MOVQ AX,R13
+	MOVQ DX,R14
+	MOVQ 0(SI),AX
+	SHLQ $1,AX
+	MULQ 32(SI)
+	MOVQ AX,R15
+	MOVQ DX,BX
+	MOVQ 8(SI),AX
+	MULQ 8(SI)
+	ADDQ AX,R11
+	ADCQ DX,R12
+	MOVQ 8(SI),AX
+	SHLQ $1,AX
+	MULQ 16(SI)
+	ADDQ AX,R13
+	ADCQ DX,R14
+	MOVQ 8(SI),AX
+	SHLQ $1,AX
+	MULQ 24(SI)
+	ADDQ AX,R15
+	ADCQ DX,BX
+	MOVQ 8(SI),DX
+	IMUL3Q $38,DX,AX
+	MULQ 32(SI)
+	ADDQ AX,CX
+	ADCQ DX,R8
+	MOVQ 16(SI),AX
+	MULQ 16(SI)
+	ADDQ AX,R15
+	ADCQ DX,BX
+	MOVQ 16(SI),DX
+	IMUL3Q $38,DX,AX
+	MULQ 24(SI)
+	ADDQ AX,CX
+	ADCQ DX,R8
+	MOVQ 16(SI),DX
+	IMUL3Q $38,DX,AX
+	MULQ 32(SI)
+	ADDQ AX,R9
+	ADCQ DX,R10
+	MOVQ 24(SI),DX
+	IMUL3Q $19,DX,AX
+	MULQ 24(SI)
+	ADDQ AX,R9
+	ADCQ DX,R10
+	MOVQ 24(SI),DX
+	IMUL3Q $38,DX,AX
+	MULQ 32(SI)
+	ADDQ AX,R11
+	ADCQ DX,R12
+	MOVQ 32(SI),DX
+	IMUL3Q $19,DX,AX
+	MULQ 32(SI)
+	ADDQ AX,R13
+	ADCQ DX,R14
+	MOVQ $REDMASK51,SI
+	SHLQ $13,CX,R8
+	ANDQ SI,CX
+	SHLQ $13,R9,R10
+	ANDQ SI,R9
+	ADDQ R8,R9
+	SHLQ $13,R11,R12
+	ANDQ SI,R11
+	ADDQ R10,R11
+	SHLQ $13,R13,R14
+	ANDQ SI,R13
+	ADDQ R12,R13
+	SHLQ $13,R15,BX
+	ANDQ SI,R15
+	ADDQ R14,R15
+	IMUL3Q $19,BX,DX
+	ADDQ DX,CX
+	MOVQ CX,DX
+	SHRQ $51,DX
+	ADDQ R9,DX
+	ANDQ SI,CX
+	MOVQ DX,R8
+	SHRQ $51,DX
+	ADDQ R11,DX
+	ANDQ SI,R8
+	MOVQ DX,R9
+	SHRQ $51,DX
+	ADDQ R13,DX
+	ANDQ SI,R9
+	MOVQ DX,AX
+	SHRQ $51,DX
+	ADDQ R15,DX
+	ANDQ SI,AX
+	MOVQ DX,R10
+	SHRQ $51,DX
+	IMUL3Q $19,DX,DX
+	ADDQ DX,CX
+	ANDQ SI,R10
+	MOVQ CX,0(DI)
+	MOVQ R8,8(DI)
+	MOVQ R9,16(DI)
+	MOVQ AX,24(DI)
+	MOVQ R10,32(DI)
+	RET
--- a/vendor/golang.org/x/crypto/curve25519/curve25519_generic.go
+++ b/vendor/golang.org/x/crypto/curve25519/curve25519_generic.go
@ -0,0 +1,828 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package curve25519
+
+import "encoding/binary"
+
+// This code is a port of the public domain, "ref10" implementation of
+// curve25519 from SUPERCOP 20130419 by D. J. Bernstein.
+
+// fieldElement represents an element of the field GF(2^255 - 19). An element
+// t, entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77
+// t[3]+2^102 t[4]+...+2^230 t[9]. Bounds on each t[i] vary depending on
+// context.
+type fieldElement [10]int32
+
+func feZero(fe *fieldElement) {
+	for i := range fe {
+		fe[i] = 0
+	}
+}
+
+func feOne(fe *fieldElement) {
+	feZero(fe)
+	fe[0] = 1
+}
+
+func feAdd(dst, a, b *fieldElement) {
+	for i := range dst {
+		dst[i] = a[i] + b[i]
+	}
+}
+
+func feSub(dst, a, b *fieldElement) {
+	for i := range dst {
+		dst[i] = a[i] - b[i]
+	}
+}
+
+func feCopy(dst, src *fieldElement) {
+	for i := range dst {
+		dst[i] = src[i]
+	}
+}
+
+// feCSwap replaces (f,g) with (g,f) if b == 1; replaces (f,g) with (f,g) if b == 0.
+//
+// Preconditions: b in {0,1}.
+func feCSwap(f, g *fieldElement, b int32) {
+	b = -b
+	for i := range f {
+		t := b & (f[i] ^ g[i])
+		f[i] ^= t
+		g[i] ^= t
+	}
+}
+
+// load3 reads a 24-bit, little-endian value from in.
+func load3(in []byte) int64 {
+	var r int64
+	r = int64(in[0])
+	r |= int64(in[1]) << 8
+	r |= int64(in[2]) << 16
+	return r
+}
+
+// load4 reads a 32-bit, little-endian value from in.
+func load4(in []byte) int64 {
+	return int64(binary.LittleEndian.Uint32(in))
+}
+
+func feFromBytes(dst *fieldElement, src *[32]byte) {
+	h0 := load4(src[:])
+	h1 := load3(src[4:]) << 6
+	h2 := load3(src[7:]) << 5
+	h3 := load3(src[10:]) << 3
+	h4 := load3(src[13:]) << 2
+	h5 := load4(src[16:])
+	h6 := load3(src[20:]) << 7
+	h7 := load3(src[23:]) << 5
+	h8 := load3(src[26:]) << 4
+	h9 := (load3(src[29:]) & 0x7fffff) << 2
+
+	var carry [10]int64
+	carry[9] = (h9 + 1<<24) >> 25
+	h0 += carry[9] * 19
+	h9 -= carry[9] << 25
+	carry[1] = (h1 + 1<<24) >> 25
+	h2 += carry[1]
+	h1 -= carry[1] << 25
+	carry[3] = (h3 + 1<<24) >> 25
+	h4 += carry[3]
+	h3 -= carry[3] << 25
+	carry[5] = (h5 + 1<<24) >> 25
+	h6 += carry[5]
+	h5 -= carry[5] << 25
+	carry[7] = (h7 + 1<<24) >> 25
+	h8 += carry[7]
+	h7 -= carry[7] << 25
+
+	carry[0] = (h0 + 1<<25) >> 26
+	h1 += carry[0]
+	h0 -= carry[0] << 26
+	carry[2] = (h2 + 1<<25) >> 26
+	h3 += carry[2]
+	h2 -= carry[2] << 26
+	carry[4] = (h4 + 1<<25) >> 26
+	h5 += carry[4]
+	h4 -= carry[4] << 26
+	carry[6] = (h6 + 1<<25) >> 26
+	h7 += carry[6]
+	h6 -= carry[6] << 26
+	carry[8] = (h8 + 1<<25) >> 26
+	h9 += carry[8]
+	h8 -= carry[8] << 26
+
+	dst[0] = int32(h0)
+	dst[1] = int32(h1)
+	dst[2] = int32(h2)
+	dst[3] = int32(h3)
+	dst[4] = int32(h4)
+	dst[5] = int32(h5)
+	dst[6] = int32(h6)
+	dst[7] = int32(h7)
+	dst[8] = int32(h8)
+	dst[9] = int32(h9)
+}
+
+// feToBytes marshals h to s.
+// Preconditions:
+//   |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
+//
+// Write p=2^255-19; q=floor(h/p).
+// Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))).
+//
+// Proof:
+//   Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4.
+//   Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4.
+//
+//   Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9).
+//   Then 0<y<1.
+//
+//   Write r=h-pq.
+//   Have 0<=r<=p-1=2^255-20.
+//   Thus 0<=r+19(2^-255)r<r+19(2^-255)2^255<=2^255-1.
+//
+//   Write x=r+19(2^-255)r+y.
+//   Then 0<x<2^255 so floor(2^(-255)x) = 0 so floor(q+2^(-255)x) = q.
+//
+//   Have q+2^(-255)x = 2^(-255)(h + 19 2^(-25) h9 + 2^(-1))
+//   so floor(2^(-255)(h + 19 2^(-25) h9 + 2^(-1))) = q.
+func feToBytes(s *[32]byte, h *fieldElement) {
+	var carry [10]int32
+
+	q := (19*h[9] + (1 << 24)) >> 25
+	q = (h[0] + q) >> 26
+	q = (h[1] + q) >> 25
+	q = (h[2] + q) >> 26
+	q = (h[3] + q) >> 25
+	q = (h[4] + q) >> 26
+	q = (h[5] + q) >> 25
+	q = (h[6] + q) >> 26
+	q = (h[7] + q) >> 25
+	q = (h[8] + q) >> 26
+	q = (h[9] + q) >> 25
+
+	// Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20.
+	h[0] += 19 * q
+	// Goal: Output h-2^255 q, which is between 0 and 2^255-20.
+
+	carry[0] = h[0] >> 26
+	h[1] += carry[0]
+	h[0] -= carry[0] << 26
+	carry[1] = h[1] >> 25
+	h[2] += carry[1]
+	h[1] -= carry[1] << 25
+	carry[2] = h[2] >> 26
+	h[3] += carry[2]
+	h[2] -= carry[2] << 26
+	carry[3] = h[3] >> 25
+	h[4] += carry[3]
+	h[3] -= carry[3] << 25
+	carry[4] = h[4] >> 26
+	h[5] += carry[4]
+	h[4] -= carry[4] << 26
+	carry[5] = h[5] >> 25
+	h[6] += carry[5]
+	h[5] -= carry[5] << 25
+	carry[6] = h[6] >> 26
+	h[7] += carry[6]
+	h[6] -= carry[6] << 26
+	carry[7] = h[7] >> 25
+	h[8] += carry[7]
+	h[7] -= carry[7] << 25
+	carry[8] = h[8] >> 26
+	h[9] += carry[8]
+	h[8] -= carry[8] << 26
+	carry[9] = h[9] >> 25
+	h[9] -= carry[9] << 25
+	// h10 = carry9
+
+	// Goal: Output h[0]+...+2^255 h10-2^255 q, which is between 0 and 2^255-20.
+	// Have h[0]+...+2^230 h[9] between 0 and 2^255-1;
+	// evidently 2^255 h10-2^255 q = 0.
+	// Goal: Output h[0]+...+2^230 h[9].
+
+	s[0] = byte(h[0] >> 0)
+	s[1] = byte(h[0] >> 8)
+	s[2] = byte(h[0] >> 16)
+	s[3] = byte((h[0] >> 24) | (h[1] << 2))
+	s[4] = byte(h[1] >> 6)
+	s[5] = byte(h[1] >> 14)
+	s[6] = byte((h[1] >> 22) | (h[2] << 3))
+	s[7] = byte(h[2] >> 5)
+	s[8] = byte(h[2] >> 13)
+	s[9] = byte((h[2] >> 21) | (h[3] << 5))
+	s[10] = byte(h[3] >> 3)
+	s[11] = byte(h[3] >> 11)
+	s[12] = byte((h[3] >> 19) | (h[4] << 6))
+	s[13] = byte(h[4] >> 2)
+	s[14] = byte(h[4] >> 10)
+	s[15] = byte(h[4] >> 18)
+	s[16] = byte(h[5] >> 0)
+	s[17] = byte(h[5] >> 8)
+	s[18] = byte(h[5] >> 16)
+	s[19] = byte((h[5] >> 24) | (h[6] << 1))
+	s[20] = byte(h[6] >> 7)
+	s[21] = byte(h[6] >> 15)
+	s[22] = byte((h[6] >> 23) | (h[7] << 3))
+	s[23] = byte(h[7] >> 5)
+	s[24] = byte(h[7] >> 13)
+	s[25] = byte((h[7] >> 21) | (h[8] << 4))
+	s[26] = byte(h[8] >> 4)
+	s[27] = byte(h[8] >> 12)
+	s[28] = byte((h[8] >> 20) | (h[9] << 6))
+	s[29] = byte(h[9] >> 2)
+	s[30] = byte(h[9] >> 10)
+	s[31] = byte(h[9] >> 18)
+}
+
+// feMul calculates h = f * g
+// Can overlap h with f or g.
+//
+// Preconditions:
+//    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
+//    |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
+//
+// Postconditions:
+//    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
+//
+// Notes on implementation strategy:
+//
+// Using schoolbook multiplication.
+// Karatsuba would save a little in some cost models.
+//
+// Most multiplications by 2 and 19 are 32-bit precomputations;
+// cheaper than 64-bit postcomputations.
+//
+// There is one remaining multiplication by 19 in the carry chain;
+// one *19 precomputation can be merged into this,
+// but the resulting data flow is considerably less clean.
+//
+// There are 12 carries below.
+// 10 of them are 2-way parallelizable and vectorizable.
+// Can get away with 11 carries, but then data flow is much deeper.
+//
+// With tighter constraints on inputs can squeeze carries into int32.
+func feMul(h, f, g *fieldElement) {
+	f0 := f[0]
+	f1 := f[1]
+	f2 := f[2]
+	f3 := f[3]
+	f4 := f[4]
+	f5 := f[5]
+	f6 := f[6]
+	f7 := f[7]
+	f8 := f[8]
+	f9 := f[9]
+	g0 := g[0]
+	g1 := g[1]
+	g2 := g[2]
+	g3 := g[3]
+	g4 := g[4]
+	g5 := g[5]
+	g6 := g[6]
+	g7 := g[7]
+	g8 := g[8]
+	g9 := g[9]
+	g1_19 := 19 * g1 // 1.4*2^29
+	g2_19 := 19 * g2 // 1.4*2^30; still ok
+	g3_19 := 19 * g3
+	g4_19 := 19 * g4
+	g5_19 := 19 * g5
+	g6_19 := 19 * g6
+	g7_19 := 19 * g7
+	g8_19 := 19 * g8
+	g9_19 := 19 * g9
+	f1_2 := 2 * f1
+	f3_2 := 2 * f3
+	f5_2 := 2 * f5
+	f7_2 := 2 * f7
+	f9_2 := 2 * f9
+	f0g0 := int64(f0) * int64(g0)
+	f0g1 := int64(f0) * int64(g1)
+	f0g2 := int64(f0) * int64(g2)
+	f0g3 := int64(f0) * int64(g3)
+	f0g4 := int64(f0) * int64(g4)
+	f0g5 := int64(f0) * int64(g5)
+	f0g6 := int64(f0) * int64(g6)
+	f0g7 := int64(f0) * int64(g7)
+	f0g8 := int64(f0) * int64(g8)
+	f0g9 := int64(f0) * int64(g9)
+	f1g0 := int64(f1) * int64(g0)
+	f1g1_2 := int64(f1_2) * int64(g1)
+	f1g2 := int64(f1) * int64(g2)
+	f1g3_2 := int64(f1_2) * int64(g3)
+	f1g4 := int64(f1) * int64(g4)
+	f1g5_2 := int64(f1_2) * int64(g5)
+	f1g6 := int64(f1) * int64(g6)
+	f1g7_2 := int64(f1_2) * int64(g7)
+	f1g8 := int64(f1) * int64(g8)
+	f1g9_38 := int64(f1_2) * int64(g9_19)
+	f2g0 := int64(f2) * int64(g0)
+	f2g1 := int64(f2) * int64(g1)
+	f2g2 := int64(f2) * int64(g2)
+	f2g3 := int64(f2) * int64(g3)
+	f2g4 := int64(f2) * int64(g4)
+	f2g5 := int64(f2) * int64(g5)
+	f2g6 := int64(f2) * int64(g6)
+	f2g7 := int64(f2) * int64(g7)
+	f2g8_19 := int64(f2) * int64(g8_19)
+	f2g9_19 := int64(f2) * int64(g9_19)
+	f3g0 := int64(f3) * int64(g0)
+	f3g1_2 := int64(f3_2) * int64(g1)
+	f3g2 := int64(f3) * int64(g2)
+	f3g3_2 := int64(f3_2) * int64(g3)
+	f3g4 := int64(f3) * int64(g4)
+	f3g5_2 := int64(f3_2) * int64(g5)
+	f3g6 := int64(f3) * int64(g6)
+	f3g7_38 := int64(f3_2) * int64(g7_19)
+	f3g8_19 := int64(f3) * int64(g8_19)
+	f3g9_38 := int64(f3_2) * int64(g9_19)
+	f4g0 := int64(f4) * int64(g0)
+	f4g1 := int64(f4) * int64(g1)
+	f4g2 := int64(f4) * int64(g2)
+	f4g3 := int64(f4) * int64(g3)
+	f4g4 := int64(f4) * int64(g4)
+	f4g5 := int64(f4) * int64(g5)
+	f4g6_19 := int64(f4) * int64(g6_19)
+	f4g7_19 := int64(f4) * int64(g7_19)
+	f4g8_19 := int64(f4) * int64(g8_19)
+	f4g9_19 := int64(f4) * int64(g9_19)
+	f5g0 := int64(f5) * int64(g0)
+	f5g1_2 := int64(f5_2) * int64(g1)
+	f5g2 := int64(f5) * int64(g2)
+	f5g3_2 := int64(f5_2) * int64(g3)
+	f5g4 := int64(f5) * int64(g4)
+	f5g5_38 := int64(f5_2) * int64(g5_19)
+	f5g6_19 := int64(f5) * int64(g6_19)
+	f5g7_38 := int64(f5_2) * int64(g7_19)
+	f5g8_19 := int64(f5) * int64(g8_19)
+	f5g9_38 := int64(f5_2) * int64(g9_19)
+	f6g0 := int64(f6) * int64(g0)
+	f6g1 := int64(f6) * int64(g1)
+	f6g2 := int64(f6) * int64(g2)
+	f6g3 := int64(f6) * int64(g3)
+	f6g4_19 := int64(f6) * int64(g4_19)
+	f6g5_19 := int64(f6) * int64(g5_19)
+	f6g6_19 := int64(f6) * int64(g6_19)
+	f6g7_19 := int64(f6) * int64(g7_19)
+	f6g8_19 := int64(f6) * int64(g8_19)
+	f6g9_19 := int64(f6) * int64(g9_19)
+	f7g0 := int64(f7) * int64(g0)
+	f7g1_2 := int64(f7_2) * int64(g1)
+	f7g2 := int64(f7) * int64(g2)
+	f7g3_38 := int64(f7_2) * int64(g3_19)
+	f7g4_19 := int64(f7) * int64(g4_19)
+	f7g5_38 := int64(f7_2) * int64(g5_19)
+	f7g6_19 := int64(f7) * int64(g6_19)
+	f7g7_38 := int64(f7_2) * int64(g7_19)
+	f7g8_19 := int64(f7) * int64(g8_19)
+	f7g9_38 := int64(f7_2) * int64(g9_19)
+	f8g0 := int64(f8) * int64(g0)
+	f8g1 := int64(f8) * int64(g1)
+	f8g2_19 := int64(f8) * int64(g2_19)
+	f8g3_19 := int64(f8) * int64(g3_19)
+	f8g4_19 := int64(f8) * int64(g4_19)
+	f8g5_19 := int64(f8) * int64(g5_19)
+	f8g6_19 := int64(f8) * int64(g6_19)
+	f8g7_19 := int64(f8) * int64(g7_19)
+	f8g8_19 := int64(f8) * int64(g8_19)
+	f8g9_19 := int64(f8) * int64(g9_19)
+	f9g0 := int64(f9) * int64(g0)
+	f9g1_38 := int64(f9_2) * int64(g1_19)
+	f9g2_19 := int64(f9) * int64(g2_19)
+	f9g3_38 := int64(f9_2) * int64(g3_19)
+	f9g4_19 := int64(f9) * int64(g4_19)
+	f9g5_38 := int64(f9_2) * int64(g5_19)
+	f9g6_19 := int64(f9) * int64(g6_19)
+	f9g7_38 := int64(f9_2) * int64(g7_19)
+	f9g8_19 := int64(f9) * int64(g8_19)
+	f9g9_38 := int64(f9_2) * int64(g9_19)
+	h0 := f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38
+	h1 := f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19
+	h2 := f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38
+	h3 := f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19
+	h4 := f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38
+	h5 := f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19
+	h6 := f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38
+	h7 := f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19
+	h8 := f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38
+	h9 := f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0
+	var carry [10]int64
+
+	// |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38))
+	//   i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8
+	// |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19))
+	//   i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9
+
+	carry[0] = (h0 + (1 << 25)) >> 26
+	h1 += carry[0]
+	h0 -= carry[0] << 26
+	carry[4] = (h4 + (1 << 25)) >> 26
+	h5 += carry[4]
+	h4 -= carry[4] << 26
+	// |h0| <= 2^25
+	// |h4| <= 2^25
+	// |h1| <= 1.51*2^58
+	// |h5| <= 1.51*2^58
+
+	carry[1] = (h1 + (1 << 24)) >> 25
+	h2 += carry[1]
+	h1 -= carry[1] << 25
+	carry[5] = (h5 + (1 << 24)) >> 25
+	h6 += carry[5]
+	h5 -= carry[5] << 25
+	// |h1| <= 2^24; from now on fits into int32
+	// |h5| <= 2^24; from now on fits into int32
+	// |h2| <= 1.21*2^59
+	// |h6| <= 1.21*2^59
+
+	carry[2] = (h2 + (1 << 25)) >> 26
+	h3 += carry[2]
+	h2 -= carry[2] << 26
+	carry[6] = (h6 + (1 << 25)) >> 26
+	h7 += carry[6]
+	h6 -= carry[6] << 26
+	// |h2| <= 2^25; from now on fits into int32 unchanged
+	// |h6| <= 2^25; from now on fits into int32 unchanged
+	// |h3| <= 1.51*2^58
+	// |h7| <= 1.51*2^58
+
+	carry[3] = (h3 + (1 << 24)) >> 25
+	h4 += carry[3]
+	h3 -= carry[3] << 25
+	carry[7] = (h7 + (1 << 24)) >> 25
+	h8 += carry[7]
+	h7 -= carry[7] << 25
+	// |h3| <= 2^24; from now on fits into int32 unchanged
+	// |h7| <= 2^24; from now on fits into int32 unchanged
+	// |h4| <= 1.52*2^33
+	// |h8| <= 1.52*2^33
+
+	carry[4] = (h4 + (1 << 25)) >> 26
+	h5 += carry[4]
+	h4 -= carry[4] << 26
+	carry[8] = (h8 + (1 << 25)) >> 26
+	h9 += carry[8]
+	h8 -= carry[8] << 26
+	// |h4| <= 2^25; from now on fits into int32 unchanged
+	// |h8| <= 2^25; from now on fits into int32 unchanged
+	// |h5| <= 1.01*2^24
+	// |h9| <= 1.51*2^58
+
+	carry[9] = (h9 + (1 << 24)) >> 25
+	h0 += carry[9] * 19
+	h9 -= carry[9] << 25
+	// |h9| <= 2^24; from now on fits into int32 unchanged
+	// |h0| <= 1.8*2^37
+
+	carry[0] = (h0 + (1 << 25)) >> 26
+	h1 += carry[0]
+	h0 -= carry[0] << 26
+	// |h0| <= 2^25; from now on fits into int32 unchanged
+	// |h1| <= 1.01*2^24
+
+	h[0] = int32(h0)
+	h[1] = int32(h1)
+	h[2] = int32(h2)
+	h[3] = int32(h3)
+	h[4] = int32(h4)
+	h[5] = int32(h5)
+	h[6] = int32(h6)
+	h[7] = int32(h7)
+	h[8] = int32(h8)
+	h[9] = int32(h9)
+}
+
+// feSquare calculates h = f*f. Can overlap h with f.
+//
+// Preconditions:
+//    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
+//
+// Postconditions:
+//    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
+func feSquare(h, f *fieldElement) {
+	f0 := f[0]
+	f1 := f[1]
+	f2 := f[2]
+	f3 := f[3]
+	f4 := f[4]
+	f5 := f[5]
+	f6 := f[6]
+	f7 := f[7]
+	f8 := f[8]
+	f9 := f[9]
+	f0_2 := 2 * f0
+	f1_2 := 2 * f1
+	f2_2 := 2 * f2
+	f3_2 := 2 * f3
+	f4_2 := 2 * f4
+	f5_2 := 2 * f5
+	f6_2 := 2 * f6
+	f7_2 := 2 * f7
+	f5_38 := 38 * f5 // 1.31*2^30
+	f6_19 := 19 * f6 // 1.31*2^30
+	f7_38 := 38 * f7 // 1.31*2^30
+	f8_19 := 19 * f8 // 1.31*2^30
+	f9_38 := 38 * f9 // 1.31*2^30
+	f0f0 := int64(f0) * int64(f0)
+	f0f1_2 := int64(f0_2) * int64(f1)
+	f0f2_2 := int64(f0_2) * int64(f2)
+	f0f3_2 := int64(f0_2) * int64(f3)
+	f0f4_2 := int64(f0_2) * int64(f4)
+	f0f5_2 := int64(f0_2) * int64(f5)
+	f0f6_2 := int64(f0_2) * int64(f6)
+	f0f7_2 := int64(f0_2) * int64(f7)
+	f0f8_2 := int64(f0_2) * int64(f8)
+	f0f9_2 := int64(f0_2) * int64(f9)
+	f1f1_2 := int64(f1_2) * int64(f1)
+	f1f2_2 := int64(f1_2) * int64(f2)
+	f1f3_4 := int64(f1_2) * int64(f3_2)
+	f1f4_2 := int64(f1_2) * int64(f4)
+	f1f5_4 := int64(f1_2) * int64(f5_2)
+	f1f6_2 := int64(f1_2) * int64(f6)
+	f1f7_4 := int64(f1_2) * int64(f7_2)
+	f1f8_2 := int64(f1_2) * int64(f8)
+	f1f9_76 := int64(f1_2) * int64(f9_38)
+	f2f2 := int64(f2) * int64(f2)
+	f2f3_2 := int64(f2_2) * int64(f3)
+	f2f4_2 := int64(f2_2) * int64(f4)
+	f2f5_2 := int64(f2_2) * int64(f5)
+	f2f6_2 := int64(f2_2) * int64(f6)
+	f2f7_2 := int64(f2_2) * int64(f7)
+	f2f8_38 := int64(f2_2) * int64(f8_19)
+	f2f9_38 := int64(f2) * int64(f9_38)
+	f3f3_2 := int64(f3_2) * int64(f3)
+	f3f4_2 := int64(f3_2) * int64(f4)
+	f3f5_4 := int64(f3_2) * int64(f5_2)
+	f3f6_2 := int64(f3_2) * int64(f6)
+	f3f7_76 := int64(f3_2) * int64(f7_38)
+	f3f8_38 := int64(f3_2) * int64(f8_19)
+	f3f9_76 := int64(f3_2) * int64(f9_38)
+	f4f4 := int64(f4) * int64(f4)
+	f4f5_2 := int64(f4_2) * int64(f5)
+	f4f6_38 := int64(f4_2) * int64(f6_19)
+	f4f7_38 := int64(f4) * int64(f7_38)
+	f4f8_38 := int64(f4_2) * int64(f8_19)
+	f4f9_38 := int64(f4) * int64(f9_38)
+	f5f5_38 := int64(f5) * int64(f5_38)
+	f5f6_38 := int64(f5_2) * int64(f6_19)
+	f5f7_76 := int64(f5_2) * int64(f7_38)
+	f5f8_38 := int64(f5_2) * int64(f8_19)
+	f5f9_76 := int64(f5_2) * int64(f9_38)
+	f6f6_19 := int64(f6) * int64(f6_19)
+	f6f7_38 := int64(f6) * int64(f7_38)
+	f6f8_38 := int64(f6_2) * int64(f8_19)
+	f6f9_38 := int64(f6) * int64(f9_38)
+	f7f7_38 := int64(f7) * int64(f7_38)
+	f7f8_38 := int64(f7_2) * int64(f8_19)
+	f7f9_76 := int64(f7_2) * int64(f9_38)
+	f8f8_19 := int64(f8) * int64(f8_19)
+	f8f9_38 := int64(f8) * int64(f9_38)
+	f9f9_38 := int64(f9) * int64(f9_38)
+	h0 := f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38
+	h1 := f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38
+	h2 := f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19
+	h3 := f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38
+	h4 := f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38
+	h5 := f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38
+	h6 := f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19
+	h7 := f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38
+	h8 := f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38
+	h9 := f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2
+	var carry [10]int64
+
+	carry[0] = (h0 + (1 << 25)) >> 26
+	h1 += carry[0]
+	h0 -= carry[0] << 26
+	carry[4] = (h4 + (1 << 25)) >> 26
+	h5 += carry[4]
+	h4 -= carry[4] << 26
+
+	carry[1] = (h1 + (1 << 24)) >> 25
+	h2 += carry[1]
+	h1 -= carry[1] << 25
+	carry[5] = (h5 + (1 << 24)) >> 25
+	h6 += carry[5]
+	h5 -= carry[5] << 25
+
+	carry[2] = (h2 + (1 << 25)) >> 26
+	h3 += carry[2]
+	h2 -= carry[2] << 26
+	carry[6] = (h6 + (1 << 25)) >> 26
+	h7 += carry[6]
+	h6 -= carry[6] << 26
+
+	carry[3] = (h3 + (1 << 24)) >> 25
+	h4 += carry[3]
+	h3 -= carry[3] << 25
+	carry[7] = (h7 + (1 << 24)) >> 25
+	h8 += carry[7]
+	h7 -= carry[7] << 25
+
+	carry[4] = (h4 + (1 << 25)) >> 26
+	h5 += carry[4]
+	h4 -= carry[4] << 26
+	carry[8] = (h8 + (1 << 25)) >> 26
+	h9 += carry[8]
+	h8 -= carry[8] << 26
+
+	carry[9] = (h9 + (1 << 24)) >> 25
+	h0 += carry[9] * 19
+	h9 -= carry[9] << 25
+
+	carry[0] = (h0 + (1 << 25)) >> 26
+	h1 += carry[0]
+	h0 -= carry[0] << 26
+
+	h[0] = int32(h0)
+	h[1] = int32(h1)
+	h[2] = int32(h2)
+	h[3] = int32(h3)
+	h[4] = int32(h4)
+	h[5] = int32(h5)
+	h[6] = int32(h6)
+	h[7] = int32(h7)
+	h[8] = int32(h8)
+	h[9] = int32(h9)
+}
+
+// feMul121666 calculates h = f * 121666. Can overlap h with f.
+//
+// Preconditions:
+//    |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
+//
+// Postconditions:
+//    |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
+func feMul121666(h, f *fieldElement) {
+	h0 := int64(f[0]) * 121666
+	h1 := int64(f[1]) * 121666
+	h2 := int64(f[2]) * 121666
+	h3 := int64(f[3]) * 121666
+	h4 := int64(f[4]) * 121666
+	h5 := int64(f[5]) * 121666
+	h6 := int64(f[6]) * 121666
+	h7 := int64(f[7]) * 121666
+	h8 := int64(f[8]) * 121666
+	h9 := int64(f[9]) * 121666
+	var carry [10]int64
+
+	carry[9] = (h9 + (1 << 24)) >> 25
+	h0 += carry[9] * 19
+	h9 -= carry[9] << 25
+	carry[1] = (h1 + (1 << 24)) >> 25
+	h2 += carry[1]
+	h1 -= carry[1] << 25
+	carry[3] = (h3 + (1 << 24)) >> 25
+	h4 += carry[3]
+	h3 -= carry[3] << 25
+	carry[5] = (h5 + (1 << 24)) >> 25
+	h6 += carry[5]
+	h5 -= carry[5] << 25
+	carry[7] = (h7 + (1 << 24)) >> 25
+	h8 += carry[7]
+	h7 -= carry[7] << 25
+
+	carry[0] = (h0 + (1 << 25)) >> 26
+	h1 += carry[0]
+	h0 -= carry[0] << 26
+	carry[2] = (h2 + (1 << 25)) >> 26
+	h3 += carry[2]
+	h2 -= carry[2] << 26
+	carry[4] = (h4 + (1 << 25)) >> 26
+	h5 += carry[4]
+	h4 -= carry[4] << 26
+	carry[6] = (h6 + (1 << 25)) >> 26
+	h7 += carry[6]
+	h6 -= carry[6] << 26
+	carry[8] = (h8 + (1 << 25)) >> 26
+	h9 += carry[8]
+	h8 -= carry[8] << 26
+
+	h[0] = int32(h0)
+	h[1] = int32(h1)
+	h[2] = int32(h2)
+	h[3] = int32(h3)
+	h[4] = int32(h4)
+	h[5] = int32(h5)
+	h[6] = int32(h6)
+	h[7] = int32(h7)
+	h[8] = int32(h8)
+	h[9] = int32(h9)
+}
+
+// feInvert sets out = z^-1.
+func feInvert(out, z *fieldElement) {
+	var t0, t1, t2, t3 fieldElement
+	var i int
+
+	feSquare(&t0, z)
+	for i = 1; i < 1; i++ {
+		feSquare(&t0, &t0)
+	}
+	feSquare(&t1, &t0)
+	for i = 1; i < 2; i++ {
+		feSquare(&t1, &t1)
+	}
+	feMul(&t1, z, &t1)
+	feMul(&t0, &t0, &t1)
+	feSquare(&t2, &t0)
+	for i = 1; i < 1; i++ {
+		feSquare(&t2, &t2)
+	}
+	feMul(&t1, &t1, &t2)
+	feSquare(&t2, &t1)
+	for i = 1; i < 5; i++ {
+		feSquare(&t2, &t2)
+	}
+	feMul(&t1, &t2, &t1)
+	feSquare(&t2, &t1)
+	for i = 1; i < 10; i++ {
+		feSquare(&t2, &t2)
+	}
+	feMul(&t2, &t2, &t1)
+	feSquare(&t3, &t2)
+	for i = 1; i < 20; i++ {
+		feSquare(&t3, &t3)
+	}
+	feMul(&t2, &t3, &t2)
+	feSquare(&t2, &t2)
+	for i = 1; i < 10; i++ {
+		feSquare(&t2, &t2)
+	}
+	feMul(&t1, &t2, &t1)
+	feSquare(&t2, &t1)
+	for i = 1; i < 50; i++ {
+		feSquare(&t2, &t2)
+	}
+	feMul(&t2, &t2, &t1)
+	feSquare(&t3, &t2)
+	for i = 1; i < 100; i++ {
+		feSquare(&t3, &t3)
+	}
+	feMul(&t2, &t3, &t2)
+	feSquare(&t2, &t2)
+	for i = 1; i < 50; i++ {
+		feSquare(&t2, &t2)
+	}
+	feMul(&t1, &t2, &t1)
+	feSquare(&t1, &t1)
+	for i = 1; i < 5; i++ {
+		feSquare(&t1, &t1)
+	}
+	feMul(out, &t1, &t0)
+}
+
+func scalarMultGeneric(out, in, base *[32]byte) {
+	var e [32]byte
+
+	copy(e[:], in[:])
+	e[0] &= 248
+	e[31] &= 127
+	e[31] |= 64
+
+	var x1, x2, z2, x3, z3, tmp0, tmp1 fieldElement
+	feFromBytes(&x1, base)
+	feOne(&x2)
+	feCopy(&x3, &x1)
+	feOne(&z3)
+
+	swap := int32(0)
+	for pos := 254; pos >= 0; pos-- {
+		b := e[pos/8] >> uint(pos&7)
+		b &= 1
+		swap ^= int32(b)
+		feCSwap(&x2, &x3, swap)
+		feCSwap(&z2, &z3, swap)
+		swap = int32(b)
+
+		feSub(&tmp0, &x3, &z3)
+		feSub(&tmp1, &x2, &z2)
+		feAdd(&x2, &x2, &z2)
+		feAdd(&z2, &x3, &z3)
+		feMul(&z3, &tmp0, &x2)
+		feMul(&z2, &z2, &tmp1)
+		feSquare(&tmp0, &tmp1)
+		feSquare(&tmp1, &x2)
+		feAdd(&x3, &z3, &z2)
+		feSub(&z2, &z3, &z2)
+		feMul(&x2, &tmp1, &tmp0)
+		feSub(&tmp1, &tmp1, &tmp0)
+		feSquare(&z2, &z2)
+		feMul121666(&z3, &tmp1)
+		feSquare(&x3, &x3)
+		feAdd(&tmp0, &tmp0, &z3)
+		feMul(&z3, &x1, &z2)
+		feMul(&z2, &tmp1, &tmp0)
+	}
+
+	feCSwap(&x2, &x3, swap)
+	feCSwap(&z2, &z3, swap)
+
+	feInvert(&z2, &z2)
+	feMul(&x2, &x2, &z2)
+	feToBytes(out, &x2)
+}
--- a/vendor/golang.org/x/crypto/curve25519/curve25519_noasm.go
+++ b/vendor/golang.org/x/crypto/curve25519/curve25519_noasm.go
@ -0,0 +1,11 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !amd64 gccgo appengine purego
+
+package curve25519
+
+func scalarMult(out, in, base *[32]byte) {
+	scalarMultGeneric(out, in, base)
+}
--- a/vendor/golang.org/x/crypto/curve25519/doc.go
+++ b/vendor/golang.org/x/crypto/curve25519/doc.go
@ -1,23 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package curve25519 provides an implementation of scalar multiplication on
-// the elliptic curve known as curve25519. See https://cr.yp.to/ecdh.html
-package curve25519 // import "golang.org/x/crypto/curve25519"
-
-// basePoint is the x coordinate of the generator of the curve.
-var basePoint = [32]byte{9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
-
-// ScalarMult sets dst to the product in*base where dst and base are the x
-// coordinates of group points and all values are in little-endian form.
-func ScalarMult(dst, in, base *[32]byte) {
-	scalarMult(dst, in, base)
-}
-
-// ScalarBaseMult sets dst to the product in*base where dst and base are the x
-// coordinates of group points, base is the standard generator and all values
-// are in little-endian form.
-func ScalarBaseMult(dst, in *[32]byte) {
-	ScalarMult(dst, in, &basePoint)
-}
--- a/vendor/golang.org/x/crypto/curve25519/freeze_amd64.s
+++ b/vendor/golang.org/x/crypto/curve25519/freeze_amd64.s
@ -1,73 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This code was translated into a form compatible with 6a from the public
-// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
-
-// +build amd64,!gccgo,!appengine
-
-#include "const_amd64.h"
-
-// func freeze(inout *[5]uint64)
-TEXT ·freeze(SB),7,$0-8
-	MOVQ inout+0(FP), DI
-
-	MOVQ 0(DI),SI
-	MOVQ 8(DI),DX
-	MOVQ 16(DI),CX
-	MOVQ 24(DI),R8
-	MOVQ 32(DI),R9
-	MOVQ $REDMASK51,AX
-	MOVQ AX,R10
-	SUBQ $18,R10
-	MOVQ $3,R11
-REDUCELOOP:
-	MOVQ SI,R12
-	SHRQ $51,R12
-	ANDQ AX,SI
-	ADDQ R12,DX
-	MOVQ DX,R12
-	SHRQ $51,R12
-	ANDQ AX,DX
-	ADDQ R12,CX
-	MOVQ CX,R12
-	SHRQ $51,R12
-	ANDQ AX,CX
-	ADDQ R12,R8
-	MOVQ R8,R12
-	SHRQ $51,R12
-	ANDQ AX,R8
-	ADDQ R12,R9
-	MOVQ R9,R12
-	SHRQ $51,R12
-	ANDQ AX,R9
-	IMUL3Q $19,R12,R12
-	ADDQ R12,SI
-	SUBQ $1,R11
-	JA REDUCELOOP
-	MOVQ $1,R12
-	CMPQ R10,SI
-	CMOVQLT R11,R12
-	CMPQ AX,DX
-	CMOVQNE R11,R12
-	CMPQ AX,CX
-	CMOVQNE R11,R12
-	CMPQ AX,R8
-	CMOVQNE R11,R12
-	CMPQ AX,R9
-	CMOVQNE R11,R12
-	NEGQ R12
-	ANDQ R12,AX
-	ANDQ R12,R10
-	SUBQ R10,SI
-	SUBQ AX,DX
-	SUBQ AX,CX
-	SUBQ AX,R8
-	SUBQ AX,R9
-	MOVQ SI,0(DI)
-	MOVQ DX,8(DI)
-	MOVQ CX,16(DI)
-	MOVQ R8,24(DI)
-	MOVQ R9,32(DI)
-	RET
--- a/vendor/golang.org/x/crypto/curve25519/mul_amd64.s
+++ b/vendor/golang.org/x/crypto/curve25519/mul_amd64.s
@ -1,169 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This code was translated into a form compatible with 6a from the public
-// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
-
-// +build amd64,!gccgo,!appengine
-
-#include "const_amd64.h"
-
-// func mul(dest, a, b *[5]uint64)
-TEXT ·mul(SB),0,$16-24
-	MOVQ dest+0(FP), DI
-	MOVQ a+8(FP), SI
-	MOVQ b+16(FP), DX
-
-	MOVQ DX,CX
-	MOVQ 24(SI),DX
-	IMUL3Q $19,DX,AX
-	MOVQ AX,0(SP)
-	MULQ 16(CX)
-	MOVQ AX,R8
-	MOVQ DX,R9
-	MOVQ 32(SI),DX
-	IMUL3Q $19,DX,AX
-	MOVQ AX,8(SP)
-	MULQ 8(CX)
-	ADDQ AX,R8
-	ADCQ DX,R9
-	MOVQ 0(SI),AX
-	MULQ 0(CX)
-	ADDQ AX,R8
-	ADCQ DX,R9
-	MOVQ 0(SI),AX
-	MULQ 8(CX)
-	MOVQ AX,R10
-	MOVQ DX,R11
-	MOVQ 0(SI),AX
-	MULQ 16(CX)
-	MOVQ AX,R12
-	MOVQ DX,R13
-	MOVQ 0(SI),AX
-	MULQ 24(CX)
-	MOVQ AX,R14
-	MOVQ DX,R15
-	MOVQ 0(SI),AX
-	MULQ 32(CX)
-	MOVQ AX,BX
-	MOVQ DX,BP
-	MOVQ 8(SI),AX
-	MULQ 0(CX)
-	ADDQ AX,R10
-	ADCQ DX,R11
-	MOVQ 8(SI),AX
-	MULQ 8(CX)
-	ADDQ AX,R12
-	ADCQ DX,R13
-	MOVQ 8(SI),AX
-	MULQ 16(CX)
-	ADDQ AX,R14
-	ADCQ DX,R15
-	MOVQ 8(SI),AX
-	MULQ 24(CX)
-	ADDQ AX,BX
-	ADCQ DX,BP
-	MOVQ 8(SI),DX
-	IMUL3Q $19,DX,AX
-	MULQ 32(CX)
-	ADDQ AX,R8
-	ADCQ DX,R9
-	MOVQ 16(SI),AX
-	MULQ 0(CX)
-	ADDQ AX,R12
-	ADCQ DX,R13
-	MOVQ 16(SI),AX
-	MULQ 8(CX)
-	ADDQ AX,R14
-	ADCQ DX,R15
-	MOVQ 16(SI),AX
-	MULQ 16(CX)
-	ADDQ AX,BX
-	ADCQ DX,BP
-	MOVQ 16(SI),DX
-	IMUL3Q $19,DX,AX
-	MULQ 24(CX)
-	ADDQ AX,R8
-	ADCQ DX,R9
-	MOVQ 16(SI),DX
-	IMUL3Q $19,DX,AX
-	MULQ 32(CX)
-	ADDQ AX,R10
-	ADCQ DX,R11
-	MOVQ 24(SI),AX
-	MULQ 0(CX)
-	ADDQ AX,R14
-	ADCQ DX,R15
-	MOVQ 24(SI),AX
-	MULQ 8(CX)
-	ADDQ AX,BX
-	ADCQ DX,BP
-	MOVQ 0(SP),AX
-	MULQ 24(CX)
-	ADDQ AX,R10
-	ADCQ DX,R11
-	MOVQ 0(SP),AX
-	MULQ 32(CX)
-	ADDQ AX,R12
-	ADCQ DX,R13
-	MOVQ 32(SI),AX
-	MULQ 0(CX)
-	ADDQ AX,BX
-	ADCQ DX,BP
-	MOVQ 8(SP),AX
-	MULQ 16(CX)
-	ADDQ AX,R10
-	ADCQ DX,R11
-	MOVQ 8(SP),AX
-	MULQ 24(CX)
-	ADDQ AX,R12
-	ADCQ DX,R13
-	MOVQ 8(SP),AX
-	MULQ 32(CX)
-	ADDQ AX,R14
-	ADCQ DX,R15
-	MOVQ $REDMASK51,SI
-	SHLQ $13,R9:R8
-	ANDQ SI,R8
-	SHLQ $13,R11:R10
-	ANDQ SI,R10
-	ADDQ R9,R10
-	SHLQ $13,R13:R12
-	ANDQ SI,R12
-	ADDQ R11,R12
-	SHLQ $13,R15:R14
-	ANDQ SI,R14
-	ADDQ R13,R14
-	SHLQ $13,BP:BX
-	ANDQ SI,BX
-	ADDQ R15,BX
-	IMUL3Q $19,BP,DX
-	ADDQ DX,R8
-	MOVQ R8,DX
-	SHRQ $51,DX
-	ADDQ R10,DX
-	MOVQ DX,CX
-	SHRQ $51,DX
-	ANDQ SI,R8
-	ADDQ R12,DX
-	MOVQ DX,R9
-	SHRQ $51,DX
-	ANDQ SI,CX
-	ADDQ R14,DX
-	MOVQ DX,AX
-	SHRQ $51,DX
-	ANDQ SI,R9
-	ADDQ BX,DX
-	MOVQ DX,R10
-	SHRQ $51,DX
-	ANDQ SI,AX
-	IMUL3Q $19,DX,DX
-	ADDQ DX,R8
-	ANDQ SI,R10
-	MOVQ R8,0(DI)
-	MOVQ CX,8(DI)
-	MOVQ R9,16(DI)
-	MOVQ AX,24(DI)
-	MOVQ R10,32(DI)
-	RET
--- a/vendor/golang.org/x/crypto/curve25519/square_amd64.s
+++ b/vendor/golang.org/x/crypto/curve25519/square_amd64.s
@ -1,132 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This code was translated into a form compatible with 6a from the public
-// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
-
-// +build amd64,!gccgo,!appengine
-
-#include "const_amd64.h"
-
-// func square(out, in *[5]uint64)
-TEXT ·square(SB),7,$0-16
-	MOVQ out+0(FP), DI
-	MOVQ in+8(FP), SI
-
-	MOVQ 0(SI),AX
-	MULQ 0(SI)
-	MOVQ AX,CX
-	MOVQ DX,R8
-	MOVQ 0(SI),AX
-	SHLQ $1,AX
-	MULQ 8(SI)
-	MOVQ AX,R9
-	MOVQ DX,R10
-	MOVQ 0(SI),AX
-	SHLQ $1,AX
-	MULQ 16(SI)
-	MOVQ AX,R11
-	MOVQ DX,R12
-	MOVQ 0(SI),AX
-	SHLQ $1,AX
-	MULQ 24(SI)
-	MOVQ AX,R13
-	MOVQ DX,R14
-	MOVQ 0(SI),AX
-	SHLQ $1,AX
-	MULQ 32(SI)
-	MOVQ AX,R15
-	MOVQ DX,BX
-	MOVQ 8(SI),AX
-	MULQ 8(SI)
-	ADDQ AX,R11
-	ADCQ DX,R12
-	MOVQ 8(SI),AX
-	SHLQ $1,AX
-	MULQ 16(SI)
-	ADDQ AX,R13
-	ADCQ DX,R14
-	MOVQ 8(SI),AX
-	SHLQ $1,AX
-	MULQ 24(SI)
-	ADDQ AX,R15
-	ADCQ DX,BX
-	MOVQ 8(SI),DX
-	IMUL3Q $38,DX,AX
-	MULQ 32(SI)
-	ADDQ AX,CX
-	ADCQ DX,R8
-	MOVQ 16(SI),AX
-	MULQ 16(SI)
-	ADDQ AX,R15
-	ADCQ DX,BX
-	MOVQ 16(SI),DX
-	IMUL3Q $38,DX,AX
-	MULQ 24(SI)
-	ADDQ AX,CX
-	ADCQ DX,R8
-	MOVQ 16(SI),DX
-	IMUL3Q $38,DX,AX
-	MULQ 32(SI)
-	ADDQ AX,R9
-	ADCQ DX,R10
-	MOVQ 24(SI),DX
-	IMUL3Q $19,DX,AX
-	MULQ 24(SI)
-	ADDQ AX,R9
-	ADCQ DX,R10
-	MOVQ 24(SI),DX
-	IMUL3Q $38,DX,AX
-	MULQ 32(SI)
-	ADDQ AX,R11
-	ADCQ DX,R12
-	MOVQ 32(SI),DX
-	IMUL3Q $19,DX,AX
-	MULQ 32(SI)
-	ADDQ AX,R13
-	ADCQ DX,R14
-	MOVQ $REDMASK51,SI
-	SHLQ $13,R8:CX
-	ANDQ SI,CX
-	SHLQ $13,R10:R9
-	ANDQ SI,R9
-	ADDQ R8,R9
-	SHLQ $13,R12:R11
-	ANDQ SI,R11
-	ADDQ R10,R11
-	SHLQ $13,R14:R13
-	ANDQ SI,R13
-	ADDQ R12,R13
-	SHLQ $13,BX:R15
-	ANDQ SI,R15
-	ADDQ R14,R15
-	IMUL3Q $19,BX,DX
-	ADDQ DX,CX
-	MOVQ CX,DX
-	SHRQ $51,DX
-	ADDQ R9,DX
-	ANDQ SI,CX
-	MOVQ DX,R8
-	SHRQ $51,DX
-	ADDQ R11,DX
-	ANDQ SI,R8
-	MOVQ DX,R9
-	SHRQ $51,DX
-	ADDQ R13,DX
-	ANDQ SI,R9
-	MOVQ DX,AX
-	SHRQ $51,DX
-	ADDQ R15,DX
-	ANDQ SI,AX
-	MOVQ DX,R10
-	SHRQ $51,DX
-	IMUL3Q $19,DX,DX
-	ADDQ DX,CX
-	ANDQ SI,R10
-	MOVQ CX,0(DI)
-	MOVQ R8,8(DI)
-	MOVQ R9,16(DI)
-	MOVQ AX,24(DI)
-	MOVQ R10,32(DI)
-	RET
--- a/vendor/golang.org/x/crypto/ed25519/ed25519.go
+++ b/vendor/golang.org/x/crypto/ed25519/ed25519.go
@ -2,6 +2,11 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

+// In Go 1.13, the ed25519 package was promoted to the standard library as
+// crypto/ed25519, and this package became a wrapper for the standard library one.
+//
+// +build !go1.13
+
 // Package ed25519 implements the Ed25519 signature algorithm. See
 // https://ed25519.cr.yp.to/.
 //
--- a/vendor/golang.org/x/crypto/ed25519/ed25519_go113.go
+++ b/vendor/golang.org/x/crypto/ed25519/ed25519_go113.go
@ -0,0 +1,73 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build go1.13
+
+// Package ed25519 implements the Ed25519 signature algorithm. See
+// https://ed25519.cr.yp.to/.
+//
+// These functions are also compatible with the “Ed25519” function defined in
+// RFC 8032. However, unlike RFC 8032's formulation, this package's private key
+// representation includes a public key suffix to make multiple signing
+// operations with the same key more efficient. This package refers to the RFC
+// 8032 private key as the “seed”.
+//
+// Beginning with Go 1.13, the functionality of this package was moved to the
+// standard library as crypto/ed25519. This package only acts as a compatibility
+// wrapper.
+package ed25519
+
+import (
+	"crypto/ed25519"
+	"io"
+)
+
+const (
+	// PublicKeySize is the size, in bytes, of public keys as used in this package.
+	PublicKeySize = 32
+	// PrivateKeySize is the size, in bytes, of private keys as used in this package.
+	PrivateKeySize = 64
+	// SignatureSize is the size, in bytes, of signatures generated and verified by this package.
+	SignatureSize = 64
+	// SeedSize is the size, in bytes, of private key seeds. These are the private key representations used by RFC 8032.
+	SeedSize = 32
+)
+
+// PublicKey is the type of Ed25519 public keys.
+//
+// This type is an alias for crypto/ed25519's PublicKey type.
+// See the crypto/ed25519 package for the methods on this type.
+type PublicKey = ed25519.PublicKey
+
+// PrivateKey is the type of Ed25519 private keys. It implements crypto.Signer.
+//
+// This type is an alias for crypto/ed25519's PrivateKey type.
+// See the crypto/ed25519 package for the methods on this type.
+type PrivateKey = ed25519.PrivateKey
+
+// GenerateKey generates a public/private key pair using entropy from rand.
+// If rand is nil, crypto/rand.Reader will be used.
+func GenerateKey(rand io.Reader) (PublicKey, PrivateKey, error) {
+	return ed25519.GenerateKey(rand)
+}
+
+// NewKeyFromSeed calculates a private key from a seed. It will panic if
+// len(seed) is not SeedSize. This function is provided for interoperability
+// with RFC 8032. RFC 8032's private keys correspond to seeds in this
+// package.
+func NewKeyFromSeed(seed []byte) PrivateKey {
+	return ed25519.NewKeyFromSeed(seed)
+}
+
+// Sign signs the message with privateKey and returns a signature. It will
+// panic if len(privateKey) is not PrivateKeySize.
+func Sign(privateKey PrivateKey, message []byte) []byte {
+	return ed25519.Sign(privateKey, message)
+}
+
+// Verify reports whether sig is a valid signature of message by publicKey. It
+// will panic if len(publicKey) is not PublicKeySize.
+func Verify(publicKey PublicKey, message, sig []byte) bool {
+	return ed25519.Verify(publicKey, message, sig)
+}
--- a/vendor/golang.org/x/crypto/go.mod
+++ b/vendor/golang.org/x/crypto/go.mod
@ -1,3 +1,8 @@
 module golang.org/x/crypto

-require golang.org/x/sys v0.0.0-20190403152447-81d4e9dc473e
+go 1.11
+
+require (
+	golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3
+	golang.org/x/sys v0.0.0-20190412213103-97732733099d
+)
--- a/vendor/golang.org/x/crypto/internal/chacha20/chacha_arm64.go
+++ b/vendor/golang.org/x/crypto/internal/chacha20/chacha_arm64.go
@ -1,31 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build go1.11
-// +build !gccgo
-
-package chacha20
-
-const (
-	haveAsm = true
-	bufSize = 256
-)
-
-//go:noescape
-func xorKeyStreamVX(dst, src []byte, key *[8]uint32, nonce *[3]uint32, counter *uint32)
-
-func (c *Cipher) xorKeyStreamAsm(dst, src []byte) {
-
-	if len(src) >= bufSize {
-		xorKeyStreamVX(dst, src, &c.key, &c.nonce, &c.counter)
-	}
-
-	if len(src)%bufSize != 0 {
-		i := len(src) - len(src)%bufSize
-		c.buf = [bufSize]byte{}
-		copy(c.buf[:], src[i:])
-		xorKeyStreamVX(c.buf[:], c.buf[:], &c.key, &c.nonce, &c.counter)
-		c.len = bufSize - copy(dst[i:], c.buf[:len(src)%bufSize])
-	}
-}
--- a/vendor/golang.org/x/crypto/internal/chacha20/chacha_generic.go
+++ b/vendor/golang.org/x/crypto/internal/chacha20/chacha_generic.go
@ -1,264 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package ChaCha20 implements the core ChaCha20 function as specified
-// in https://tools.ietf.org/html/rfc7539#section-2.3.
-package chacha20
-
-import (
-	"crypto/cipher"
-	"encoding/binary"
-
-	"golang.org/x/crypto/internal/subtle"
-)
-
-// assert that *Cipher implements cipher.Stream
-var _ cipher.Stream = (*Cipher)(nil)
-
-// Cipher is a stateful instance of ChaCha20 using a particular key
-// and nonce. A *Cipher implements the cipher.Stream interface.
-type Cipher struct {
-	key     [8]uint32
-	counter uint32 // incremented after each block
-	nonce   [3]uint32
-	buf     [bufSize]byte // buffer for unused keystream bytes
-	len     int           // number of unused keystream bytes at end of buf
-}
-
-// New creates a new ChaCha20 stream cipher with the given key and nonce.
-// The initial counter value is set to 0.
-func New(key [8]uint32, nonce [3]uint32) *Cipher {
-	return &Cipher{key: key, nonce: nonce}
-}
-
-// ChaCha20 constants spelling "expand 32-byte k"
-const (
-	j0 uint32 = 0x61707865
-	j1 uint32 = 0x3320646e
-	j2 uint32 = 0x79622d32
-	j3 uint32 = 0x6b206574
-)
-
-func quarterRound(a, b, c, d uint32) (uint32, uint32, uint32, uint32) {
-	a += b
-	d ^= a
-	d = (d << 16) | (d >> 16)
-	c += d
-	b ^= c
-	b = (b << 12) | (b >> 20)
-	a += b
-	d ^= a
-	d = (d << 8) | (d >> 24)
-	c += d
-	b ^= c
-	b = (b << 7) | (b >> 25)
-	return a, b, c, d
-}
-
-// XORKeyStream XORs each byte in the given slice with a byte from the
-// cipher's key stream. Dst and src must overlap entirely or not at all.
-//
-// If len(dst) < len(src), XORKeyStream will panic. It is acceptable
-// to pass a dst bigger than src, and in that case, XORKeyStream will
-// only update dst[:len(src)] and will not touch the rest of dst.
-//
-// Multiple calls to XORKeyStream behave as if the concatenation of
-// the src buffers was passed in a single run. That is, Cipher
-// maintains state and does not reset at each XORKeyStream call.
-func (s *Cipher) XORKeyStream(dst, src []byte) {
-	if len(dst) < len(src) {
-		panic("chacha20: output smaller than input")
-	}
-	if subtle.InexactOverlap(dst[:len(src)], src) {
-		panic("chacha20: invalid buffer overlap")
-	}
-
-	// xor src with buffered keystream first
-	if s.len != 0 {
-		buf := s.buf[len(s.buf)-s.len:]
-		if len(src) < len(buf) {
-			buf = buf[:len(src)]
-		}
-		td, ts := dst[:len(buf)], src[:len(buf)] // BCE hint
-		for i, b := range buf {
-			td[i] = ts[i] ^ b
-		}
-		s.len -= len(buf)
-		if s.len != 0 {
-			return
-		}
-		s.buf = [len(s.buf)]byte{} // zero the empty buffer
-		src = src[len(buf):]
-		dst = dst[len(buf):]
-	}
-
-	if len(src) == 0 {
-		return
-	}
-	if haveAsm {
-		if uint64(len(src))+uint64(s.counter)*64 > (1<<38)-64 {
-			panic("chacha20: counter overflow")
-		}
-		s.xorKeyStreamAsm(dst, src)
-		return
-	}
-
-	// set up a 64-byte buffer to pad out the final block if needed
-	// (hoisted out of the main loop to avoid spills)
-	rem := len(src) % 64  // length of final block
-	fin := len(src) - rem // index of final block
-	if rem > 0 {
-		copy(s.buf[len(s.buf)-64:], src[fin:])
-	}
-
-	// pre-calculate most of the first round
-	s1, s5, s9, s13 := quarterRound(j1, s.key[1], s.key[5], s.nonce[0])
-	s2, s6, s10, s14 := quarterRound(j2, s.key[2], s.key[6], s.nonce[1])
-	s3, s7, s11, s15 := quarterRound(j3, s.key[3], s.key[7], s.nonce[2])
-
-	n := len(src)
-	src, dst = src[:n:n], dst[:n:n] // BCE hint
-	for i := 0; i < n; i += 64 {
-		// calculate the remainder of the first round
-		s0, s4, s8, s12 := quarterRound(j0, s.key[0], s.key[4], s.counter)
-
-		// execute the second round
-		x0, x5, x10, x15 := quarterRound(s0, s5, s10, s15)
-		x1, x6, x11, x12 := quarterRound(s1, s6, s11, s12)
-		x2, x7, x8, x13 := quarterRound(s2, s7, s8, s13)
-		x3, x4, x9, x14 := quarterRound(s3, s4, s9, s14)
-
-		// execute the remaining 18 rounds
-		for i := 0; i < 9; i++ {
-			x0, x4, x8, x12 = quarterRound(x0, x4, x8, x12)
-			x1, x5, x9, x13 = quarterRound(x1, x5, x9, x13)
-			x2, x6, x10, x14 = quarterRound(x2, x6, x10, x14)
-			x3, x7, x11, x15 = quarterRound(x3, x7, x11, x15)
-
-			x0, x5, x10, x15 = quarterRound(x0, x5, x10, x15)
-			x1, x6, x11, x12 = quarterRound(x1, x6, x11, x12)
-			x2, x7, x8, x13 = quarterRound(x2, x7, x8, x13)
-			x3, x4, x9, x14 = quarterRound(x3, x4, x9, x14)
-		}
-
-		x0 += j0
-		x1 += j1
-		x2 += j2
-		x3 += j3
-
-		x4 += s.key[0]
-		x5 += s.key[1]
-		x6 += s.key[2]
-		x7 += s.key[3]
-		x8 += s.key[4]
-		x9 += s.key[5]
-		x10 += s.key[6]
-		x11 += s.key[7]
-
-		x12 += s.counter
-		x13 += s.nonce[0]
-		x14 += s.nonce[1]
-		x15 += s.nonce[2]
-
-		// increment the counter
-		s.counter += 1
-		if s.counter == 0 {
-			panic("chacha20: counter overflow")
-		}
-
-		// pad to 64 bytes if needed
-		in, out := src[i:], dst[i:]
-		if i == fin {
-			// src[fin:] has already been copied into s.buf before
-			// the main loop
-			in, out = s.buf[len(s.buf)-64:], s.buf[len(s.buf)-64:]
-		}
-		in, out = in[:64], out[:64] // BCE hint
-
-		// XOR the key stream with the source and write out the result
-		xor(out[0:], in[0:], x0)
-		xor(out[4:], in[4:], x1)
-		xor(out[8:], in[8:], x2)
-		xor(out[12:], in[12:], x3)
-		xor(out[16:], in[16:], x4)
-		xor(out[20:], in[20:], x5)
-		xor(out[24:], in[24:], x6)
-		xor(out[28:], in[28:], x7)
-		xor(out[32:], in[32:], x8)
-		xor(out[36:], in[36:], x9)
-		xor(out[40:], in[40:], x10)
-		xor(out[44:], in[44:], x11)
-		xor(out[48:], in[48:], x12)
-		xor(out[52:], in[52:], x13)
-		xor(out[56:], in[56:], x14)
-		xor(out[60:], in[60:], x15)
-	}
-	// copy any trailing bytes out of the buffer and into dst
-	if rem != 0 {
-		s.len = 64 - rem
-		copy(dst[fin:], s.buf[len(s.buf)-64:])
-	}
-}
-
-// Advance discards bytes in the key stream until the next 64 byte block
-// boundary is reached and updates the counter accordingly. If the key
-// stream is already at a block boundary no bytes will be discarded and
-// the counter will be unchanged.
-func (s *Cipher) Advance() {
-	s.len -= s.len % 64
-	if s.len == 0 {
-		s.buf = [len(s.buf)]byte{}
-	}
-}
-
-// XORKeyStream crypts bytes from in to out using the given key and counters.
-// In and out must overlap entirely or not at all. Counter contains the raw
-// ChaCha20 counter bytes (i.e. block counter followed by nonce).
-func XORKeyStream(out, in []byte, counter *[16]byte, key *[32]byte) {
-	s := Cipher{
-		key: [8]uint32{
-			binary.LittleEndian.Uint32(key[0:4]),
-			binary.LittleEndian.Uint32(key[4:8]),
-			binary.LittleEndian.Uint32(key[8:12]),
-			binary.LittleEndian.Uint32(key[12:16]),
-			binary.LittleEndian.Uint32(key[16:20]),
-			binary.LittleEndian.Uint32(key[20:24]),
-			binary.LittleEndian.Uint32(key[24:28]),
-			binary.LittleEndian.Uint32(key[28:32]),
-		},
-		nonce: [3]uint32{
-			binary.LittleEndian.Uint32(counter[4:8]),
-			binary.LittleEndian.Uint32(counter[8:12]),
-			binary.LittleEndian.Uint32(counter[12:16]),
-		},
-		counter: binary.LittleEndian.Uint32(counter[0:4]),
-	}
-	s.XORKeyStream(out, in)
-}
-
-// HChaCha20 uses the ChaCha20 core to generate a derived key from a key and a
-// nonce. It should only be used as part of the XChaCha20 construction.
-func HChaCha20(key *[8]uint32, nonce *[4]uint32) [8]uint32 {
-	x0, x1, x2, x3 := j0, j1, j2, j3
-	x4, x5, x6, x7 := key[0], key[1], key[2], key[3]
-	x8, x9, x10, x11 := key[4], key[5], key[6], key[7]
-	x12, x13, x14, x15 := nonce[0], nonce[1], nonce[2], nonce[3]
-
-	for i := 0; i < 10; i++ {
-		x0, x4, x8, x12 = quarterRound(x0, x4, x8, x12)
-		x1, x5, x9, x13 = quarterRound(x1, x5, x9, x13)
-		x2, x6, x10, x14 = quarterRound(x2, x6, x10, x14)
-		x3, x7, x11, x15 = quarterRound(x3, x7, x11, x15)
-
-		x0, x5, x10, x15 = quarterRound(x0, x5, x10, x15)
-		x1, x6, x11, x12 = quarterRound(x1, x6, x11, x12)
-		x2, x7, x8, x13 = quarterRound(x2, x7, x8, x13)
-		x3, x4, x9, x14 = quarterRound(x3, x4, x9, x14)
-	}
-
-	var out [8]uint32
-	out[0], out[1], out[2], out[3] = x0, x1, x2, x3
-	out[4], out[5], out[6], out[7] = x12, x13, x14, x15
-	return out
-}
--- a/vendor/golang.org/x/crypto/internal/chacha20/chacha_noasm.go
+++ b/vendor/golang.org/x/crypto/internal/chacha20/chacha_noasm.go
@ -1,16 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !arm64,!s390x arm64,!go1.11 gccgo appengine
-
-package chacha20
-
-const (
-	bufSize = 64
-	haveAsm = false
-)
-
-func (*Cipher) xorKeyStreamAsm(dst, src []byte) {
-	panic("not implemented")
-}
--- a/vendor/golang.org/x/crypto/internal/chacha20/chacha_s390x.go
+++ b/vendor/golang.org/x/crypto/internal/chacha20/chacha_s390x.go
@ -1,29 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build s390x,!gccgo,!appengine
-
-package chacha20
-
-import (
-	"golang.org/x/sys/cpu"
-)
-
-var haveAsm = cpu.S390X.HasVX
-
-const bufSize = 256
-
-// xorKeyStreamVX is an assembly implementation of XORKeyStream. It must only
-// be called when the vector facility is available.
-// Implementation in asm_s390x.s.
-//go:noescape
-func xorKeyStreamVX(dst, src []byte, key *[8]uint32, nonce *[3]uint32, counter *uint32, buf *[256]byte, len *int)
-
-func (c *Cipher) xorKeyStreamAsm(dst, src []byte) {
-	xorKeyStreamVX(dst, src, &c.key, &c.nonce, &c.counter, &c.buf, &c.len)
-}
-
-// EXRL targets, DO NOT CALL!
-func mvcSrcToBuf()
-func mvcBufToDst()
--- a/vendor/golang.org/x/crypto/poly1305/bits_compat.go
+++ b/vendor/golang.org/x/crypto/poly1305/bits_compat.go
@ -0,0 +1,39 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !go1.13
+
+package poly1305
+
+// Generic fallbacks for the math/bits intrinsics, copied from
+// src/math/bits/bits.go. They were added in Go 1.12, but Add64 and Sum64 had
+// variable time fallbacks until Go 1.13.
+
+func bitsAdd64(x, y, carry uint64) (sum, carryOut uint64) {
+	sum = x + y + carry
+	carryOut = ((x & y) | ((x | y) &^ sum)) >> 63
+	return
+}
+
+func bitsSub64(x, y, borrow uint64) (diff, borrowOut uint64) {
+	diff = x - y - borrow
+	borrowOut = ((^x & y) | (^(x ^ y) & diff)) >> 63
+	return
+}
+
+func bitsMul64(x, y uint64) (hi, lo uint64) {
+	const mask32 = 1<<32 - 1
+	x0 := x & mask32
+	x1 := x >> 32
+	y0 := y & mask32
+	y1 := y >> 32
+	w0 := x0 * y0
+	t := x1*y0 + w0>>32
+	w1 := t & mask32
+	w2 := t >> 32
+	w1 += x0 * y1
+	hi = x1*y1 + w2 + w1>>32
+	lo = x * y
+	return
+}
--- a/vendor/golang.org/x/crypto/poly1305/bits_go1.13.go
+++ b/vendor/golang.org/x/crypto/poly1305/bits_go1.13.go
@ -0,0 +1,21 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build go1.13
+
+package poly1305
+
+import "math/bits"
+
+func bitsAdd64(x, y, carry uint64) (sum, carryOut uint64) {
+	return bits.Add64(x, y, carry)
+}
+
+func bitsSub64(x, y, borrow uint64) (diff, borrowOut uint64) {
+	return bits.Sub64(x, y, borrow)
+}
+
+func bitsMul64(x, y uint64) (hi, lo uint64) {
+	return bits.Mul64(x, y)
+}
--- a/vendor/golang.org/x/crypto/poly1305/mac_noasm.go
+++ b/vendor/golang.org/x/crypto/poly1305/mac_noasm.go
@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-// +build !amd64 gccgo appengine
+// +build !amd64,!ppc64le gccgo appengine

 package poly1305

--- a/vendor/golang.org/x/crypto/poly1305/poly1305.go
+++ b/vendor/golang.org/x/crypto/poly1305/poly1305.go
@ -22,8 +22,14 @@ import "crypto/subtle"
 // TagSize is the size, in bytes, of a poly1305 authenticator.
 const TagSize = 16

-// Verify returns true if mac is a valid authenticator for m with the given
-// key.
+// Sum generates an authenticator for msg using a one-time key and puts the
+// 16-byte result into out. Authenticating two different messages with the same
+// key allows an attacker to forge messages at will.
+func Sum(out *[16]byte, m []byte, key *[32]byte) {
+	sum(out, m, key)
+}
+
+// Verify returns true if mac is a valid authenticator for m with the given key.
 func Verify(mac *[16]byte, m []byte, key *[32]byte) bool {
 	var tmp [16]byte
 	Sum(&tmp, m, key)
--- a/vendor/golang.org/x/crypto/poly1305/sum_amd64.go
+++ b/vendor/golang.org/x/crypto/poly1305/sum_amd64.go
@ -7,62 +7,52 @@
 package poly1305

 //go:noescape
-func initialize(state *[7]uint64, key *[32]byte)
+func update(state *macState, msg []byte)

-//go:noescape
-func update(state *[7]uint64, msg []byte)
-
-//go:noescape
-func finalize(tag *[TagSize]byte, state *[7]uint64)
-
-// Sum generates an authenticator for m using a one-time key and puts the
-// 16-byte result into out. Authenticating two different messages with the same
-// key allows an attacker to forge messages at will.
-func Sum(out *[16]byte, m []byte, key *[32]byte) {
+func sum(out *[16]byte, m []byte, key *[32]byte) {
 	h := newMAC(key)
 	h.Write(m)
 	h.Sum(out)
 }

 func newMAC(key *[32]byte) (h mac) {
-	initialize(&h.state, key)
+	initialize(key, &h.r, &h.s)
 	return
 }

-type mac struct {
-	state [7]uint64 // := uint64{ h0, h1, h2, r0, r1, pad0, pad1 }
+// mac is a wrapper for macGeneric that redirects calls that would have gone to
+// updateGeneric to update.
+//
+// Its Write and Sum methods are otherwise identical to the macGeneric ones, but
+// using function pointers would carry a major performance cost.
+type mac struct{ macGeneric }

-	buffer [TagSize]byte
-	offset int
-}
-
-func (h *mac) Write(p []byte) (n int, err error) {
-	n = len(p)
+func (h *mac) Write(p []byte) (int, error) {
+	nn := len(p)
 	if h.offset > 0 {
-		remaining := TagSize - h.offset
-		if n < remaining {
-			h.offset += copy(h.buffer[h.offset:], p)
-			return n, nil
+		n := copy(h.buffer[h.offset:], p)
+		if h.offset+n < TagSize {
+			h.offset += n
+			return nn, nil
 		}
-		copy(h.buffer[h.offset:], p[:remaining])
-		p = p[remaining:]
+		p = p[n:]
 		h.offset = 0
-		update(&h.state, h.buffer[:])
+		update(&h.macState, h.buffer[:])
 	}
-	if nn := len(p) - (len(p) % TagSize); nn > 0 {
-		update(&h.state, p[:nn])
-		p = p[nn:]
+	if n := len(p) - (len(p) % TagSize); n > 0 {
+		update(&h.macState, p[:n])
+		p = p[n:]
 	}
 	if len(p) > 0 {
 		h.offset += copy(h.buffer[h.offset:], p)
 	}
-	return n, nil
+	return nn, nil
 }

 func (h *mac) Sum(out *[16]byte) {
-	state := h.state
+	state := h.macState
 	if h.offset > 0 {
 		update(&state, h.buffer[:h.offset])
 	}
-	finalize(out, &state)
+	finalize(out, &state.h, &state.s)
 }
--- a/vendor/golang.org/x/crypto/poly1305/sum_amd64.s
+++ b/vendor/golang.org/x/crypto/poly1305/sum_amd64.s
@ -54,10 +54,6 @@
 	ADCQ  t3, h1;                  \
 	ADCQ  $0, h2

-DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF
-DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
-GLOBL ·poly1305Mask<>(SB), RODATA, $16
-
 // func update(state *[7]uint64, msg []byte)
 TEXT ·update(SB), $0-32
 	MOVQ state+0(FP), DI
@ -110,39 +106,3 @@ done:
 	MOVQ R9, 8(DI)
 	MOVQ R10, 16(DI)
 	RET
-
-// func initialize(state *[7]uint64, key *[32]byte)
-TEXT ·initialize(SB), $0-16
-	MOVQ state+0(FP), DI
-	MOVQ key+8(FP), SI
-
-	// state[0...7] is initialized with zero
-	MOVOU 0(SI), X0
-	MOVOU 16(SI), X1
-	MOVOU ·poly1305Mask<>(SB), X2
-	PAND  X2, X0
-	MOVOU X0, 24(DI)
-	MOVOU X1, 40(DI)
-	RET
-
-// func finalize(tag *[TagSize]byte, state *[7]uint64)
-TEXT ·finalize(SB), $0-16
-	MOVQ tag+0(FP), DI
-	MOVQ state+8(FP), SI
-
-	MOVQ    0(SI), AX
-	MOVQ    8(SI), BX
-	MOVQ    16(SI), CX
-	MOVQ    AX, R8
-	MOVQ    BX, R9
-	SUBQ    $0xFFFFFFFFFFFFFFFB, AX
-	SBBQ    $0xFFFFFFFFFFFFFFFF, BX
-	SBBQ    $3, CX
-	CMOVQCS R8, AX
-	CMOVQCS R9, BX
-	ADDQ    40(SI), AX
-	ADCQ    48(SI), BX
-
-	MOVQ AX, 0(DI)
-	MOVQ BX, 8(DI)
-	RET
--- a/vendor/golang.org/x/crypto/poly1305/sum_arm.go
+++ b/vendor/golang.org/x/crypto/poly1305/sum_arm.go
@ -1,22 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build arm,!gccgo,!appengine,!nacl
-
-package poly1305
-
-// This function is implemented in sum_arm.s
-//go:noescape
-func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]byte)
-
-// Sum generates an authenticator for m using a one-time key and puts the
-// 16-byte result into out. Authenticating two different messages with the same
-// key allows an attacker to forge messages at will.
-func Sum(out *[16]byte, m []byte, key *[32]byte) {
-	var mPtr *byte
-	if len(m) > 0 {
-		mPtr = &m[0]
-	}
-	poly1305_auth_armv6(out, mPtr, uint32(len(m)), key)
-}
--- a/vendor/golang.org/x/crypto/poly1305/sum_arm.s
+++ b/vendor/golang.org/x/crypto/poly1305/sum_arm.s
@ -1,427 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build arm,!gccgo,!appengine,!nacl
-
-#include "textflag.h"
-
-// This code was translated into a form compatible with 5a from the public
-// domain source by Andrew Moon: github.com/floodyberry/poly1305-opt/blob/master/app/extensions/poly1305.
-
-DATA ·poly1305_init_constants_armv6<>+0x00(SB)/4, $0x3ffffff
-DATA ·poly1305_init_constants_armv6<>+0x04(SB)/4, $0x3ffff03
-DATA ·poly1305_init_constants_armv6<>+0x08(SB)/4, $0x3ffc0ff
-DATA ·poly1305_init_constants_armv6<>+0x0c(SB)/4, $0x3f03fff
-DATA ·poly1305_init_constants_armv6<>+0x10(SB)/4, $0x00fffff
-GLOBL ·poly1305_init_constants_armv6<>(SB), 8, $20
-
-// Warning: the linker may use R11 to synthesize certain instructions. Please
-// take care and verify that no synthetic instructions use it.
-
-TEXT poly1305_init_ext_armv6<>(SB), NOSPLIT, $0
-	// Needs 16 bytes of stack and 64 bytes of space pointed to by R0.  (It
-	// might look like it's only 60 bytes of space but the final four bytes
-	// will be written by another function.) We need to skip over four
-	// bytes of stack because that's saving the value of 'g'.
-	ADD       $4, R13, R8
-	MOVM.IB   [R4-R7], (R8)
-	MOVM.IA.W (R1), [R2-R5]
-	MOVW      $·poly1305_init_constants_armv6<>(SB), R7
-	MOVW      R2, R8
-	MOVW      R2>>26, R9
-	MOVW      R3>>20, g
-	MOVW      R4>>14, R11
-	MOVW      R5>>8, R12
-	ORR       R3<<6, R9, R9
-	ORR       R4<<12, g, g
-	ORR       R5<<18, R11, R11
-	MOVM.IA   (R7), [R2-R6]
-	AND       R8, R2, R2
-	AND       R9, R3, R3
-	AND       g, R4, R4
-	AND       R11, R5, R5
-	AND       R12, R6, R6
-	MOVM.IA.W [R2-R6], (R0)
-	EOR       R2, R2, R2
-	EOR       R3, R3, R3
-	EOR       R4, R4, R4
-	EOR       R5, R5, R5
-	EOR       R6, R6, R6
-	MOVM.IA.W [R2-R6], (R0)
-	MOVM.IA.W (R1), [R2-R5]
-	MOVM.IA   [R2-R6], (R0)
-	ADD       $20, R13, R0
-	MOVM.DA   (R0), [R4-R7]
-	RET
-
-#define MOVW_UNALIGNED(Rsrc, Rdst, Rtmp, offset) \
-	MOVBU (offset+0)(Rsrc), Rtmp; \
-	MOVBU Rtmp, (offset+0)(Rdst); \
-	MOVBU (offset+1)(Rsrc), Rtmp; \
-	MOVBU Rtmp, (offset+1)(Rdst); \
-	MOVBU (offset+2)(Rsrc), Rtmp; \
-	MOVBU Rtmp, (offset+2)(Rdst); \
-	MOVBU (offset+3)(Rsrc), Rtmp; \
-	MOVBU Rtmp, (offset+3)(Rdst)
-
-TEXT poly1305_blocks_armv6<>(SB), NOSPLIT, $0
-	// Needs 24 bytes of stack for saved registers and then 88 bytes of
-	// scratch space after that. We assume that 24 bytes at (R13) have
-	// already been used: four bytes for the link register saved in the
-	// prelude of poly1305_auth_armv6, four bytes for saving the value of g
-	// in that function and 16 bytes of scratch space used around
-	// poly1305_finish_ext_armv6_skip1.
-	ADD     $24, R13, R12
-	MOVM.IB [R4-R8, R14], (R12)
-	MOVW    R0, 88(R13)
-	MOVW    R1, 92(R13)
-	MOVW    R2, 96(R13)
-	MOVW    R1, R14
-	MOVW    R2, R12
-	MOVW    56(R0), R8
-	WORD    $0xe1180008                // TST R8, R8 not working see issue 5921
-	EOR     R6, R6, R6
-	MOVW.EQ $(1<<24), R6
-	MOVW    R6, 84(R13)
-	ADD     $116, R13, g
-	MOVM.IA (R0), [R0-R9]
-	MOVM.IA [R0-R4], (g)
-	CMP     $16, R12
-	BLO     poly1305_blocks_armv6_done
-
-poly1305_blocks_armv6_mainloop:
-	WORD    $0xe31e0003                            // TST R14, #3 not working see issue 5921
-	BEQ     poly1305_blocks_armv6_mainloop_aligned
-	ADD     $100, R13, g
-	MOVW_UNALIGNED(R14, g, R0, 0)
-	MOVW_UNALIGNED(R14, g, R0, 4)
-	MOVW_UNALIGNED(R14, g, R0, 8)
-	MOVW_UNALIGNED(R14, g, R0, 12)
-	MOVM.IA (g), [R0-R3]
-	ADD     $16, R14
-	B       poly1305_blocks_armv6_mainloop_loaded
-
-poly1305_blocks_armv6_mainloop_aligned:
-	MOVM.IA.W (R14), [R0-R3]
-
-poly1305_blocks_armv6_mainloop_loaded:
-	MOVW    R0>>26, g
-	MOVW    R1>>20, R11
-	MOVW    R2>>14, R12
-	MOVW    R14, 92(R13)
-	MOVW    R3>>8, R4
-	ORR     R1<<6, g, g
-	ORR     R2<<12, R11, R11
-	ORR     R3<<18, R12, R12
-	BIC     $0xfc000000, R0, R0
-	BIC     $0xfc000000, g, g
-	MOVW    84(R13), R3
-	BIC     $0xfc000000, R11, R11
-	BIC     $0xfc000000, R12, R12
-	ADD     R0, R5, R5
-	ADD     g, R6, R6
-	ORR     R3, R4, R4
-	ADD     R11, R7, R7
-	ADD     $116, R13, R14
-	ADD     R12, R8, R8
-	ADD     R4, R9, R9
-	MOVM.IA (R14), [R0-R4]
-	MULLU   R4, R5, (R11, g)
-	MULLU   R3, R5, (R14, R12)
-	MULALU  R3, R6, (R11, g)
-	MULALU  R2, R6, (R14, R12)
-	MULALU  R2, R7, (R11, g)
-	MULALU  R1, R7, (R14, R12)
-	ADD     R4<<2, R4, R4
-	ADD     R3<<2, R3, R3
-	MULALU  R1, R8, (R11, g)
-	MULALU  R0, R8, (R14, R12)
-	MULALU  R0, R9, (R11, g)
-	MULALU  R4, R9, (R14, R12)
-	MOVW    g, 76(R13)
-	MOVW    R11, 80(R13)
-	MOVW    R12, 68(R13)
-	MOVW    R14, 72(R13)
-	MULLU   R2, R5, (R11, g)
-	MULLU   R1, R5, (R14, R12)
-	MULALU  R1, R6, (R11, g)
-	MULALU  R0, R6, (R14, R12)
-	MULALU  R0, R7, (R11, g)
-	MULALU  R4, R7, (R14, R12)
-	ADD     R2<<2, R2, R2
-	ADD     R1<<2, R1, R1
-	MULALU  R4, R8, (R11, g)
-	MULALU  R3, R8, (R14, R12)
-	MULALU  R3, R9, (R11, g)
-	MULALU  R2, R9, (R14, R12)
-	MOVW    g, 60(R13)
-	MOVW    R11, 64(R13)
-	MOVW    R12, 52(R13)
-	MOVW    R14, 56(R13)
-	MULLU   R0, R5, (R11, g)
-	MULALU  R4, R6, (R11, g)
-	MULALU  R3, R7, (R11, g)
-	MULALU  R2, R8, (R11, g)
-	MULALU  R1, R9, (R11, g)
-	ADD     $52, R13, R0
-	MOVM.IA (R0), [R0-R7]
-	MOVW    g>>26, R12
-	MOVW    R4>>26, R14
-	ORR     R11<<6, R12, R12
-	ORR     R5<<6, R14, R14
-	BIC     $0xfc000000, g, g
-	BIC     $0xfc000000, R4, R4
-	ADD.S   R12, R0, R0
-	ADC     $0, R1, R1
-	ADD.S   R14, R6, R6
-	ADC     $0, R7, R7
-	MOVW    R0>>26, R12
-	MOVW    R6>>26, R14
-	ORR     R1<<6, R12, R12
-	ORR     R7<<6, R14, R14
-	BIC     $0xfc000000, R0, R0
-	BIC     $0xfc000000, R6, R6
-	ADD     R14<<2, R14, R14
-	ADD.S   R12, R2, R2
-	ADC     $0, R3, R3
-	ADD     R14, g, g
-	MOVW    R2>>26, R12
-	MOVW    g>>26, R14
-	ORR     R3<<6, R12, R12
-	BIC     $0xfc000000, g, R5
-	BIC     $0xfc000000, R2, R7
-	ADD     R12, R4, R4
-	ADD     R14, R0, R0
-	MOVW    R4>>26, R12
-	BIC     $0xfc000000, R4, R8
-	ADD     R12, R6, R9
-	MOVW    96(R13), R12
-	MOVW    92(R13), R14
-	MOVW    R0, R6
-	CMP     $32, R12
-	SUB     $16, R12, R12
-	MOVW    R12, 96(R13)
-	BHS     poly1305_blocks_armv6_mainloop
-
-poly1305_blocks_armv6_done:
-	MOVW    88(R13), R12
-	MOVW    R5, 20(R12)
-	MOVW    R6, 24(R12)
-	MOVW    R7, 28(R12)
-	MOVW    R8, 32(R12)
-	MOVW    R9, 36(R12)
-	ADD     $48, R13, R0
-	MOVM.DA (R0), [R4-R8, R14]
-	RET
-
-#define MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp) \
-	MOVBU.P 1(Rsrc), Rtmp; \
-	MOVBU.P Rtmp, 1(Rdst); \
-	MOVBU.P 1(Rsrc), Rtmp; \
-	MOVBU.P Rtmp, 1(Rdst)
-
-#define MOVWP_UNALIGNED(Rsrc, Rdst, Rtmp) \
-	MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp); \
-	MOVHUP_UNALIGNED(Rsrc, Rdst, Rtmp)
-
-// func poly1305_auth_armv6(out *[16]byte, m *byte, mlen uint32, key *[32]key)
-TEXT ·poly1305_auth_armv6(SB), $196-16
-	// The value 196, just above, is the sum of 64 (the size of the context
-	// structure) and 132 (the amount of stack needed).
-	//
-	// At this point, the stack pointer (R13) has been moved down. It
-	// points to the saved link register and there's 196 bytes of free
-	// space above it.
-	//
-	// The stack for this function looks like:
-	//
-	// +---------------------
-	// |
-	// | 64 bytes of context structure
-	// |
-	// +---------------------
-	// |
-	// | 112 bytes for poly1305_blocks_armv6
-	// |
-	// +---------------------
-	// | 16 bytes of final block, constructed at
-	// | poly1305_finish_ext_armv6_skip8
-	// +---------------------
-	// | four bytes of saved 'g'
-	// +---------------------
-	// | lr, saved by prelude    <- R13 points here
-	// +---------------------
-	MOVW g, 4(R13)
-
-	MOVW out+0(FP), R4
-	MOVW m+4(FP), R5
-	MOVW mlen+8(FP), R6
-	MOVW key+12(FP), R7
-
-	ADD  $136, R13, R0 // 136 = 4 + 4 + 16 + 112
-	MOVW R7, R1
-
-	// poly1305_init_ext_armv6 will write to the stack from R13+4, but
-	// that's ok because none of the other values have been written yet.
-	BL    poly1305_init_ext_armv6<>(SB)
-	BIC.S $15, R6, R2
-	BEQ   poly1305_auth_armv6_noblocks
-	ADD   $136, R13, R0
-	MOVW  R5, R1
-	ADD   R2, R5, R5
-	SUB   R2, R6, R6
-	BL    poly1305_blocks_armv6<>(SB)
-
-poly1305_auth_armv6_noblocks:
-	ADD  $136, R13, R0
-	MOVW R5, R1
-	MOVW R6, R2
-	MOVW R4, R3
-
-	MOVW  R0, R5
-	MOVW  R1, R6
-	MOVW  R2, R7
-	MOVW  R3, R8
-	AND.S R2, R2, R2
-	BEQ   poly1305_finish_ext_armv6_noremaining
-	EOR   R0, R0
-	ADD   $8, R13, R9                           // 8 = offset to 16 byte scratch space
-	MOVW  R0, (R9)
-	MOVW  R0, 4(R9)
-	MOVW  R0, 8(R9)
-	MOVW  R0, 12(R9)
-	WORD  $0xe3110003                           // TST R1, #3 not working see issue 5921
-	BEQ   poly1305_finish_ext_armv6_aligned
-	WORD  $0xe3120008                           // TST R2, #8 not working see issue 5921
-	BEQ   poly1305_finish_ext_armv6_skip8
-	MOVWP_UNALIGNED(R1, R9, g)
-	MOVWP_UNALIGNED(R1, R9, g)
-
-poly1305_finish_ext_armv6_skip8:
-	WORD $0xe3120004                     // TST $4, R2 not working see issue 5921
-	BEQ  poly1305_finish_ext_armv6_skip4
-	MOVWP_UNALIGNED(R1, R9, g)
-
-poly1305_finish_ext_armv6_skip4:
-	WORD $0xe3120002                     // TST $2, R2 not working see issue 5921
-	BEQ  poly1305_finish_ext_armv6_skip2
-	MOVHUP_UNALIGNED(R1, R9, g)
-	B    poly1305_finish_ext_armv6_skip2
-
-poly1305_finish_ext_armv6_aligned:
-	WORD      $0xe3120008                             // TST R2, #8 not working see issue 5921
-	BEQ       poly1305_finish_ext_armv6_skip8_aligned
-	MOVM.IA.W (R1), [g-R11]
-	MOVM.IA.W [g-R11], (R9)
-
-poly1305_finish_ext_armv6_skip8_aligned:
-	WORD   $0xe3120004                             // TST $4, R2 not working see issue 5921
-	BEQ    poly1305_finish_ext_armv6_skip4_aligned
-	MOVW.P 4(R1), g
-	MOVW.P g, 4(R9)
-
-poly1305_finish_ext_armv6_skip4_aligned:
-	WORD    $0xe3120002                     // TST $2, R2 not working see issue 5921
-	BEQ     poly1305_finish_ext_armv6_skip2
-	MOVHU.P 2(R1), g
-	MOVH.P  g, 2(R9)
-
-poly1305_finish_ext_armv6_skip2:
-	WORD    $0xe3120001                     // TST $1, R2 not working see issue 5921
-	BEQ     poly1305_finish_ext_armv6_skip1
-	MOVBU.P 1(R1), g
-	MOVBU.P g, 1(R9)
-
-poly1305_finish_ext_armv6_skip1:
-	MOVW  $1, R11
-	MOVBU R11, 0(R9)
-	MOVW  R11, 56(R5)
-	MOVW  R5, R0
-	ADD   $8, R13, R1
-	MOVW  $16, R2
-	BL    poly1305_blocks_armv6<>(SB)
-
-poly1305_finish_ext_armv6_noremaining:
-	MOVW      20(R5), R0
-	MOVW      24(R5), R1
-	MOVW      28(R5), R2
-	MOVW      32(R5), R3
-	MOVW      36(R5), R4
-	MOVW      R4>>26, R12
-	BIC       $0xfc000000, R4, R4
-	ADD       R12<<2, R12, R12
-	ADD       R12, R0, R0
-	MOVW      R0>>26, R12
-	BIC       $0xfc000000, R0, R0
-	ADD       R12, R1, R1
-	MOVW      R1>>26, R12
-	BIC       $0xfc000000, R1, R1
-	ADD       R12, R2, R2
-	MOVW      R2>>26, R12
-	BIC       $0xfc000000, R2, R2
-	ADD       R12, R3, R3
-	MOVW      R3>>26, R12
-	BIC       $0xfc000000, R3, R3
-	ADD       R12, R4, R4
-	ADD       $5, R0, R6
-	MOVW      R6>>26, R12
-	BIC       $0xfc000000, R6, R6
-	ADD       R12, R1, R7
-	MOVW      R7>>26, R12
-	BIC       $0xfc000000, R7, R7
-	ADD       R12, R2, g
-	MOVW      g>>26, R12
-	BIC       $0xfc000000, g, g
-	ADD       R12, R3, R11
-	MOVW      $-(1<<26), R12
-	ADD       R11>>26, R12, R12
-	BIC       $0xfc000000, R11, R11
-	ADD       R12, R4, R9
-	MOVW      R9>>31, R12
-	SUB       $1, R12
-	AND       R12, R6, R6
-	AND       R12, R7, R7
-	AND       R12, g, g
-	AND       R12, R11, R11
-	AND       R12, R9, R9
-	MVN       R12, R12
-	AND       R12, R0, R0
-	AND       R12, R1, R1
-	AND       R12, R2, R2
-	AND       R12, R3, R3
-	AND       R12, R4, R4
-	ORR       R6, R0, R0
-	ORR       R7, R1, R1
-	ORR       g, R2, R2
-	ORR       R11, R3, R3
-	ORR       R9, R4, R4
-	ORR       R1<<26, R0, R0
-	MOVW      R1>>6, R1
-	ORR       R2<<20, R1, R1
-	MOVW      R2>>12, R2
-	ORR       R3<<14, R2, R2
-	MOVW      R3>>18, R3
-	ORR       R4<<8, R3, R3
-	MOVW      40(R5), R6
-	MOVW      44(R5), R7
-	MOVW      48(R5), g
-	MOVW      52(R5), R11
-	ADD.S     R6, R0, R0
-	ADC.S     R7, R1, R1
-	ADC.S     g, R2, R2
-	ADC.S     R11, R3, R3
-	MOVM.IA   [R0-R3], (R8)
-	MOVW      R5, R12
-	EOR       R0, R0, R0
-	EOR       R1, R1, R1
-	EOR       R2, R2, R2
-	EOR       R3, R3, R3
-	EOR       R4, R4, R4
-	EOR       R5, R5, R5
-	EOR       R6, R6, R6
-	EOR       R7, R7, R7
-	MOVM.IA.W [R0-R7], (R12)
-	MOVM.IA   [R0-R7], (R12)
-	MOVW      4(R13), g
-	RET
--- a/vendor/golang.org/x/crypto/poly1305/sum_generic.go
+++ b/vendor/golang.org/x/crypto/poly1305/sum_generic.go
@ -2,18 +2,29 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

+// This file provides the generic implementation of Sum and MAC. Other files
+// might provide optimized assembly implementations of some of this code.
+
 package poly1305

 import "encoding/binary"

-const (
-	msgBlock   = uint32(1 << 24)
-	finalBlock = uint32(0)
-)
+// Poly1305 [RFC 7539] is a relatively simple algorithm: the authentication tag
+// for a 64 bytes message is approximately
+//
+//     s + m[0:16] * r⁴ + m[16:32] * r³ + m[32:48] * r² + m[48:64] * r  mod  2¹³⁰ - 5
+//
+// for some secret r and s. It can be computed sequentially like
+//
+//     for len(msg) > 0:
+//         h += read(msg, 16)
+//         h *= r
+//         h %= 2¹³⁰ - 5
+//     return h + s
+//
+// All the complexity is about doing performant constant-time math on numbers
+// larger than any available numeric type.

-// sumGeneric generates an authenticator for msg using a one-time key and
-// puts the 16-byte result into out. This is the generic implementation of
-// Sum and should be called if no assembly implementation is available.
 func sumGeneric(out *[TagSize]byte, msg []byte, key *[32]byte) {
 	h := newMACGeneric(key)
 	h.Write(msg)
@ -21,152 +32,276 @@ func sumGeneric(out *[TagSize]byte, msg []byte, key *[32]byte) {
 }

 func newMACGeneric(key *[32]byte) (h macGeneric) {
-	h.r[0] = binary.LittleEndian.Uint32(key[0:]) & 0x3ffffff
-	h.r[1] = (binary.LittleEndian.Uint32(key[3:]) >> 2) & 0x3ffff03
-	h.r[2] = (binary.LittleEndian.Uint32(key[6:]) >> 4) & 0x3ffc0ff
-	h.r[3] = (binary.LittleEndian.Uint32(key[9:]) >> 6) & 0x3f03fff
-	h.r[4] = (binary.LittleEndian.Uint32(key[12:]) >> 8) & 0x00fffff
-
-	h.s[0] = binary.LittleEndian.Uint32(key[16:])
-	h.s[1] = binary.LittleEndian.Uint32(key[20:])
-	h.s[2] = binary.LittleEndian.Uint32(key[24:])
-	h.s[3] = binary.LittleEndian.Uint32(key[28:])
+	initialize(key, &h.r, &h.s)
 	return
 }

+// macState holds numbers in saturated 64-bit little-endian limbs. That is,
+// the value of [x0, x1, x2] is x[0] + x[1] * 2⁶⁴ + x[2] * 2¹²⁸.
+type macState struct {
+	// h is the main accumulator. It is to be interpreted modulo 2¹³⁰ - 5, but
+	// can grow larger during and after rounds.
+	h [3]uint64
+	// r and s are the private key components.
+	r [2]uint64
+	s [2]uint64
+}
+
 type macGeneric struct {
-	h, r [5]uint32
-	s    [4]uint32
+	macState

 	buffer [TagSize]byte
 	offset int
 }

-func (h *macGeneric) Write(p []byte) (n int, err error) {
-	n = len(p)
+// Write splits the incoming message into TagSize chunks, and passes them to
+// update. It buffers incomplete chunks.
+func (h *macGeneric) Write(p []byte) (int, error) {
+	nn := len(p)
 	if h.offset > 0 {
-		remaining := TagSize - h.offset
-		if n < remaining {
-			h.offset += copy(h.buffer[h.offset:], p)
-			return n, nil
+		n := copy(h.buffer[h.offset:], p)
+		if h.offset+n < TagSize {
+			h.offset += n
+			return nn, nil
 		}
-		copy(h.buffer[h.offset:], p[:remaining])
-		p = p[remaining:]
+		p = p[n:]
 		h.offset = 0
-		updateGeneric(h.buffer[:], msgBlock, &(h.h), &(h.r))
+		updateGeneric(&h.macState, h.buffer[:])
 	}
-	if nn := len(p) - (len(p) % TagSize); nn > 0 {
-		updateGeneric(p, msgBlock, &(h.h), &(h.r))
-		p = p[nn:]
+	if n := len(p) - (len(p) % TagSize); n > 0 {
+		updateGeneric(&h.macState, p[:n])
+		p = p[n:]
 	}
 	if len(p) > 0 {
 		h.offset += copy(h.buffer[h.offset:], p)
 	}
-	return n, nil
+	return nn, nil
 }

-func (h *macGeneric) Sum(out *[16]byte) {
-	H, R := h.h, h.r
+// Sum flushes the last incomplete chunk from the buffer, if any, and generates
+// the MAC output. It does not modify its state, in order to allow for multiple
+// calls to Sum, even if no Write is allowed after Sum.
+func (h *macGeneric) Sum(out *[TagSize]byte) {
+	state := h.macState
 	if h.offset > 0 {
-		var buffer [TagSize]byte
-		copy(buffer[:], h.buffer[:h.offset])
-		buffer[h.offset] = 1 // invariant: h.offset < TagSize
-		updateGeneric(buffer[:], finalBlock, &H, &R)
+		updateGeneric(&state, h.buffer[:h.offset])
 	}
-	finalizeGeneric(out, &H, &(h.s))
+	finalize(out, &state.h, &state.s)
 }

-func updateGeneric(msg []byte, flag uint32, h, r *[5]uint32) {
-	h0, h1, h2, h3, h4 := h[0], h[1], h[2], h[3], h[4]
-	r0, r1, r2, r3, r4 := uint64(r[0]), uint64(r[1]), uint64(r[2]), uint64(r[3]), uint64(r[4])
-	R1, R2, R3, R4 := r1*5, r2*5, r3*5, r4*5
+// [rMask0, rMask1] is the specified Poly1305 clamping mask in little-endian. It
+// clears some bits of the secret coefficient to make it possible to implement
+// multiplication more efficiently.
+const (
+	rMask0 = 0x0FFFFFFC0FFFFFFF
+	rMask1 = 0x0FFFFFFC0FFFFFFC
+)

-	for len(msg) >= TagSize {
-		// h += msg
-		h0 += binary.LittleEndian.Uint32(msg[0:]) & 0x3ffffff
-		h1 += (binary.LittleEndian.Uint32(msg[3:]) >> 2) & 0x3ffffff
-		h2 += (binary.LittleEndian.Uint32(msg[6:]) >> 4) & 0x3ffffff
-		h3 += (binary.LittleEndian.Uint32(msg[9:]) >> 6) & 0x3ffffff
-		h4 += (binary.LittleEndian.Uint32(msg[12:]) >> 8) | flag
+func initialize(key *[32]byte, r, s *[2]uint64) {
+	r[0] = binary.LittleEndian.Uint64(key[0:8]) & rMask0
+	r[1] = binary.LittleEndian.Uint64(key[8:16]) & rMask1
+	s[0] = binary.LittleEndian.Uint64(key[16:24])
+	s[1] = binary.LittleEndian.Uint64(key[24:32])
+}

-		// h *= r
-		d0 := (uint64(h0) * r0) + (uint64(h1) * R4) + (uint64(h2) * R3) + (uint64(h3) * R2) + (uint64(h4) * R1)
-		d1 := (d0 >> 26) + (uint64(h0) * r1) + (uint64(h1) * r0) + (uint64(h2) * R4) + (uint64(h3) * R3) + (uint64(h4) * R2)
-		d2 := (d1 >> 26) + (uint64(h0) * r2) + (uint64(h1) * r1) + (uint64(h2) * r0) + (uint64(h3) * R4) + (uint64(h4) * R3)
-		d3 := (d2 >> 26) + (uint64(h0) * r3) + (uint64(h1) * r2) + (uint64(h2) * r1) + (uint64(h3) * r0) + (uint64(h4) * R4)
-		d4 := (d3 >> 26) + (uint64(h0) * r4) + (uint64(h1) * r3) + (uint64(h2) * r2) + (uint64(h3) * r1) + (uint64(h4) * r0)
+// uint128 holds a 128-bit number as two 64-bit limbs, for use with the
+// bits.Mul64 and bits.Add64 intrinsics.
+type uint128 struct {
+	lo, hi uint64
+}

-		// h %= p
-		h0 = uint32(d0) & 0x3ffffff
-		h1 = uint32(d1) & 0x3ffffff
-		h2 = uint32(d2) & 0x3ffffff
-		h3 = uint32(d3) & 0x3ffffff
-		h4 = uint32(d4) & 0x3ffffff
+func mul64(a, b uint64) uint128 {
+	hi, lo := bitsMul64(a, b)
+	return uint128{lo, hi}
+}

-		h0 += uint32(d4>>26) * 5
-		h1 += h0 >> 26
-		h0 = h0 & 0x3ffffff
+func add128(a, b uint128) uint128 {
+	lo, c := bitsAdd64(a.lo, b.lo, 0)
+	hi, c := bitsAdd64(a.hi, b.hi, c)
+	if c != 0 {
+		panic("poly1305: unexpected overflow")
+	}
+	return uint128{lo, hi}
+}
+
+func shiftRightBy2(a uint128) uint128 {
+	a.lo = a.lo>>2 | (a.hi&3)<<62
+	a.hi = a.hi >> 2
+	return a
+}
+
+// updateGeneric absorbs msg into the state.h accumulator. For each chunk m of
+// 128 bits of message, it computes
+//
+//     h₊ = (h + m) * r  mod  2¹³⁰ - 5
+//
+// If the msg length is not a multiple of TagSize, it assumes the last
+// incomplete chunk is the final one.
+func updateGeneric(state *macState, msg []byte) {
+	h0, h1, h2 := state.h[0], state.h[1], state.h[2]
+	r0, r1 := state.r[0], state.r[1]
+
+	for len(msg) > 0 {
+		var c uint64
+
+		// For the first step, h + m, we use a chain of bits.Add64 intrinsics.
+		// The resulting value of h might exceed 2¹³⁰ - 5, but will be partially
+		// reduced at the end of the multiplication below.
+		//
+		// The spec requires us to set a bit just above the message size, not to
+		// hide leading zeroes. For full chunks, that's 1 << 128, so we can just
+		// add 1 to the most significant (2¹²⁸) limb, h2.
+		if len(msg) >= TagSize {
+			h0, c = bitsAdd64(h0, binary.LittleEndian.Uint64(msg[0:8]), 0)
+			h1, c = bitsAdd64(h1, binary.LittleEndian.Uint64(msg[8:16]), c)
+			h2 += c + 1

 			msg = msg[TagSize:]
+		} else {
+			var buf [TagSize]byte
+			copy(buf[:], msg)
+			buf[len(msg)] = 1
+
+			h0, c = bitsAdd64(h0, binary.LittleEndian.Uint64(buf[0:8]), 0)
+			h1, c = bitsAdd64(h1, binary.LittleEndian.Uint64(buf[8:16]), c)
+			h2 += c
+
+			msg = nil
 		}

-	h[0], h[1], h[2], h[3], h[4] = h0, h1, h2, h3, h4
+		// Multiplication of big number limbs is similar to elementary school
+		// columnar multiplication. Instead of digits, there are 64-bit limbs.
+		//
+		// We are multiplying a 3 limbs number, h, by a 2 limbs number, r.
+		//
+		//                        h2    h1    h0  x
+		//                              r1    r0  =
+		//                       ----------------
+		//                      h2r0  h1r0  h0r0     <-- individual 128-bit products
+		//            +   h2r1  h1r1  h0r1
+		//               ------------------------
+		//                 m3    m2    m1    m0      <-- result in 128-bit overlapping limbs
+		//               ------------------------
+		//         m3.hi m2.hi m1.hi m0.hi           <-- carry propagation
+		//     +         m3.lo m2.lo m1.lo m0.lo
+		//        -------------------------------
+		//           t4    t3    t2    t1    t0      <-- final result in 64-bit limbs
+		//
+		// The main difference from pen-and-paper multiplication is that we do
+		// carry propagation in a separate step, as if we wrote two digit sums
+		// at first (the 128-bit limbs), and then carried the tens all at once.
+
+		h0r0 := mul64(h0, r0)
+		h1r0 := mul64(h1, r0)
+		h2r0 := mul64(h2, r0)
+		h0r1 := mul64(h0, r1)
+		h1r1 := mul64(h1, r1)
+		h2r1 := mul64(h2, r1)
+
+		// Since h2 is known to be at most 7 (5 + 1 + 1), and r0 and r1 have their
+		// top 4 bits cleared by rMask{0,1}, we know that their product is not going
+		// to overflow 64 bits, so we can ignore the high part of the products.
+		//
+		// This also means that the product doesn't have a fifth limb (t4).
+		if h2r0.hi != 0 {
+			panic("poly1305: unexpected overflow")
+		}
+		if h2r1.hi != 0 {
+			panic("poly1305: unexpected overflow")
+		}
+
+		m0 := h0r0
+		m1 := add128(h1r0, h0r1) // These two additions don't overflow thanks again
+		m2 := add128(h2r0, h1r1) // to the 4 masked bits at the top of r0 and r1.
+		m3 := h2r1
+
+		t0 := m0.lo
+		t1, c := bitsAdd64(m1.lo, m0.hi, 0)
+		t2, c := bitsAdd64(m2.lo, m1.hi, c)
+		t3, _ := bitsAdd64(m3.lo, m2.hi, c)
+
+		// Now we have the result as 4 64-bit limbs, and we need to reduce it
+		// modulo 2¹³⁰ - 5. The special shape of this Crandall prime lets us do
+		// a cheap partial reduction according to the reduction identity
+		//
+		//     c * 2¹³⁰ + n  =  c * 5 + n  mod  2¹³⁰ - 5
+		//
+		// because 2¹³⁰ = 5 mod 2¹³⁰ - 5. Partial reduction since the result is
+		// likely to be larger than 2¹³⁰ - 5, but still small enough to fit the
+		// assumptions we make about h in the rest of the code.
+		//
+		// See also https://speakerdeck.com/gtank/engineering-prime-numbers?slide=23
+
+		// We split the final result at the 2¹³⁰ mark into h and cc, the carry.
+		// Note that the carry bits are effectively shifted left by 2, in other
+		// words, cc = c * 4 for the c in the reduction identity.
+		h0, h1, h2 = t0, t1, t2&maskLow2Bits
+		cc := uint128{t2 & maskNotLow2Bits, t3}
+
+		// To add c * 5 to h, we first add cc = c * 4, and then add (cc >> 2) = c.
+
+		h0, c = bitsAdd64(h0, cc.lo, 0)
+		h1, c = bitsAdd64(h1, cc.hi, c)
+		h2 += c
+
+		cc = shiftRightBy2(cc)
+
+		h0, c = bitsAdd64(h0, cc.lo, 0)
+		h1, c = bitsAdd64(h1, cc.hi, c)
+		h2 += c
+
+		// h2 is at most 3 + 1 + 1 = 5, making the whole of h at most
+		//
+		//     5 * 2¹²⁸ + (2¹²⁸ - 1) = 6 * 2¹²⁸ - 1
+	}
+
+	state.h[0], state.h[1], state.h[2] = h0, h1, h2
 }

-func finalizeGeneric(out *[TagSize]byte, h *[5]uint32, s *[4]uint32) {
-	h0, h1, h2, h3, h4 := h[0], h[1], h[2], h[3], h[4]
+const (
+	maskLow2Bits    uint64 = 0x0000000000000003
+	maskNotLow2Bits uint64 = ^maskLow2Bits
+)

-	// h %= p reduction
-	h2 += h1 >> 26
-	h1 &= 0x3ffffff
-	h3 += h2 >> 26
-	h2 &= 0x3ffffff
-	h4 += h3 >> 26
-	h3 &= 0x3ffffff
-	h0 += 5 * (h4 >> 26)
-	h4 &= 0x3ffffff
-	h1 += h0 >> 26
-	h0 &= 0x3ffffff
+// select64 returns x if v == 1 and y if v == 0, in constant time.
+func select64(v, x, y uint64) uint64 { return ^(v-1)&x | (v-1)&y }

-	// h - p
-	t0 := h0 + 5
-	t1 := h1 + (t0 >> 26)
-	t2 := h2 + (t1 >> 26)
-	t3 := h3 + (t2 >> 26)
-	t4 := h4 + (t3 >> 26) - (1 << 26)
-	t0 &= 0x3ffffff
-	t1 &= 0x3ffffff
-	t2 &= 0x3ffffff
-	t3 &= 0x3ffffff
+// [p0, p1, p2] is 2¹³⁰ - 5 in little endian order.
+const (
+	p0 = 0xFFFFFFFFFFFFFFFB
+	p1 = 0xFFFFFFFFFFFFFFFF
+	p2 = 0x0000000000000003
+)

-	// select h if h < p else h - p
-	t_mask := (t4 >> 31) - 1
-	h_mask := ^t_mask
-	h0 = (h0 & h_mask) | (t0 & t_mask)
-	h1 = (h1 & h_mask) | (t1 & t_mask)
-	h2 = (h2 & h_mask) | (t2 & t_mask)
-	h3 = (h3 & h_mask) | (t3 & t_mask)
-	h4 = (h4 & h_mask) | (t4 & t_mask)
+// finalize completes the modular reduction of h and computes
+//
+//     out = h + s  mod  2¹²⁸
+//
+func finalize(out *[TagSize]byte, h *[3]uint64, s *[2]uint64) {
+	h0, h1, h2 := h[0], h[1], h[2]

-	// h %= 2^128
-	h0 |= h1 << 26
-	h1 = ((h1 >> 6) | (h2 << 20))
-	h2 = ((h2 >> 12) | (h3 << 14))
-	h3 = ((h3 >> 18) | (h4 << 8))
+	// After the partial reduction in updateGeneric, h might be more than
+	// 2¹³⁰ - 5, but will be less than 2 * (2¹³⁰ - 5). To complete the reduction
+	// in constant time, we compute t = h - (2¹³⁰ - 5), and select h as the
+	// result if the subtraction underflows, and t otherwise.

-	// s: the s part of the key
-	// tag = (h + s) % (2^128)
-	t := uint64(h0) + uint64(s[0])
-	h0 = uint32(t)
-	t = uint64(h1) + uint64(s[1]) + (t >> 32)
-	h1 = uint32(t)
-	t = uint64(h2) + uint64(s[2]) + (t >> 32)
-	h2 = uint32(t)
-	t = uint64(h3) + uint64(s[3]) + (t >> 32)
-	h3 = uint32(t)
+	hMinusP0, b := bitsSub64(h0, p0, 0)
+	hMinusP1, b := bitsSub64(h1, p1, b)
+	_, b = bitsSub64(h2, p2, b)

-	binary.LittleEndian.PutUint32(out[0:], h0)
-	binary.LittleEndian.PutUint32(out[4:], h1)
-	binary.LittleEndian.PutUint32(out[8:], h2)
-	binary.LittleEndian.PutUint32(out[12:], h3)
+	// h = h if h < p else h - p
+	h0 = select64(b, h0, hMinusP0)
+	h1 = select64(b, h1, hMinusP1)
+
+	// Finally, we compute the last Poly1305 step
+	//
+	//     tag = h + s  mod  2¹²⁸
+	//
+	// by just doing a wide addition with the 128 low bits of h and discarding
+	// the overflow.
+	h0, c := bitsAdd64(h0, s[0], 0)
+	h1, _ = bitsAdd64(h1, s[1], c)
+
+	binary.LittleEndian.PutUint64(out[0:8], h0)
+	binary.LittleEndian.PutUint64(out[8:16], h1)
 }
--- a/vendor/golang.org/x/crypto/poly1305/sum_noasm.go
+++ b/vendor/golang.org/x/crypto/poly1305/sum_noasm.go
@ -2,14 +2,11 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

-// +build s390x,!go1.11 !arm,!amd64,!s390x gccgo appengine nacl
+// +build s390x,!go1.11 !amd64,!s390x,!ppc64le gccgo appengine nacl

 package poly1305

-// Sum generates an authenticator for msg using a one-time key and puts the
-// 16-byte result into out. Authenticating two different messages with the same
-// key allows an attacker to forge messages at will.
-func Sum(out *[TagSize]byte, msg []byte, key *[32]byte) {
+func sum(out *[TagSize]byte, msg []byte, key *[32]byte) {
 	h := newMAC(key)
 	h.Write(msg)
 	h.Sum(out)
--- a/vendor/golang.org/x/crypto/poly1305/sum_ppc64le.go
+++ b/vendor/golang.org/x/crypto/poly1305/sum_ppc64le.go
@ -0,0 +1,58 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ppc64le,!gccgo,!appengine
+
+package poly1305
+
+//go:noescape
+func update(state *macState, msg []byte)
+
+func sum(out *[16]byte, m []byte, key *[32]byte) {
+	h := newMAC(key)
+	h.Write(m)
+	h.Sum(out)
+}
+
+func newMAC(key *[32]byte) (h mac) {
+	initialize(key, &h.r, &h.s)
+	return
+}
+
+// mac is a wrapper for macGeneric that redirects calls that would have gone to
+// updateGeneric to update.
+//
+// Its Write and Sum methods are otherwise identical to the macGeneric ones, but
+// using function pointers would carry a major performance cost.
+type mac struct{ macGeneric }
+
+func (h *mac) Write(p []byte) (int, error) {
+	nn := len(p)
+	if h.offset > 0 {
+		n := copy(h.buffer[h.offset:], p)
+		if h.offset+n < TagSize {
+			h.offset += n
+			return nn, nil
+		}
+		p = p[n:]
+		h.offset = 0
+		update(&h.macState, h.buffer[:])
+	}
+	if n := len(p) - (len(p) % TagSize); n > 0 {
+		update(&h.macState, p[:n])
+		p = p[n:]
+	}
+	if len(p) > 0 {
+		h.offset += copy(h.buffer[h.offset:], p)
+	}
+	return nn, nil
+}
+
+func (h *mac) Sum(out *[16]byte) {
+	state := h.macState
+	if h.offset > 0 {
+		update(&state, h.buffer[:h.offset])
+	}
+	finalize(out, &state.h, &state.s)
+}
--- a/vendor/golang.org/x/crypto/poly1305/sum_ppc64le.s
+++ b/vendor/golang.org/x/crypto/poly1305/sum_ppc64le.s
@ -0,0 +1,181 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ppc64le,!gccgo,!appengine
+
+#include "textflag.h"
+
+// This was ported from the amd64 implementation.
+
+#define POLY1305_ADD(msg, h0, h1, h2, t0, t1, t2) \
+	MOVD (msg), t0;  \
+	MOVD 8(msg), t1; \
+	MOVD $1, t2;     \
+	ADDC t0, h0, h0; \
+	ADDE t1, h1, h1; \
+	ADDE t2, h2;     \
+	ADD  $16, msg
+
+#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3, t4, t5) \
+	MULLD  r0, h0, t0;  \
+	MULLD  r0, h1, t4;  \
+	MULHDU r0, h0, t1;  \
+	MULHDU r0, h1, t5;  \
+	ADDC   t4, t1, t1;  \
+	MULLD  r0, h2, t2;  \
+	ADDZE  t5;          \
+	MULHDU r1, h0, t4;  \
+	MULLD  r1, h0, h0;  \
+	ADD    t5, t2, t2;  \
+	ADDC   h0, t1, t1;  \
+	MULLD  h2, r1, t3;  \
+	ADDZE  t4, h0;      \
+	MULHDU r1, h1, t5;  \
+	MULLD  r1, h1, t4;  \
+	ADDC   t4, t2, t2;  \
+	ADDE   t5, t3, t3;  \
+	ADDC   h0, t2, t2;  \
+	MOVD   $-4, t4;     \
+	MOVD   t0, h0;      \
+	MOVD   t1, h1;      \
+	ADDZE  t3;          \
+	ANDCC  $3, t2, h2;  \
+	AND    t2, t4, t0;  \
+	ADDC   t0, h0, h0;  \
+	ADDE   t3, h1, h1;  \
+	SLD    $62, t3, t4; \
+	SRD    $2, t2;      \
+	ADDZE  h2;          \
+	OR     t4, t2, t2;  \
+	SRD    $2, t3;      \
+	ADDC   t2, h0, h0;  \
+	ADDE   t3, h1, h1;  \
+	ADDZE  h2
+
+DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF
+DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
+GLOBL ·poly1305Mask<>(SB), RODATA, $16
+
+// func update(state *[7]uint64, msg []byte)
+TEXT ·update(SB), $0-32
+	MOVD state+0(FP), R3
+	MOVD msg_base+8(FP), R4
+	MOVD msg_len+16(FP), R5
+
+	MOVD 0(R3), R8   // h0
+	MOVD 8(R3), R9   // h1
+	MOVD 16(R3), R10 // h2
+	MOVD 24(R3), R11 // r0
+	MOVD 32(R3), R12 // r1
+
+	CMP R5, $16
+	BLT bytes_between_0_and_15
+
+loop:
+	POLY1305_ADD(R4, R8, R9, R10, R20, R21, R22)
+
+multiply:
+	POLY1305_MUL(R8, R9, R10, R11, R12, R16, R17, R18, R14, R20, R21)
+	ADD $-16, R5
+	CMP R5, $16
+	BGE loop
+
+bytes_between_0_and_15:
+	CMP  $0, R5
+	BEQ  done
+	MOVD $0, R16 // h0
+	MOVD $0, R17 // h1
+
+flush_buffer:
+	CMP R5, $8
+	BLE just1
+
+	MOVD $8, R21
+	SUB  R21, R5, R21
+
+	// Greater than 8 -- load the rightmost remaining bytes in msg
+	// and put into R17 (h1)
+	MOVD (R4)(R21), R17
+	MOVD $16, R22
+
+	// Find the offset to those bytes
+	SUB R5, R22, R22
+	SLD $3, R22
+
+	// Shift to get only the bytes in msg
+	SRD R22, R17, R17
+
+	// Put 1 at high end
+	MOVD $1, R23
+	SLD  $3, R21
+	SLD  R21, R23, R23
+	OR   R23, R17, R17
+
+	// Remainder is 8
+	MOVD $8, R5
+
+just1:
+	CMP R5, $8
+	BLT less8
+
+	// Exactly 8
+	MOVD (R4), R16
+
+	CMP $0, R17
+
+	// Check if we've already set R17; if not
+	// set 1 to indicate end of msg.
+	BNE  carry
+	MOVD $1, R17
+	BR   carry
+
+less8:
+	MOVD  $0, R16   // h0
+	MOVD  $0, R22   // shift count
+	CMP   R5, $4
+	BLT   less4
+	MOVWZ (R4), R16
+	ADD   $4, R4
+	ADD   $-4, R5
+	MOVD  $32, R22
+
+less4:
+	CMP   R5, $2
+	BLT   less2
+	MOVHZ (R4), R21
+	SLD   R22, R21, R21
+	OR    R16, R21, R16
+	ADD   $16, R22
+	ADD   $-2, R5
+	ADD   $2, R4
+
+less2:
+	CMP   $0, R5
+	BEQ   insert1
+	MOVBZ (R4), R21
+	SLD   R22, R21, R21
+	OR    R16, R21, R16
+	ADD   $8, R22
+
+insert1:
+	// Insert 1 at end of msg
+	MOVD $1, R21
+	SLD  R22, R21, R21
+	OR   R16, R21, R16
+
+carry:
+	// Add new values to h0, h1, h2
+	ADDC R16, R8
+	ADDE R17, R9
+	ADDE $0, R10
+	MOVD $16, R5
+	ADD  R5, R4
+	BR   multiply
+
+done:
+	// Save h0, h1, h2 in state
+	MOVD R8, 0(R3)
+	MOVD R9, 8(R3)
+	MOVD R10, 16(R3)
+	RET
--- a/vendor/golang.org/x/crypto/poly1305/sum_s390x.go
+++ b/vendor/golang.org/x/crypto/poly1305/sum_s390x.go
@ -22,10 +22,7 @@ func poly1305vx(out *[16]byte, m *byte, mlen uint64, key *[32]byte)
 //go:noescape
 func poly1305vmsl(out *[16]byte, m *byte, mlen uint64, key *[32]byte)

-// Sum generates an authenticator for m using a one-time key and puts the
-// 16-byte result into out. Authenticating two different messages with the same
-// key allows an attacker to forge messages at will.
-func Sum(out *[16]byte, m []byte, key *[32]byte) {
+func sum(out *[16]byte, m []byte, key *[32]byte) {
 	if cpu.S390X.HasVX {
 		var mPtr *byte
 		if len(m) > 0 {
--- a/vendor/golang.org/x/crypto/ssh/certs.go
+++ b/vendor/golang.org/x/crypto/ssh/certs.go
@ -22,7 +22,9 @@ const (
 	CertAlgoECDSA256v01   = "ecdsa-sha2-nistp256-cert-v01@openssh.com"
 	CertAlgoECDSA384v01   = "ecdsa-sha2-nistp384-cert-v01@openssh.com"
 	CertAlgoECDSA521v01   = "ecdsa-sha2-nistp521-cert-v01@openssh.com"
+	CertAlgoSKECDSA256v01 = "sk-ecdsa-sha2-nistp256-cert-v01@openssh.com"
 	CertAlgoED25519v01    = "ssh-ed25519-cert-v01@openssh.com"
+	CertAlgoSKED25519v01  = "sk-ssh-ed25519-cert-v01@openssh.com"
 )

 // Certificate types distinguish between host and user
@ -37,6 +39,7 @@ const (
 type Signature struct {
 	Format string
 	Blob   []byte
+	Rest   []byte `ssh:"rest"`
 }

 // CertTimeInfinity can be used for OpenSSHCertV01.ValidBefore to indicate that
@ -434,7 +437,9 @@ var certAlgoNames = map[string]string{
 	KeyAlgoECDSA256:   CertAlgoECDSA256v01,
 	KeyAlgoECDSA384:   CertAlgoECDSA384v01,
 	KeyAlgoECDSA521:   CertAlgoECDSA521v01,
+	KeyAlgoSKECDSA256: CertAlgoSKECDSA256v01,
 	KeyAlgoED25519:    CertAlgoED25519v01,
+	KeyAlgoSKED25519:  CertAlgoSKED25519v01,
 }

 // certToPrivAlgo returns the underlying algorithm for a certificate algorithm.
@ -518,6 +523,12 @@ func parseSignatureBody(in []byte) (out *Signature, rest []byte, ok bool) {
 		return
 	}

+	switch out.Format {
+	case KeyAlgoSKECDSA256, CertAlgoSKECDSA256v01, KeyAlgoSKED25519, CertAlgoSKED25519v01:
+		out.Rest = in
+		return out, nil, ok
+	}
+
 	return out, in, ok
 }

--- a/vendor/golang.org/x/crypto/ssh/cipher.go
+++ b/vendor/golang.org/x/crypto/ssh/cipher.go
@ -16,9 +16,8 @@ import (
 	"hash"
 	"io"
 	"io/ioutil"
-	"math/bits"

-	"golang.org/x/crypto/internal/chacha20"
+	"golang.org/x/crypto/chacha20"
 	"golang.org/x/crypto/poly1305"
 )

@ -642,8 +641,8 @@ const chacha20Poly1305ID = "chacha20-poly1305@openssh.com"
 // the methods here also implement padding, which RFC4253 Section 6
 // also requires of stream ciphers.
 type chacha20Poly1305Cipher struct {
-	lengthKey  [8]uint32
-	contentKey [8]uint32
+	lengthKey  [32]byte
+	contentKey [32]byte
 	buf        []byte
 }

@ -656,21 +655,21 @@ func newChaCha20Cipher(key, unusedIV, unusedMACKey []byte, unusedAlgs directionA
 		buf: make([]byte, 256),
 	}

-	for i := range c.contentKey {
-		c.contentKey[i] = binary.LittleEndian.Uint32(key[i*4 : (i+1)*4])
-	}
-	for i := range c.lengthKey {
-		c.lengthKey[i] = binary.LittleEndian.Uint32(key[(i+8)*4 : (i+9)*4])
-	}
+	copy(c.contentKey[:], key[:32])
+	copy(c.lengthKey[:], key[32:])
 	return c, nil
 }

 func (c *chacha20Poly1305Cipher) readCipherPacket(seqNum uint32, r io.Reader) ([]byte, error) {
-	nonce := [3]uint32{0, 0, bits.ReverseBytes32(seqNum)}
-	s := chacha20.New(c.contentKey, nonce)
-	var polyKey [32]byte
+	nonce := make([]byte, 12)
+	binary.BigEndian.PutUint32(nonce[8:], seqNum)
+	s, err := chacha20.NewUnauthenticatedCipher(c.contentKey[:], nonce)
+	if err != nil {
+		return nil, err
+	}
+	var polyKey, discardBuf [32]byte
 	s.XORKeyStream(polyKey[:], polyKey[:])
-	s.Advance() // skip next 32 bytes
+	s.XORKeyStream(discardBuf[:], discardBuf[:]) // skip the next 32 bytes

 	encryptedLength := c.buf[:4]
 	if _, err := io.ReadFull(r, encryptedLength); err != nil {
@ -678,7 +677,11 @@ func (c *chacha20Poly1305Cipher) readCipherPacket(seqNum uint32, r io.Reader) ([
 	}

 	var lenBytes [4]byte
-	chacha20.New(c.lengthKey, nonce).XORKeyStream(lenBytes[:], encryptedLength)
+	ls, err := chacha20.NewUnauthenticatedCipher(c.lengthKey[:], nonce)
+	if err != nil {
+		return nil, err
+	}
+	ls.XORKeyStream(lenBytes[:], encryptedLength)

 	length := binary.BigEndian.Uint32(lenBytes[:])
 	if length > maxPacket {
@ -724,11 +727,15 @@ func (c *chacha20Poly1305Cipher) readCipherPacket(seqNum uint32, r io.Reader) ([
 }

 func (c *chacha20Poly1305Cipher) writeCipherPacket(seqNum uint32, w io.Writer, rand io.Reader, payload []byte) error {
-	nonce := [3]uint32{0, 0, bits.ReverseBytes32(seqNum)}
-	s := chacha20.New(c.contentKey, nonce)
-	var polyKey [32]byte
+	nonce := make([]byte, 12)
+	binary.BigEndian.PutUint32(nonce[8:], seqNum)
+	s, err := chacha20.NewUnauthenticatedCipher(c.contentKey[:], nonce)
+	if err != nil {
+		return err
+	}
+	var polyKey, discardBuf [32]byte
 	s.XORKeyStream(polyKey[:], polyKey[:])
-	s.Advance() // skip next 32 bytes
+	s.XORKeyStream(discardBuf[:], discardBuf[:]) // skip the next 32 bytes

 	// There is no blocksize, so fall back to multiple of 8 byte
 	// padding, as described in RFC 4253, Sec 6.
@ -748,7 +755,11 @@ func (c *chacha20Poly1305Cipher) writeCipherPacket(seqNum uint32, w io.Writer, r
 	}

 	binary.BigEndian.PutUint32(c.buf, uint32(1+len(payload)+padding))
-	chacha20.New(c.lengthKey, nonce).XORKeyStream(c.buf, c.buf[:4])
+	ls, err := chacha20.NewUnauthenticatedCipher(c.lengthKey[:], nonce)
+	if err != nil {
+		return err
+	}
+	ls.XORKeyStream(c.buf, c.buf[:4])
 	c.buf[4] = byte(padding)
 	copy(c.buf[5:], payload)
 	packetEnd := 5 + len(payload) + padding
--- a/vendor/golang.org/x/crypto/ssh/client_auth.go
+++ b/vendor/golang.org/x/crypto/ssh/client_auth.go
@ -523,3 +523,117 @@ func (r *retryableAuthMethod) method() string {
 func RetryableAuthMethod(auth AuthMethod, maxTries int) AuthMethod {
 	return &retryableAuthMethod{authMethod: auth, maxTries: maxTries}
 }
+
+// GSSAPIWithMICAuthMethod is an AuthMethod with "gssapi-with-mic" authentication.
+// See RFC 4462 section 3
+// gssAPIClient is implementation of the GSSAPIClient interface, see the definition of the interface for details.
+// target is the server host you want to log in to.
+func GSSAPIWithMICAuthMethod(gssAPIClient GSSAPIClient, target string) AuthMethod {
+	if gssAPIClient == nil {
+		panic("gss-api client must be not nil with enable gssapi-with-mic")
+	}
+	return &gssAPIWithMICCallback{gssAPIClient: gssAPIClient, target: target}
+}
+
+type gssAPIWithMICCallback struct {
+	gssAPIClient GSSAPIClient
+	target       string
+}
+
+func (g *gssAPIWithMICCallback) auth(session []byte, user string, c packetConn, rand io.Reader) (authResult, []string, error) {
+	m := &userAuthRequestMsg{
+		User:    user,
+		Service: serviceSSH,
+		Method:  g.method(),
+	}
+	// The GSS-API authentication method is initiated when the client sends an SSH_MSG_USERAUTH_REQUEST.
+	// See RFC 4462 section 3.2.
+	m.Payload = appendU32(m.Payload, 1)
+	m.Payload = appendString(m.Payload, string(krb5OID))
+	if err := c.writePacket(Marshal(m)); err != nil {
+		return authFailure, nil, err
+	}
+	// The server responds to the SSH_MSG_USERAUTH_REQUEST with either an
+	// SSH_MSG_USERAUTH_FAILURE if none of the mechanisms are supported or
+	// with an SSH_MSG_USERAUTH_GSSAPI_RESPONSE.
+	// See RFC 4462 section 3.3.
+	// OpenSSH supports Kerberos V5 mechanism only for GSS-API authentication,so I don't want to check
+	// selected mech if it is valid.
+	packet, err := c.readPacket()
+	if err != nil {
+		return authFailure, nil, err
+	}
+	userAuthGSSAPIResp := &userAuthGSSAPIResponse{}
+	if err := Unmarshal(packet, userAuthGSSAPIResp); err != nil {
+		return authFailure, nil, err
+	}
+	// Start the loop into the exchange token.
+	// See RFC 4462 section 3.4.
+	var token []byte
+	defer g.gssAPIClient.DeleteSecContext()
+	for {
+		// Initiates the establishment of a security context between the application and a remote peer.
+		nextToken, needContinue, err := g.gssAPIClient.InitSecContext("host@"+g.target, token, false)
+		if err != nil {
+			return authFailure, nil, err
+		}
+		if len(nextToken) > 0 {
+			if err := c.writePacket(Marshal(&userAuthGSSAPIToken{
+				Token: nextToken,
+			})); err != nil {
+				return authFailure, nil, err
+			}
+		}
+		if !needContinue {
+			break
+		}
+		packet, err = c.readPacket()
+		if err != nil {
+			return authFailure, nil, err
+		}
+		switch packet[0] {
+		case msgUserAuthFailure:
+			var msg userAuthFailureMsg
+			if err := Unmarshal(packet, &msg); err != nil {
+				return authFailure, nil, err
+			}
+			if msg.PartialSuccess {
+				return authPartialSuccess, msg.Methods, nil
+			}
+			return authFailure, msg.Methods, nil
+		case msgUserAuthGSSAPIError:
+			userAuthGSSAPIErrorResp := &userAuthGSSAPIError{}
+			if err := Unmarshal(packet, userAuthGSSAPIErrorResp); err != nil {
+				return authFailure, nil, err
+			}
+			return authFailure, nil, fmt.Errorf("GSS-API Error:\n"+
+				"Major Status: %d\n"+
+				"Minor Status: %d\n"+
+				"Error Message: %s\n", userAuthGSSAPIErrorResp.MajorStatus, userAuthGSSAPIErrorResp.MinorStatus,
+				userAuthGSSAPIErrorResp.Message)
+		case msgUserAuthGSSAPIToken:
+			userAuthGSSAPITokenReq := &userAuthGSSAPIToken{}
+			if err := Unmarshal(packet, userAuthGSSAPITokenReq); err != nil {
+				return authFailure, nil, err
+			}
+			token = userAuthGSSAPITokenReq.Token
+		}
+	}
+	// Binding Encryption Keys.
+	// See RFC 4462 section 3.5.
+	micField := buildMIC(string(session), user, "ssh-connection", "gssapi-with-mic")
+	micToken, err := g.gssAPIClient.GetMIC(micField)
+	if err != nil {
+		return authFailure, nil, err
+	}
+	if err := c.writePacket(Marshal(&userAuthGSSAPIMIC{
+		MIC: micToken,
+	})); err != nil {
+		return authFailure, nil, err
+	}
+	return handleAuthResponse(c)
+}
+
+func (g *gssAPIWithMICCallback) method() string {
+	return "gssapi-with-mic"
+}
--- a/vendor/golang.org/x/crypto/ssh/common.go
+++ b/vendor/golang.org/x/crypto/ssh/common.go
@ -51,6 +51,21 @@ var supportedKexAlgos = []string{
 	kexAlgoDH14SHA1, kexAlgoDH1SHA1,
 }

+// serverForbiddenKexAlgos contains key exchange algorithms, that are forbidden
+// for the server half.
+var serverForbiddenKexAlgos = map[string]struct{}{
+	kexAlgoDHGEXSHA1:   {}, // server half implementation is only minimal to satisfy the automated tests
+	kexAlgoDHGEXSHA256: {}, // server half implementation is only minimal to satisfy the automated tests
+}
+
+// preferredKexAlgos specifies the default preference for key-exchange algorithms
+// in preference order.
+var preferredKexAlgos = []string{
+	kexAlgoCurve25519SHA256,
+	kexAlgoECDH256, kexAlgoECDH384, kexAlgoECDH521,
+	kexAlgoDH14SHA1,
+}
+
 // supportedHostKeyAlgos specifies the supported host-key algorithms (i.e. methods
 // of authenticating servers) in preference order.
 var supportedHostKeyAlgos = []string{
@ -109,6 +124,7 @@ func findCommon(what string, client []string, server []string) (common string, e
 	return "", fmt.Errorf("ssh: no common algorithm for %s; client offered: %v, server offered: %v", what, client, server)
 }

+// directionAlgorithms records algorithm choices in one direction (either read or write)
 type directionAlgorithms struct {
 	Cipher      string
 	MAC         string
@ -137,7 +153,7 @@ type algorithms struct {
 	r       directionAlgorithms
 }

-func findAgreedAlgorithms(clientKexInit, serverKexInit *kexInitMsg) (algs *algorithms, err error) {
+func findAgreedAlgorithms(isClient bool, clientKexInit, serverKexInit *kexInitMsg) (algs *algorithms, err error) {
 	result := &algorithms{}

 	result.kex, err = findCommon("key exchange", clientKexInit.KexAlgos, serverKexInit.KexAlgos)
@ -150,32 +166,37 @@ func findAgreedAlgorithms(clientKexInit, serverKexInit *kexInitMsg) (algs *algor
 		return
 	}

-	result.w.Cipher, err = findCommon("client to server cipher", clientKexInit.CiphersClientServer, serverKexInit.CiphersClientServer)
+	stoc, ctos := &result.w, &result.r
+	if isClient {
+		ctos, stoc = stoc, ctos
+	}
+
+	ctos.Cipher, err = findCommon("client to server cipher", clientKexInit.CiphersClientServer, serverKexInit.CiphersClientServer)
 	if err != nil {
 		return
 	}

-	result.r.Cipher, err = findCommon("server to client cipher", clientKexInit.CiphersServerClient, serverKexInit.CiphersServerClient)
+	stoc.Cipher, err = findCommon("server to client cipher", clientKexInit.CiphersServerClient, serverKexInit.CiphersServerClient)
 	if err != nil {
 		return
 	}

-	result.w.MAC, err = findCommon("client to server MAC", clientKexInit.MACsClientServer, serverKexInit.MACsClientServer)
+	ctos.MAC, err = findCommon("client to server MAC", clientKexInit.MACsClientServer, serverKexInit.MACsClientServer)
 	if err != nil {
 		return
 	}

-	result.r.MAC, err = findCommon("server to client MAC", clientKexInit.MACsServerClient, serverKexInit.MACsServerClient)
+	stoc.MAC, err = findCommon("server to client MAC", clientKexInit.MACsServerClient, serverKexInit.MACsServerClient)
 	if err != nil {
 		return
 	}

-	result.w.Compression, err = findCommon("client to server compression", clientKexInit.CompressionClientServer, serverKexInit.CompressionClientServer)
+	ctos.Compression, err = findCommon("client to server compression", clientKexInit.CompressionClientServer, serverKexInit.CompressionClientServer)
 	if err != nil {
 		return
 	}

-	result.r.Compression, err = findCommon("server to client compression", clientKexInit.CompressionServerClient, serverKexInit.CompressionServerClient)
+	stoc.Compression, err = findCommon("server to client compression", clientKexInit.CompressionServerClient, serverKexInit.CompressionServerClient)
 	if err != nil {
 		return
 	}
@ -233,7 +254,7 @@ func (c *Config) SetDefaults() {
 	c.Ciphers = ciphers

 	if c.KeyExchanges == nil {
-		c.KeyExchanges = supportedKexAlgos
+		c.KeyExchanges = preferredKexAlgos
 	}

 	if c.MACs == nil {
--- a/vendor/golang.org/x/crypto/ssh/handshake.go
+++ b/vendor/golang.org/x/crypto/ssh/handshake.go
@ -543,7 +543,8 @@ func (t *handshakeTransport) enterKeyExchange(otherInitPacket []byte) error {

 	clientInit := otherInit
 	serverInit := t.sentInitMsg
-	if len(t.hostKeys) == 0 {
+	isClient := len(t.hostKeys) == 0
+	if isClient {
 		clientInit, serverInit = serverInit, clientInit

 		magics.clientKexInit = t.sentInitPacket
@ -551,7 +552,7 @@ func (t *handshakeTransport) enterKeyExchange(otherInitPacket []byte) error {
 	}

 	var err error
-	t.algorithms, err = findAgreedAlgorithms(clientInit, serverInit)
+	t.algorithms, err = findAgreedAlgorithms(isClient, clientInit, serverInit)
 	if err != nil {
 		return err
 	}
--- a/vendor/golang.org/x/crypto/ssh/kex.go
+++ b/vendor/golang.org/x/crypto/ssh/kex.go
@ -10,7 +10,9 @@ import (
 	"crypto/elliptic"
 	"crypto/rand"
 	"crypto/subtle"
+	"encoding/binary"
 	"errors"
+	"fmt"
 	"io"
 	"math/big"

@ -24,6 +26,12 @@ const (
 	kexAlgoECDH384          = "ecdh-sha2-nistp384"
 	kexAlgoECDH521          = "ecdh-sha2-nistp521"
 	kexAlgoCurve25519SHA256 = "curve25519-sha256@libssh.org"
+
+	// For the following kex only the client half contains a production
+	// ready implementation. The server half only consists of a minimal
+	// implementation to satisfy the automated tests.
+	kexAlgoDHGEXSHA1   = "diffie-hellman-group-exchange-sha1"
+	kexAlgoDHGEXSHA256 = "diffie-hellman-group-exchange-sha256"
 )

 // kexResult captures the outcome of a key exchange.
@ -204,7 +212,7 @@ func (group *dhGroup) Server(c packetConn, randSource io.Reader, magics *handsha
 		HostKey:   hostKeyBytes,
 		Signature: sig,
 		Hash:      crypto.SHA1,
-	}, nil
+	}, err
 }

 // ecdh performs Elliptic Curve Diffie-Hellman key exchange as
@ -402,6 +410,8 @@ func init() {
 	kexAlgoMap[kexAlgoECDH384] = &ecdh{elliptic.P384()}
 	kexAlgoMap[kexAlgoECDH256] = &ecdh{elliptic.P256()}
 	kexAlgoMap[kexAlgoCurve25519SHA256] = &curve25519sha256{}
+	kexAlgoMap[kexAlgoDHGEXSHA1] = &dhGEXSHA{hashFunc: crypto.SHA1}
+	kexAlgoMap[kexAlgoDHGEXSHA256] = &dhGEXSHA{hashFunc: crypto.SHA256}
 }

 // curve25519sha256 implements the curve25519-sha256@libssh.org key
@ -538,3 +548,242 @@ func (kex *curve25519sha256) Server(c packetConn, rand io.Reader, magics *handsh
 		Hash:      crypto.SHA256,
 	}, nil
 }
+
+// dhGEXSHA implements the diffie-hellman-group-exchange-sha1 and
+// diffie-hellman-group-exchange-sha256 key agreement protocols,
+// as described in RFC 4419
+type dhGEXSHA struct {
+	g, p     *big.Int
+	hashFunc crypto.Hash
+}
+
+const numMRTests = 64
+
+const (
+	dhGroupExchangeMinimumBits   = 2048
+	dhGroupExchangePreferredBits = 2048
+	dhGroupExchangeMaximumBits   = 8192
+)
+
+func (gex *dhGEXSHA) diffieHellman(theirPublic, myPrivate *big.Int) (*big.Int, error) {
+	if theirPublic.Sign() <= 0 || theirPublic.Cmp(gex.p) >= 0 {
+		return nil, fmt.Errorf("ssh: DH parameter out of bounds")
+	}
+	return new(big.Int).Exp(theirPublic, myPrivate, gex.p), nil
+}
+
+func (gex *dhGEXSHA) Client(c packetConn, randSource io.Reader, magics *handshakeMagics) (*kexResult, error) {
+	// Send GexRequest
+	kexDHGexRequest := kexDHGexRequestMsg{
+		MinBits:      dhGroupExchangeMinimumBits,
+		PreferedBits: dhGroupExchangePreferredBits,
+		MaxBits:      dhGroupExchangeMaximumBits,
+	}
+	if err := c.writePacket(Marshal(&kexDHGexRequest)); err != nil {
+		return nil, err
+	}
+
+	// Receive GexGroup
+	packet, err := c.readPacket()
+	if err != nil {
+		return nil, err
+	}
+
+	var kexDHGexGroup kexDHGexGroupMsg
+	if err = Unmarshal(packet, &kexDHGexGroup); err != nil {
+		return nil, err
+	}
+
+	// reject if p's bit length < dhGroupExchangeMinimumBits or > dhGroupExchangeMaximumBits
+	if kexDHGexGroup.P.BitLen() < dhGroupExchangeMinimumBits || kexDHGexGroup.P.BitLen() > dhGroupExchangeMaximumBits {
+		return nil, fmt.Errorf("ssh: server-generated gex p is out of range (%d bits)", kexDHGexGroup.P.BitLen())
+	}
+
+	gex.p = kexDHGexGroup.P
+	gex.g = kexDHGexGroup.G
+
+	// Check if p is safe by verifing that p and (p-1)/2 are primes
+	one := big.NewInt(1)
+	var pHalf = &big.Int{}
+	pHalf.Rsh(gex.p, 1)
+	if !gex.p.ProbablyPrime(numMRTests) || !pHalf.ProbablyPrime(numMRTests) {
+		return nil, fmt.Errorf("ssh: server provided gex p is not safe")
+	}
+
+	// Check if g is safe by verifing that g > 1 and g < p - 1
+	var pMinusOne = &big.Int{}
+	pMinusOne.Sub(gex.p, one)
+	if gex.g.Cmp(one) != 1 && gex.g.Cmp(pMinusOne) != -1 {
+		return nil, fmt.Errorf("ssh: server provided gex g is not safe")
+	}
+
+	// Send GexInit
+	x, err := rand.Int(randSource, pHalf)
+	if err != nil {
+		return nil, err
+	}
+	X := new(big.Int).Exp(gex.g, x, gex.p)
+	kexDHGexInit := kexDHGexInitMsg{
+		X: X,
+	}
+	if err := c.writePacket(Marshal(&kexDHGexInit)); err != nil {
+		return nil, err
+	}
+
+	// Receive GexReply
+	packet, err = c.readPacket()
+	if err != nil {
+		return nil, err
+	}
+
+	var kexDHGexReply kexDHGexReplyMsg
+	if err = Unmarshal(packet, &kexDHGexReply); err != nil {
+		return nil, err
+	}
+
+	kInt, err := gex.diffieHellman(kexDHGexReply.Y, x)
+	if err != nil {
+		return nil, err
+	}
+
+	// Check if k is safe by verifing that k > 1 and k < p - 1
+	if kInt.Cmp(one) != 1 && kInt.Cmp(pMinusOne) != -1 {
+		return nil, fmt.Errorf("ssh: derived k is not safe")
+	}
+
+	h := gex.hashFunc.New()
+	magics.write(h)
+	writeString(h, kexDHGexReply.HostKey)
+	binary.Write(h, binary.BigEndian, uint32(dhGroupExchangeMinimumBits))
+	binary.Write(h, binary.BigEndian, uint32(dhGroupExchangePreferredBits))
+	binary.Write(h, binary.BigEndian, uint32(dhGroupExchangeMaximumBits))
+	writeInt(h, gex.p)
+	writeInt(h, gex.g)
+	writeInt(h, X)
+	writeInt(h, kexDHGexReply.Y)
+	K := make([]byte, intLength(kInt))
+	marshalInt(K, kInt)
+	h.Write(K)
+
+	return &kexResult{
+		H:         h.Sum(nil),
+		K:         K,
+		HostKey:   kexDHGexReply.HostKey,
+		Signature: kexDHGexReply.Signature,
+		Hash:      gex.hashFunc,
+	}, nil
+}
+
+// Server half implementation of the Diffie Hellman Key Exchange with SHA1 and SHA256.
+//
+// This is a minimal implementation to satisfy the automated tests.
+func (gex *dhGEXSHA) Server(c packetConn, randSource io.Reader, magics *handshakeMagics, priv Signer) (result *kexResult, err error) {
+	// Receive GexRequest
+	packet, err := c.readPacket()
+	if err != nil {
+		return
+	}
+	var kexDHGexRequest kexDHGexRequestMsg
+	if err = Unmarshal(packet, &kexDHGexRequest); err != nil {
+		return
+	}
+
+	// smoosh the user's preferred size into our own limits
+	if kexDHGexRequest.PreferedBits > dhGroupExchangeMaximumBits {
+		kexDHGexRequest.PreferedBits = dhGroupExchangeMaximumBits
+	}
+	if kexDHGexRequest.PreferedBits < dhGroupExchangeMinimumBits {
+		kexDHGexRequest.PreferedBits = dhGroupExchangeMinimumBits
+	}
+	// fix min/max if they're inconsistent.  technically, we could just pout
+	// and hang up, but there's no harm in giving them the benefit of the
+	// doubt and just picking a bitsize for them.
+	if kexDHGexRequest.MinBits > kexDHGexRequest.PreferedBits {
+		kexDHGexRequest.MinBits = kexDHGexRequest.PreferedBits
+	}
+	if kexDHGexRequest.MaxBits < kexDHGexRequest.PreferedBits {
+		kexDHGexRequest.MaxBits = kexDHGexRequest.PreferedBits
+	}
+
+	// Send GexGroup
+	// This is the group called diffie-hellman-group14-sha1 in RFC
+	// 4253 and Oakley Group 14 in RFC 3526.
+	p, _ := new(big.Int).SetString("FFFFFFFFFFFFFFFFC90FDAA22168C234C4C6628B80DC1CD129024E088A67CC74020BBEA63B139B22514A08798E3404DDEF9519B3CD3A431B302B0A6DF25F14374FE1356D6D51C245E485B576625E7EC6F44C42E9A637ED6B0BFF5CB6F406B7EDEE386BFB5A899FA5AE9F24117C4B1FE649286651ECE45B3DC2007CB8A163BF0598DA48361C55D39A69163FA8FD24CF5F83655D23DCA3AD961C62F356208552BB9ED529077096966D670C354E4ABC9804F1746C08CA18217C32905E462E36CE3BE39E772C180E86039B2783A2EC07A28FB5C55DF06F4C52C9DE2BCBF6955817183995497CEA956AE515D2261898FA051015728E5A8AACAA68FFFFFFFFFFFFFFFF", 16)
+	gex.p = p
+	gex.g = big.NewInt(2)
+
+	kexDHGexGroup := kexDHGexGroupMsg{
+		P: gex.p,
+		G: gex.g,
+	}
+	if err := c.writePacket(Marshal(&kexDHGexGroup)); err != nil {
+		return nil, err
+	}
+
+	// Receive GexInit
+	packet, err = c.readPacket()
+	if err != nil {
+		return
+	}
+	var kexDHGexInit kexDHGexInitMsg
+	if err = Unmarshal(packet, &kexDHGexInit); err != nil {
+		return
+	}
+
+	var pHalf = &big.Int{}
+	pHalf.Rsh(gex.p, 1)
+
+	y, err := rand.Int(randSource, pHalf)
+	if err != nil {
+		return
+	}
+
+	Y := new(big.Int).Exp(gex.g, y, gex.p)
+	kInt, err := gex.diffieHellman(kexDHGexInit.X, y)
+	if err != nil {
+		return nil, err
+	}
+
+	hostKeyBytes := priv.PublicKey().Marshal()
+
+	h := gex.hashFunc.New()
+	magics.write(h)
+	writeString(h, hostKeyBytes)
+	binary.Write(h, binary.BigEndian, uint32(dhGroupExchangeMinimumBits))
+	binary.Write(h, binary.BigEndian, uint32(dhGroupExchangePreferredBits))
+	binary.Write(h, binary.BigEndian, uint32(dhGroupExchangeMaximumBits))
+	writeInt(h, gex.p)
+	writeInt(h, gex.g)
+	writeInt(h, kexDHGexInit.X)
+	writeInt(h, Y)
+
+	K := make([]byte, intLength(kInt))
+	marshalInt(K, kInt)
+	h.Write(K)
+
+	H := h.Sum(nil)
+
+	// H is already a hash, but the hostkey signing will apply its
+	// own key-specific hash algorithm.
+	sig, err := signAndMarshal(priv, randSource, H)
+	if err != nil {
+		return nil, err
+	}
+
+	kexDHGexReply := kexDHGexReplyMsg{
+		HostKey:   hostKeyBytes,
+		Y:         Y,
+		Signature: sig,
+	}
+	packet = Marshal(&kexDHGexReply)
+
+	err = c.writePacket(packet)
+
+	return &kexResult{
+		H:         H,
+		K:         K,
+		HostKey:   hostKeyBytes,
+		Signature: sig,
+		Hash:      gex.hashFunc,
+	}, err
+}
--- a/vendor/golang.org/x/crypto/ssh/keys.go
+++ b/vendor/golang.org/x/crypto/ssh/keys.go
@ -33,9 +33,11 @@ const (
 	KeyAlgoRSA        = "ssh-rsa"
 	KeyAlgoDSA        = "ssh-dss"
 	KeyAlgoECDSA256   = "ecdsa-sha2-nistp256"
+	KeyAlgoSKECDSA256 = "sk-ecdsa-sha2-nistp256@openssh.com"
 	KeyAlgoECDSA384   = "ecdsa-sha2-nistp384"
 	KeyAlgoECDSA521   = "ecdsa-sha2-nistp521"
 	KeyAlgoED25519    = "ssh-ed25519"
+	KeyAlgoSKED25519  = "sk-ssh-ed25519@openssh.com"
 )

 // These constants represent non-default signature algorithms that are supported
@ -58,9 +60,13 @@ func parsePubKey(in []byte, algo string) (pubKey PublicKey, rest []byte, err err
 		return parseDSA(in)
 	case KeyAlgoECDSA256, KeyAlgoECDSA384, KeyAlgoECDSA521:
 		return parseECDSA(in)
+	case KeyAlgoSKECDSA256:
+		return parseSKECDSA(in)
 	case KeyAlgoED25519:
 		return parseED25519(in)
-	case CertAlgoRSAv01, CertAlgoDSAv01, CertAlgoECDSA256v01, CertAlgoECDSA384v01, CertAlgoECDSA521v01, CertAlgoED25519v01:
+	case KeyAlgoSKED25519:
+		return parseSKEd25519(in)
+	case CertAlgoRSAv01, CertAlgoDSAv01, CertAlgoECDSA256v01, CertAlgoECDSA384v01, CertAlgoECDSA521v01, CertAlgoSKECDSA256v01, CertAlgoED25519v01, CertAlgoSKED25519v01:
 		cert, err := parseCert(in, certToPrivAlgo(algo))
 		if err != nil {
 			return nil, nil, err
@ -685,6 +691,218 @@ func (k *ecdsaPublicKey) CryptoPublicKey() crypto.PublicKey {
 	return (*ecdsa.PublicKey)(k)
 }

+// skFields holds the additional fields present in U2F/FIDO2 signatures.
+// See openssh/PROTOCOL.u2f 'SSH U2F Signatures' for details.
+type skFields struct {
+	// Flags contains U2F/FIDO2 flags such as 'user present'
+	Flags byte
+	// Counter is a monotonic signature counter which can be
+	// used to detect concurrent use of a private key, should
+	// it be extracted from hardware.
+	Counter uint32
+}
+
+type skECDSAPublicKey struct {
+	// application is a URL-like string, typically "ssh:" for SSH.
+	// see openssh/PROTOCOL.u2f for details.
+	application string
+	ecdsa.PublicKey
+}
+
+func (k *skECDSAPublicKey) Type() string {
+	return KeyAlgoSKECDSA256
+}
+
+func (k *skECDSAPublicKey) nistID() string {
+	return "nistp256"
+}
+
+func parseSKECDSA(in []byte) (out PublicKey, rest []byte, err error) {
+	var w struct {
+		Curve       string
+		KeyBytes    []byte
+		Application string
+		Rest        []byte `ssh:"rest"`
+	}
+
+	if err := Unmarshal(in, &w); err != nil {
+		return nil, nil, err
+	}
+
+	key := new(skECDSAPublicKey)
+	key.application = w.Application
+
+	if w.Curve != "nistp256" {
+		return nil, nil, errors.New("ssh: unsupported curve")
+	}
+	key.Curve = elliptic.P256()
+
+	key.X, key.Y = elliptic.Unmarshal(key.Curve, w.KeyBytes)
+	if key.X == nil || key.Y == nil {
+		return nil, nil, errors.New("ssh: invalid curve point")
+	}
+
+	return key, w.Rest, nil
+}
+
+func (k *skECDSAPublicKey) Marshal() []byte {
+	// See RFC 5656, section 3.1.
+	keyBytes := elliptic.Marshal(k.Curve, k.X, k.Y)
+	w := struct {
+		Name        string
+		ID          string
+		Key         []byte
+		Application string
+	}{
+		k.Type(),
+		k.nistID(),
+		keyBytes,
+		k.application,
+	}
+
+	return Marshal(&w)
+}
+
+func (k *skECDSAPublicKey) Verify(data []byte, sig *Signature) error {
+	if sig.Format != k.Type() {
+		return fmt.Errorf("ssh: signature type %s for key type %s", sig.Format, k.Type())
+	}
+
+	h := ecHash(k.Curve).New()
+	h.Write([]byte(k.application))
+	appDigest := h.Sum(nil)
+
+	h.Reset()
+	h.Write(data)
+	dataDigest := h.Sum(nil)
+
+	var ecSig struct {
+		R *big.Int
+		S *big.Int
+	}
+	if err := Unmarshal(sig.Blob, &ecSig); err != nil {
+		return err
+	}
+
+	var skf skFields
+	if err := Unmarshal(sig.Rest, &skf); err != nil {
+		return err
+	}
+
+	blob := struct {
+		ApplicationDigest []byte `ssh:"rest"`
+		Flags             byte
+		Counter           uint32
+		MessageDigest     []byte `ssh:"rest"`
+	}{
+		appDigest,
+		skf.Flags,
+		skf.Counter,
+		dataDigest,
+	}
+
+	original := Marshal(blob)
+
+	h.Reset()
+	h.Write(original)
+	digest := h.Sum(nil)
+
+	if ecdsa.Verify((*ecdsa.PublicKey)(&k.PublicKey), digest, ecSig.R, ecSig.S) {
+		return nil
+	}
+	return errors.New("ssh: signature did not verify")
+}
+
+type skEd25519PublicKey struct {
+	// application is a URL-like string, typically "ssh:" for SSH.
+	// see openssh/PROTOCOL.u2f for details.
+	application string
+	ed25519.PublicKey
+}
+
+func (k *skEd25519PublicKey) Type() string {
+	return KeyAlgoSKED25519
+}
+
+func parseSKEd25519(in []byte) (out PublicKey, rest []byte, err error) {
+	var w struct {
+		KeyBytes    []byte
+		Application string
+		Rest        []byte `ssh:"rest"`
+	}
+
+	if err := Unmarshal(in, &w); err != nil {
+		return nil, nil, err
+	}
+
+	key := new(skEd25519PublicKey)
+	key.application = w.Application
+	key.PublicKey = ed25519.PublicKey(w.KeyBytes)
+
+	return key, w.Rest, nil
+}
+
+func (k *skEd25519PublicKey) Marshal() []byte {
+	w := struct {
+		Name        string
+		KeyBytes    []byte
+		Application string
+	}{
+		KeyAlgoSKED25519,
+		[]byte(k.PublicKey),
+		k.application,
+	}
+	return Marshal(&w)
+}
+
+func (k *skEd25519PublicKey) Verify(data []byte, sig *Signature) error {
+	if sig.Format != k.Type() {
+		return fmt.Errorf("ssh: signature type %s for key type %s", sig.Format, k.Type())
+	}
+
+	h := sha256.New()
+	h.Write([]byte(k.application))
+	appDigest := h.Sum(nil)
+
+	h.Reset()
+	h.Write(data)
+	dataDigest := h.Sum(nil)
+
+	var edSig struct {
+		Signature []byte `ssh:"rest"`
+	}
+
+	if err := Unmarshal(sig.Blob, &edSig); err != nil {
+		return err
+	}
+
+	var skf skFields
+	if err := Unmarshal(sig.Rest, &skf); err != nil {
+		return err
+	}
+
+	blob := struct {
+		ApplicationDigest []byte `ssh:"rest"`
+		Flags             byte
+		Counter           uint32
+		MessageDigest     []byte `ssh:"rest"`
+	}{
+		appDigest,
+		skf.Flags,
+		skf.Counter,
+		dataDigest,
+	}
+
+	original := Marshal(blob)
+
+	edKey := (ed25519.PublicKey)(k.PublicKey)
+	if ok := ed25519.Verify(edKey, original, edSig.Signature); !ok {
+		return errors.New("ssh: signature did not verify")
+	}
+
+	return nil
+}
+
 // NewSignerFromKey takes an *rsa.PrivateKey, *dsa.PrivateKey,
 // *ecdsa.PrivateKey or any other crypto.Signer and returns a
 // corresponding Signer instance. ECDSA keys must use P-256, P-384 or
@ -837,7 +1055,8 @@ func NewPublicKey(key interface{}) (PublicKey, error) {
 }

 // ParsePrivateKey returns a Signer from a PEM encoded private key. It supports
-// the same keys as ParseRawPrivateKey.
+// the same keys as ParseRawPrivateKey. If the private key is encrypted, it
+// will return a PassphraseMissingError.
 func ParsePrivateKey(pemBytes []byte) (Signer, error) {
 	key, err := ParseRawPrivateKey(pemBytes)
 	if err != nil {
@ -850,8 +1069,8 @@ func ParsePrivateKey(pemBytes []byte) (Signer, error) {
 // ParsePrivateKeyWithPassphrase returns a Signer from a PEM encoded private
 // key and passphrase. It supports the same keys as
 // ParseRawPrivateKeyWithPassphrase.
-func ParsePrivateKeyWithPassphrase(pemBytes, passPhrase []byte) (Signer, error) {
-	key, err := ParseRawPrivateKeyWithPassphrase(pemBytes, passPhrase)
+func ParsePrivateKeyWithPassphrase(pemBytes, passphrase []byte) (Signer, error) {
+	key, err := ParseRawPrivateKeyWithPassphrase(pemBytes, passphrase)
 	if err != nil {
 		return nil, err
 	}
@ -867,8 +1086,21 @@ func encryptedBlock(block *pem.Block) bool {
 	return strings.Contains(block.Headers["Proc-Type"], "ENCRYPTED")
 }

+// A PassphraseMissingError indicates that parsing this private key requires a
+// passphrase. Use ParsePrivateKeyWithPassphrase.
+type PassphraseMissingError struct {
+	// PublicKey will be set if the private key format includes an unencrypted
+	// public key along with the encrypted private key.
+	PublicKey PublicKey
+}
+
+func (*PassphraseMissingError) Error() string {
+	return "ssh: this private key is passphrase protected"
+}
+
 // ParseRawPrivateKey returns a private key from a PEM encoded private key. It
-// supports RSA (PKCS#1), PKCS#8, DSA (OpenSSL), and ECDSA private keys.
+// supports RSA (PKCS#1), PKCS#8, DSA (OpenSSL), and ECDSA private keys. If the
+// private key is encrypted, it will return a PassphraseMissingError.
 func ParseRawPrivateKey(pemBytes []byte) (interface{}, error) {
 	block, _ := pem.Decode(pemBytes)
 	if block == nil {
@ -876,7 +1108,7 @@ func ParseRawPrivateKey(pemBytes []byte) (interface{}, error) {
 	}

 	if encryptedBlock(block) {
-		return nil, errors.New("ssh: cannot decode encrypted private keys")
+		return nil, &PassphraseMissingError{}
 	}

 	switch block.Type {
@ -899,25 +1131,23 @@ func ParseRawPrivateKey(pemBytes []byte) (interface{}, error) {
 // ParseRawPrivateKeyWithPassphrase returns a private key decrypted with
 // passphrase from a PEM encoded private key. If wrong passphrase, return
 // x509.IncorrectPasswordError.
-func ParseRawPrivateKeyWithPassphrase(pemBytes, passPhrase []byte) (interface{}, error) {
+func ParseRawPrivateKeyWithPassphrase(pemBytes, passphrase []byte) (interface{}, error) {
 	block, _ := pem.Decode(pemBytes)
 	if block == nil {
 		return nil, errors.New("ssh: no key found")
 	}
-	buf := block.Bytes

-	if encryptedBlock(block) {
-		if x509.IsEncryptedPEMBlock(block) {
-			var err error
-			buf, err = x509.DecryptPEMBlock(block, passPhrase)
+	if !encryptedBlock(block) || !x509.IsEncryptedPEMBlock(block) {
+		return nil, errors.New("ssh: not an encrypted key")
+	}
+
+	buf, err := x509.DecryptPEMBlock(block, passphrase)
 	if err != nil {
 		if err == x509.IncorrectPasswordError {
 			return nil, err
 		}
 		return nil, fmt.Errorf("ssh: cannot decode encrypted private keys: %v", err)
 	}
-		}
-	}

 	switch block.Type {
 	case "RSA PRIVATE KEY":
@ -926,8 +1156,6 @@ func ParseRawPrivateKeyWithPassphrase(pemBytes, passPhrase []byte) (interface{},
 		return x509.ParseECPrivateKey(buf)
 	case "DSA PRIVATE KEY":
 		return ParseDSAPrivateKey(buf)
-	case "OPENSSH PRIVATE KEY":
-		return parseOpenSSHPrivateKey(buf)
 	default:
 		return nil, fmt.Errorf("ssh: unsupported key type %q", block.Type)
 	}
--- a/vendor/golang.org/x/crypto/ssh/messages.go
+++ b/vendor/golang.org/x/crypto/ssh/messages.go
@ -97,6 +97,36 @@ type kexDHReplyMsg struct {
 	Signature []byte
 }

+// See RFC 4419, section 5.
+const msgKexDHGexGroup = 31
+
+type kexDHGexGroupMsg struct {
+	P *big.Int `sshtype:"31"`
+	G *big.Int
+}
+
+const msgKexDHGexInit = 32
+
+type kexDHGexInitMsg struct {
+	X *big.Int `sshtype:"32"`
+}
+
+const msgKexDHGexReply = 33
+
+type kexDHGexReplyMsg struct {
+	HostKey   []byte `sshtype:"33"`
+	Y         *big.Int
+	Signature []byte
+}
+
+const msgKexDHGexRequest = 34
+
+type kexDHGexRequestMsg struct {
+	MinBits      uint32 `sshtype:"34"`
+	PreferedBits uint32
+	MaxBits      uint32
+}
+
 // See RFC 4253, section 10.
 const msgServiceRequest = 5

@ -275,6 +305,42 @@ type userAuthPubKeyOkMsg struct {
 	PubKey []byte
 }

+// See RFC 4462, section 3
+const msgUserAuthGSSAPIResponse = 60
+
+type userAuthGSSAPIResponse struct {
+	SupportMech []byte `sshtype:"60"`
+}
+
+const msgUserAuthGSSAPIToken = 61
+
+type userAuthGSSAPIToken struct {
+	Token []byte `sshtype:"61"`
+}
+
+const msgUserAuthGSSAPIMIC = 66
+
+type userAuthGSSAPIMIC struct {
+	MIC []byte `sshtype:"66"`
+}
+
+// See RFC 4462, section 3.9
+const msgUserAuthGSSAPIErrTok = 64
+
+type userAuthGSSAPIErrTok struct {
+	ErrorToken []byte `sshtype:"64"`
+}
+
+// See RFC 4462, section 3.8
+const msgUserAuthGSSAPIError = 65
+
+type userAuthGSSAPIError struct {
+	MajorStatus uint32 `sshtype:"65"`
+	MinorStatus uint32
+	Message     string
+	LanguageTag string
+}
+
 // typeTags returns the possible type bytes for the given reflect.Type, which
 // should be a struct. The possible values are separated by a '|' character.
 func typeTags(structType reflect.Type) (tags []byte) {
@ -756,6 +822,14 @@ func decode(packet []byte) (interface{}, error) {
 		msg = new(channelRequestSuccessMsg)
 	case msgChannelFailure:
 		msg = new(channelRequestFailureMsg)
+	case msgUserAuthGSSAPIToken:
+		msg = new(userAuthGSSAPIToken)
+	case msgUserAuthGSSAPIMIC:
+		msg = new(userAuthGSSAPIMIC)
+	case msgUserAuthGSSAPIErrTok:
+		msg = new(userAuthGSSAPIErrTok)
+	case msgUserAuthGSSAPIError:
+		msg = new(userAuthGSSAPIError)
 	default:
 		return nil, unexpectedMessageError(0, packet[0])
 	}
--- a/vendor/golang.org/x/crypto/ssh/server.go
+++ b/vendor/golang.org/x/crypto/ssh/server.go
@ -45,6 +45,20 @@ type Permissions struct {
 	Extensions map[string]string
 }

+type GSSAPIWithMICConfig struct {
+	// AllowLogin, must be set, is called when gssapi-with-mic
+	// authentication is selected (RFC 4462 section 3). The srcName is from the
+	// results of the GSS-API authentication. The format is username@DOMAIN.
+	// GSSAPI just guarantees to the server who the user is, but not if they can log in, and with what permissions.
+	// This callback is called after the user identity is established with GSSAPI to decide if the user can login with
+	// which permissions. If the user is allowed to login, it should return a nil error.
+	AllowLogin func(conn ConnMetadata, srcName string) (*Permissions, error)
+
+	// Server must be set. It's the implementation
+	// of the GSSAPIServer interface. See GSSAPIServer interface for details.
+	Server GSSAPIServer
+}
+
 // ServerConfig holds server specific configuration data.
 type ServerConfig struct {
 	// Config contains configuration shared between client and server.
@ -99,6 +113,10 @@ type ServerConfig struct {
 	// BannerCallback, if present, is called and the return string is sent to
 	// the client after key exchange completed but before authentication.
 	BannerCallback func(conn ConnMetadata) string
+
+	// GSSAPIWithMICConfig includes gssapi server and callback, which if both non-nil, is used
+	// when gssapi-with-mic authentication is selected (RFC 4462 section 3).
+	GSSAPIWithMICConfig *GSSAPIWithMICConfig
 }

 // AddHostKey adds a private key as a host key. If an existing host
@ -175,6 +193,12 @@ func NewServerConn(c net.Conn, config *ServerConfig) (*ServerConn, <-chan NewCha
 	if fullConf.MaxAuthTries == 0 {
 		fullConf.MaxAuthTries = 6
 	}
+	// Check if the config contains any unsupported key exchanges
+	for _, kex := range fullConf.KeyExchanges {
+		if _, ok := serverForbiddenKexAlgos[kex]; ok {
+			return nil, nil, nil, fmt.Errorf("ssh: unsupported key exchange %s for server", kex)
+		}
+	}

 	s := &connection{
 		sshConn: sshConn{conn: c},
@ -204,7 +228,9 @@ func (s *connection) serverHandshake(config *ServerConfig) (*Permissions, error)
 		return nil, errors.New("ssh: server has no host keys")
 	}

-	if !config.NoClientAuth && config.PasswordCallback == nil && config.PublicKeyCallback == nil && config.KeyboardInteractiveCallback == nil {
+	if !config.NoClientAuth && config.PasswordCallback == nil && config.PublicKeyCallback == nil &&
+		config.KeyboardInteractiveCallback == nil && (config.GSSAPIWithMICConfig == nil ||
+		config.GSSAPIWithMICConfig.AllowLogin == nil || config.GSSAPIWithMICConfig.Server == nil) {
 		return nil, errors.New("ssh: no authentication methods configured but NoClientAuth is also false")
 	}

@ -258,8 +284,8 @@ func (s *connection) serverHandshake(config *ServerConfig) (*Permissions, error)

 func isAcceptableAlgo(algo string) bool {
 	switch algo {
-	case KeyAlgoRSA, KeyAlgoDSA, KeyAlgoECDSA256, KeyAlgoECDSA384, KeyAlgoECDSA521, KeyAlgoED25519,
-		CertAlgoRSAv01, CertAlgoDSAv01, CertAlgoECDSA256v01, CertAlgoECDSA384v01, CertAlgoECDSA521v01, CertAlgoED25519v01:
+	case KeyAlgoRSA, KeyAlgoDSA, KeyAlgoECDSA256, KeyAlgoECDSA384, KeyAlgoECDSA521, KeyAlgoSKECDSA256, KeyAlgoED25519, KeyAlgoSKED25519,
+		CertAlgoRSAv01, CertAlgoDSAv01, CertAlgoECDSA256v01, CertAlgoECDSA384v01, CertAlgoECDSA521v01, CertAlgoSKECDSA256v01, CertAlgoED25519v01, CertAlgoSKED25519v01:
 		return true
 	}
 	return false
@ -295,6 +321,55 @@ func checkSourceAddress(addr net.Addr, sourceAddrs string) error {
 	return fmt.Errorf("ssh: remote address %v is not allowed because of source-address restriction", addr)
 }

+func gssExchangeToken(gssapiConfig *GSSAPIWithMICConfig, firstToken []byte, s *connection,
+	sessionID []byte, userAuthReq userAuthRequestMsg) (authErr error, perms *Permissions, err error) {
+	gssAPIServer := gssapiConfig.Server
+	defer gssAPIServer.DeleteSecContext()
+	var srcName string
+	for {
+		var (
+			outToken     []byte
+			needContinue bool
+		)
+		outToken, srcName, needContinue, err = gssAPIServer.AcceptSecContext(firstToken)
+		if err != nil {
+			return err, nil, nil
+		}
+		if len(outToken) != 0 {
+			if err := s.transport.writePacket(Marshal(&userAuthGSSAPIToken{
+				Token: outToken,
+			})); err != nil {
+				return nil, nil, err
+			}
+		}
+		if !needContinue {
+			break
+		}
+		packet, err := s.transport.readPacket()
+		if err != nil {
+			return nil, nil, err
+		}
+		userAuthGSSAPITokenReq := &userAuthGSSAPIToken{}
+		if err := Unmarshal(packet, userAuthGSSAPITokenReq); err != nil {
+			return nil, nil, err
+		}
+	}
+	packet, err := s.transport.readPacket()
+	if err != nil {
+		return nil, nil, err
+	}
+	userAuthGSSAPIMICReq := &userAuthGSSAPIMIC{}
+	if err := Unmarshal(packet, userAuthGSSAPIMICReq); err != nil {
+		return nil, nil, err
+	}
+	mic := buildMIC(string(sessionID), userAuthReq.User, userAuthReq.Service, userAuthReq.Method)
+	if err := gssAPIServer.VerifyMIC(mic, userAuthGSSAPIMICReq.MIC); err != nil {
+		return err, nil, nil
+	}
+	perms, authErr = gssapiConfig.AllowLogin(s, srcName)
+	return authErr, perms, nil
+}
+
 // ServerAuthError represents server authentication errors and is
 // sometimes returned by NewServerConn. It appends any authentication
 // errors that may occur, and is returned if all of the authentication
@ -496,6 +571,49 @@ userAuthLoop:
 				authErr = candidate.result
 				perms = candidate.perms
 			}
+		case "gssapi-with-mic":
+			gssapiConfig := config.GSSAPIWithMICConfig
+			userAuthRequestGSSAPI, err := parseGSSAPIPayload(userAuthReq.Payload)
+			if err != nil {
+				return nil, parseError(msgUserAuthRequest)
+			}
+			// OpenSSH supports Kerberos V5 mechanism only for GSS-API authentication.
+			if userAuthRequestGSSAPI.N == 0 {
+				authErr = fmt.Errorf("ssh: Mechanism negotiation is not supported")
+				break
+			}
+			var i uint32
+			present := false
+			for i = 0; i < userAuthRequestGSSAPI.N; i++ {
+				if userAuthRequestGSSAPI.OIDS[i].Equal(krb5Mesh) {
+					present = true
+					break
+				}
+			}
+			if !present {
+				authErr = fmt.Errorf("ssh: GSSAPI authentication must use the Kerberos V5 mechanism")
+				break
+			}
+			// Initial server response, see RFC 4462 section 3.3.
+			if err := s.transport.writePacket(Marshal(&userAuthGSSAPIResponse{
+				SupportMech: krb5OID,
+			})); err != nil {
+				return nil, err
+			}
+			// Exchange token, see RFC 4462 section 3.4.
+			packet, err := s.transport.readPacket()
+			if err != nil {
+				return nil, err
+			}
+			userAuthGSSAPITokenReq := &userAuthGSSAPIToken{}
+			if err := Unmarshal(packet, userAuthGSSAPITokenReq); err != nil {
+				return nil, err
+			}
+			authErr, perms, err = gssExchangeToken(gssapiConfig, userAuthGSSAPITokenReq.Token, s, sessionID,
+				userAuthReq)
+			if err != nil {
+				return nil, err
+			}
 		default:
 			authErr = fmt.Errorf("ssh: unknown method %q", userAuthReq.Method)
 		}
@ -522,6 +640,10 @@ userAuthLoop:
 		if config.KeyboardInteractiveCallback != nil {
 			failureMsg.Methods = append(failureMsg.Methods, "keyboard-interactive")
 		}
+		if config.GSSAPIWithMICConfig != nil && config.GSSAPIWithMICConfig.Server != nil &&
+			config.GSSAPIWithMICConfig.AllowLogin != nil {
+			failureMsg.Methods = append(failureMsg.Methods, "gssapi-with-mic")
+		}

 		if len(failureMsg.Methods) == 0 {
 			return nil, errors.New("ssh: no authentication methods configured but NoClientAuth is also false")
--- a/vendor/golang.org/x/crypto/ssh/ssh_gss.go
+++ b/vendor/golang.org/x/crypto/ssh/ssh_gss.go
@ -0,0 +1,139 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssh
+
+import (
+	"encoding/asn1"
+	"errors"
+)
+
+var krb5OID []byte
+
+func init() {
+	krb5OID, _ = asn1.Marshal(krb5Mesh)
+}
+
+// GSSAPIClient provides the API to plug-in GSSAPI authentication for client logins.
+type GSSAPIClient interface {
+	// InitSecContext initiates the establishment of a security context for GSS-API between the
+	// ssh client and ssh server. Initially the token parameter should be specified as nil.
+	// The routine may return a outputToken which should be transferred to
+	// the ssh server, where the ssh server will present it to
+	// AcceptSecContext. If no token need be sent, InitSecContext will indicate this by setting
+	// needContinue to false. To complete the context
+	// establishment, one or more reply tokens may be required from the ssh
+	// server;if so, InitSecContext will return a needContinue which is true.
+	// In this case, InitSecContext should be called again when the
+	// reply token is received from the ssh server, passing the reply
+	// token to InitSecContext via the token parameters.
+	// See RFC 2743 section 2.2.1 and RFC 4462 section 3.4.
+	InitSecContext(target string, token []byte, isGSSDelegCreds bool) (outputToken []byte, needContinue bool, err error)
+	// GetMIC generates a cryptographic MIC for the SSH2 message, and places
+	// the MIC in a token for transfer to the ssh server.
+	// The contents of the MIC field are obtained by calling GSS_GetMIC()
+	// over the following, using the GSS-API context that was just
+	// established:
+	//  string    session identifier
+	//  byte      SSH_MSG_USERAUTH_REQUEST
+	//  string    user name
+	//  string    service
+	//  string    "gssapi-with-mic"
+	// See RFC 2743 section 2.3.1 and RFC 4462 3.5.
+	GetMIC(micFiled []byte) ([]byte, error)
+	// Whenever possible, it should be possible for
+	// DeleteSecContext() calls to be successfully processed even
+	// if other calls cannot succeed, thereby enabling context-related
+	// resources to be released.
+	// In addition to deleting established security contexts,
+	// gss_delete_sec_context must also be able to delete "half-built"
+	// security contexts resulting from an incomplete sequence of
+	// InitSecContext()/AcceptSecContext() calls.
+	// See RFC 2743 section 2.2.3.
+	DeleteSecContext() error
+}
+
+// GSSAPIServer provides the API to plug in GSSAPI authentication for server logins.
+type GSSAPIServer interface {
+	// AcceptSecContext allows a remotely initiated security context between the application
+	// and a remote peer to be established by the ssh client. The routine may return a
+	// outputToken which should be transferred to the ssh client,
+	// where the ssh client will present it to InitSecContext.
+	// If no token need be sent, AcceptSecContext will indicate this
+	// by setting the needContinue to false. To
+	// complete the context establishment, one or more reply tokens may be
+	// required from the ssh client. if so, AcceptSecContext
+	// will return a needContinue which is true, in which case it
+	// should be called again when the reply token is received from the ssh
+	// client, passing the token to AcceptSecContext via the
+	// token parameters.
+	// The srcName return value is the authenticated username.
+	// See RFC 2743 section 2.2.2 and RFC 4462 section 3.4.
+	AcceptSecContext(token []byte) (outputToken []byte, srcName string, needContinue bool, err error)
+	// VerifyMIC verifies that a cryptographic MIC, contained in the token parameter,
+	// fits the supplied message is received from the ssh client.
+	// See RFC 2743 section 2.3.2.
+	VerifyMIC(micField []byte, micToken []byte) error
+	// Whenever possible, it should be possible for
+	// DeleteSecContext() calls to be successfully processed even
+	// if other calls cannot succeed, thereby enabling context-related
+	// resources to be released.
+	// In addition to deleting established security contexts,
+	// gss_delete_sec_context must also be able to delete "half-built"
+	// security contexts resulting from an incomplete sequence of
+	// InitSecContext()/AcceptSecContext() calls.
+	// See RFC 2743 section 2.2.3.
+	DeleteSecContext() error
+}
+
+var (
+	// OpenSSH supports Kerberos V5 mechanism only for GSS-API authentication,
+	// so we also support the krb5 mechanism only.
+	// See RFC 1964 section 1.
+	krb5Mesh = asn1.ObjectIdentifier{1, 2, 840, 113554, 1, 2, 2}
+)
+
+// The GSS-API authentication method is initiated when the client sends an SSH_MSG_USERAUTH_REQUEST
+// See RFC 4462 section 3.2.
+type userAuthRequestGSSAPI struct {
+	N    uint32
+	OIDS []asn1.ObjectIdentifier
+}
+
+func parseGSSAPIPayload(payload []byte) (*userAuthRequestGSSAPI, error) {
+	n, rest, ok := parseUint32(payload)
+	if !ok {
+		return nil, errors.New("parse uint32 failed")
+	}
+	s := &userAuthRequestGSSAPI{
+		N:    n,
+		OIDS: make([]asn1.ObjectIdentifier, n),
+	}
+	for i := 0; i < int(n); i++ {
+		var (
+			desiredMech []byte
+			err         error
+		)
+		desiredMech, rest, ok = parseString(rest)
+		if !ok {
+			return nil, errors.New("parse string failed")
+		}
+		if rest, err = asn1.Unmarshal(desiredMech, &s.OIDS[i]); err != nil {
+			return nil, err
+		}
+
+	}
+	return s, nil
+}
+
+// See RFC 4462 section 3.6.
+func buildMIC(sessionID string, username string, service string, authMethod string) []byte {
+	out := make([]byte, 0, 0)
+	out = appendString(out, sessionID)
+	out = append(out, msgUserAuthRequest)
+	out = appendString(out, username)
+	out = appendString(out, service)
+	out = appendString(out, authMethod)
+	return out
+}
--- a/vendor/golang.org/x/crypto/ssh/terminal/terminal.go
+++ b/vendor/golang.org/x/crypto/ssh/terminal/terminal.go
@ -947,6 +947,10 @@ func readPasswordLine(reader io.Reader) ([]byte, error) {
 		n, err := reader.Read(buf[:])
 		if n > 0 {
 			switch buf[0] {
+			case '\b':
+				if len(ret) > 0 {
+					ret = ret[:len(ret)-1]
+				}
 			case '\n':
 				return ret, nil
 			case '\r':
--- a/vendor/golang.org/x/crypto/ssh/terminal/util_windows.go
+++ b/vendor/golang.org/x/crypto/ssh/terminal/util_windows.go
@ -85,8 +85,8 @@ func ReadPassword(fd int) ([]byte, error) {
 	}
 	old := st

-	st &^= (windows.ENABLE_ECHO_INPUT)
-	st |= (windows.ENABLE_PROCESSED_INPUT | windows.ENABLE_LINE_INPUT | windows.ENABLE_PROCESSED_OUTPUT)
+	st &^= (windows.ENABLE_ECHO_INPUT | windows.ENABLE_LINE_INPUT)
+	st |= (windows.ENABLE_PROCESSED_OUTPUT | windows.ENABLE_PROCESSED_INPUT)
 	if err := windows.SetConsoleMode(windows.Handle(fd), st); err != nil {
 		return nil, err
 	}