Merge pull request #5502 from thaJeztah/separate_kvfile

move parsing key-value files to a separate package
This commit is contained in:
Sebastiaan van Stijn 2024-10-04 13:46:08 +02:00 committed by GitHub
commit ff3f94a542
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 283 additions and 161 deletions

View File

@ -919,7 +919,7 @@ func TestParseEnvfileVariablesWithBOMUnicode(t *testing.T) {
} }
// UTF16 with BOM // UTF16 with BOM
e := "contains invalid utf8 bytes at line" e := "invalid utf8 bytes at line"
if _, _, _, err := parseRun([]string{"--env-file=testdata/utf16.env", "img", "cmd"}); err == nil || !strings.Contains(err.Error(), e) { if _, _, _, err := parseRun([]string{"--env-file=testdata/utf16.env", "img", "cmd"}); err == nil || !strings.Contains(err.Error(), e) {
t.Fatalf("Expected an error with message '%s', got %v", e, err) t.Fatalf("Expected an error with message '%s', got %v", e, err)
} }

View File

@ -2,6 +2,8 @@ package opts
import ( import (
"os" "os"
"github.com/docker/cli/pkg/kvfile"
) )
// ParseEnvFile reads a file with environment variables enumerated by lines // ParseEnvFile reads a file with environment variables enumerated by lines
@ -18,5 +20,5 @@ import (
// environment variables, that's why we just strip leading whitespace and // environment variables, that's why we just strip leading whitespace and
// nothing more. // nothing more.
func ParseEnvFile(filename string) ([]string, error) { func ParseEnvFile(filename string) ([]string, error) {
return parseKeyValueFile(filename, os.LookupEnv) return kvfile.Parse(filename, os.LookupEnv)
} }

View File

@ -1,10 +1,8 @@
package opts package opts
import ( import (
"bufio"
"os" "os"
"path/filepath" "path/filepath"
"strings"
"testing" "testing"
"gotest.tools/v3/assert" "gotest.tools/v3/assert"
@ -19,86 +17,12 @@ func tmpFileWithContent(t *testing.T, content string) string {
return fileName return fileName
} }
// Test ParseEnvFile for a file with a few well formatted lines
func TestParseEnvFileGoodFile(t *testing.T) {
content := `foo=bar
baz=quux
# comment
_foobar=foobaz
with.dots=working
and_underscore=working too
`
// Adding a newline + a line with pure whitespace.
// This is being done like this instead of the block above
// because it's common for editors to trim trailing whitespace
// from lines, which becomes annoying since that's the
// exact thing we need to test.
content += "\n \t "
tmpFile := tmpFileWithContent(t, content)
lines, err := ParseEnvFile(tmpFile)
assert.NilError(t, err)
expectedLines := []string{
"foo=bar",
"baz=quux",
"_foobar=foobaz",
"with.dots=working",
"and_underscore=working too",
}
assert.Check(t, is.DeepEqual(lines, expectedLines))
}
// Test ParseEnvFile for an empty file
func TestParseEnvFileEmptyFile(t *testing.T) {
tmpFile := tmpFileWithContent(t, "")
lines, err := ParseEnvFile(tmpFile)
assert.NilError(t, err)
assert.Check(t, is.Len(lines, 0))
}
// Test ParseEnvFile for a non existent file // Test ParseEnvFile for a non existent file
func TestParseEnvFileNonExistentFile(t *testing.T) { func TestParseEnvFileNonExistentFile(t *testing.T) {
_, err := ParseEnvFile("no_such_file") _, err := ParseEnvFile("no_such_file")
assert.Check(t, is.ErrorType(err, os.IsNotExist)) assert.Check(t, is.ErrorType(err, os.IsNotExist))
} }
// Test ParseEnvFile for a badly formatted file
func TestParseEnvFileBadlyFormattedFile(t *testing.T) {
content := `foo=bar
f =quux
`
tmpFile := tmpFileWithContent(t, content)
_, err := ParseEnvFile(tmpFile)
const expectedMessage = "variable 'f ' contains whitespaces"
assert.Check(t, is.ErrorContains(err, expectedMessage))
}
// Test ParseEnvFile for a file with a line exceeding bufio.MaxScanTokenSize
func TestParseEnvFileLineTooLongFile(t *testing.T) {
content := "foo=" + strings.Repeat("a", bufio.MaxScanTokenSize+42)
tmpFile := tmpFileWithContent(t, content)
_, err := ParseEnvFile(tmpFile)
const expectedMessage = "bufio.Scanner: token too long"
assert.Check(t, is.ErrorContains(err, expectedMessage))
}
// ParseEnvFile with a random file, pass through
func TestParseEnvFileRandomFile(t *testing.T) {
content := `first line
another invalid line`
tmpFile := tmpFileWithContent(t, content)
_, err := ParseEnvFile(tmpFile)
const expectedMessage = "variable 'first line' contains whitespaces"
assert.Check(t, is.ErrorContains(err, expectedMessage))
}
// ParseEnvFile with environment variable import definitions // ParseEnvFile with environment variable import definitions
func TestParseEnvVariableDefinitionsFile(t *testing.T) { func TestParseEnvVariableDefinitionsFile(t *testing.T) {
content := `# comment= content := `# comment=
@ -114,15 +38,3 @@ DEFINED_VAR
expectedLines := []string{"DEFINED_VAR=defined-value"} expectedLines := []string{"DEFINED_VAR=defined-value"}
assert.Check(t, is.DeepEqual(variables, expectedLines)) assert.Check(t, is.DeepEqual(variables, expectedLines))
} }
// ParseEnvFile with empty variable name
func TestParseEnvVariableWithNoNameFile(t *testing.T) {
content := `# comment=
=blank variable names are an error case
`
tmpFile := tmpFileWithContent(t, content)
_, err := ParseEnvFile(tmpFile)
const expectedMessage = "no variable name on line '=blank variable names are an error case'"
assert.Check(t, is.ErrorContains(err, expectedMessage))
}

View File

@ -1,70 +0,0 @@
package opts
import (
"bufio"
"bytes"
"fmt"
"os"
"strings"
"unicode"
"unicode/utf8"
)
const whiteSpaces = " \t"
func parseKeyValueFile(filename string, lookupFn func(string) (string, bool)) ([]string, error) {
fh, err := os.Open(filename)
if err != nil {
return []string{}, err
}
defer fh.Close()
lines := []string{}
scanner := bufio.NewScanner(fh)
utf8bom := []byte{0xEF, 0xBB, 0xBF}
for currentLine := 1; scanner.Scan(); currentLine++ {
scannedBytes := scanner.Bytes()
if !utf8.Valid(scannedBytes) {
return []string{}, fmt.Errorf("env file %s contains invalid utf8 bytes at line %d: %v", filename, currentLine, scannedBytes)
}
// We trim UTF8 BOM
if currentLine == 1 {
scannedBytes = bytes.TrimPrefix(scannedBytes, utf8bom)
}
// trim the line from all leading whitespace first. trailing whitespace
// is part of the value, and is kept unmodified.
line := strings.TrimLeftFunc(string(scannedBytes), unicode.IsSpace)
if len(line) == 0 || line[0] == '#' {
// skip empty lines and comments (lines starting with '#')
continue
}
key, _, hasValue := strings.Cut(line, "=")
if len(key) == 0 {
return []string{}, fmt.Errorf("no variable name on line '%s'", line)
}
// leading whitespace was already removed from the line, but
// variables are not allowed to contain whitespace or have
// trailing whitespace.
if strings.ContainsAny(key, whiteSpaces) {
return []string{}, fmt.Errorf("variable '%s' contains whitespaces", key)
}
if hasValue {
// key/value pair is valid and has a value; add the line as-is.
lines = append(lines, line)
continue
}
if lookupFn != nil {
// No value given; try to look up the value. The value may be
// empty but if no value is found, the key is omitted.
if value, found := lookupFn(line); found {
lines = append(lines, key+"="+value)
}
}
}
return lines, scanner.Err()
}

View File

@ -266,6 +266,8 @@ func validateDomain(val string) (string, error) {
return "", fmt.Errorf("%s is not a valid domain", val) return "", fmt.Errorf("%s is not a valid domain", val)
} }
const whiteSpaces = " \t"
// ValidateLabel validates that the specified string is a valid label, and returns it. // ValidateLabel validates that the specified string is a valid label, and returns it.
// //
// Labels are in the form of key=value; key must be a non-empty string, and not // Labels are in the form of key=value; key must be a non-empty string, and not

View File

@ -6,6 +6,7 @@ import (
"strconv" "strconv"
"strings" "strings"
"github.com/docker/cli/pkg/kvfile"
"github.com/docker/docker/api/types/container" "github.com/docker/docker/api/types/container"
) )
@ -25,7 +26,7 @@ func ReadKVEnvStrings(files []string, override []string) ([]string, error) {
func readKVStrings(files []string, override []string, emptyFn func(string) (string, bool)) ([]string, error) { func readKVStrings(files []string, override []string, emptyFn func(string) (string, bool)) ([]string, error) {
var variables []string var variables []string
for _, ef := range files { for _, ef := range files {
parsedVars, err := parseKeyValueFile(ef, emptyFn) parsedVars, err := kvfile.Parse(ef, emptyFn)
if err != nil { if err != nil {
return nil, err return nil, err
} }

130
pkg/kvfile/kvfile.go Normal file
View File

@ -0,0 +1,130 @@
// Package kvfile provides utilities to parse line-delimited key/value files
// such as used for label-files and env-files.
//
// # File format
//
// key/value files use the following syntax:
//
// - File must be valid UTF-8.
// - BOM headers are removed.
// - Leading whitespace is removed for each line.
// - Lines starting with "#" are ignored.
// - Empty lines are ignored.
// - Key/Value pairs are provided as "KEY[=<VALUE>]".
// - Maximum line-length is limited to [bufio.MaxScanTokenSize].
//
// # Interpolation, substitution, and escaping
//
// Both keys and values are used as-is; no interpolation, substitution or
// escaping is supported, and quotes are considered part of the key or value.
// Whitespace in values (including leading and trailing) is preserved. Given
// that the file format is line-delimited, neither key, nor value, can contain
// newlines.
//
// # Key/Value pairs
//
// Key/Value pairs take the following format:
//
// KEY[=<VALUE>]
//
// KEY is required and may not contain whitespaces or NUL characters. Any
// other character (except for the "=" delimiter) are accepted, but it is
// recommended to use a subset of the POSIX portable character set, as
// outlined in [Environment Variables].
//
// VALUE is optional, but may be empty. If no value is provided (i.e., no
// equal sign ("=") is present), the KEY is omitted in the result, but some
// functions accept a lookup-function to provide a default value for the
// given key.
//
// [Environment Variables]: https://pubs.opengroup.org/onlinepubs/7908799/xbd/envvar.html
package kvfile
import (
"bufio"
"bytes"
"fmt"
"io"
"os"
"strings"
"unicode"
"unicode/utf8"
)
// Parse parses a line-delimited key/value pairs separated by equal sign.
// It accepts a lookupFn to lookup default values for keys that do not define
// a value. An error is produced if parsing failed, the content contains invalid
// UTF-8 characters, or a key contains whitespaces.
func Parse(filename string, lookupFn func(key string) (value string, found bool)) ([]string, error) {
fh, err := os.Open(filename)
if err != nil {
return []string{}, err
}
out, err := parseKeyValueFile(fh, lookupFn)
_ = fh.Close()
if err != nil {
return []string{}, fmt.Errorf("invalid env file (%s): %v", filename, err)
}
return out, nil
}
// ParseFromReader parses a line-delimited key/value pairs separated by equal sign.
// It accepts a lookupFn to lookup default values for keys that do not define
// a value. An error is produced if parsing failed, the content contains invalid
// UTF-8 characters, or a key contains whitespaces.
func ParseFromReader(r io.Reader, lookupFn func(key string) (value string, found bool)) ([]string, error) {
return parseKeyValueFile(r, lookupFn)
}
const whiteSpaces = " \t"
func parseKeyValueFile(r io.Reader, lookupFn func(string) (string, bool)) ([]string, error) {
lines := []string{}
scanner := bufio.NewScanner(r)
utf8bom := []byte{0xEF, 0xBB, 0xBF}
for currentLine := 1; scanner.Scan(); currentLine++ {
scannedBytes := scanner.Bytes()
if !utf8.Valid(scannedBytes) {
return []string{}, fmt.Errorf("invalid utf8 bytes at line %d: %v", currentLine, scannedBytes)
}
// We trim UTF8 BOM
if currentLine == 1 {
scannedBytes = bytes.TrimPrefix(scannedBytes, utf8bom)
}
// trim the line from all leading whitespace first. trailing whitespace
// is part of the value, and is kept unmodified.
line := strings.TrimLeftFunc(string(scannedBytes), unicode.IsSpace)
if len(line) == 0 || line[0] == '#' {
// skip empty lines and comments (lines starting with '#')
continue
}
key, _, hasValue := strings.Cut(line, "=")
if len(key) == 0 {
return []string{}, fmt.Errorf("no variable name on line '%s'", line)
}
// leading whitespace was already removed from the line, but
// variables are not allowed to contain whitespace or have
// trailing whitespace.
if strings.ContainsAny(key, whiteSpaces) {
return []string{}, fmt.Errorf("variable '%s' contains whitespaces", key)
}
if hasValue {
// key/value pair is valid and has a value; add the line as-is.
lines = append(lines, line)
continue
}
if lookupFn != nil {
// No value given; try to look up the value. The value may be
// empty but if no value is found, the key is omitted.
if value, found := lookupFn(line); found {
lines = append(lines, key+"="+value)
}
}
}
return lines, scanner.Err()
}

145
pkg/kvfile/kvfile_test.go Normal file
View File

@ -0,0 +1,145 @@
package kvfile
import (
"bufio"
"os"
"path/filepath"
"strings"
"testing"
"gotest.tools/v3/assert"
is "gotest.tools/v3/assert/cmp"
)
// Test Parse for a non existent file.
func TestParseNonExistentFile(t *testing.T) {
_, err := Parse("no_such_file", nil)
assert.Check(t, is.ErrorType(err, os.IsNotExist))
}
// Test Parse from a file with a lookup function.
func TestParseWithLookup(t *testing.T) {
content := `# comment=
VAR=VAR_VALUE
EMPTY_VAR=
UNDEFINED_VAR
DEFINED_VAR
`
vars := map[string]string{
"DEFINED_VAR": "defined-value",
}
lookupFn := func(name string) (string, bool) {
v, ok := vars[name]
return v, ok
}
fileName := filepath.Join(t.TempDir(), "envfile")
err := os.WriteFile(fileName, []byte(content), 0o644)
assert.NilError(t, err)
variables, err := Parse(fileName, lookupFn)
assert.NilError(t, err)
expectedLines := []string{"VAR=VAR_VALUE", "EMPTY_VAR=", "DEFINED_VAR=defined-value"}
assert.Check(t, is.DeepEqual(variables, expectedLines))
}
// Test ParseEnvFile for a file with a few well formatted lines
func TestParseFromReaderGoodFile(t *testing.T) {
content := `foo=bar
baz=quux
# comment
_foobar=foobaz
with.dots=working
and_underscore=working too
`
// Adding a newline + a line with pure whitespace.
// This is being done like this instead of the block above
// because it's common for editors to trim trailing whitespace
// from lines, which becomes annoying since that's the
// exact thing we need to test.
content += "\n \t "
lines, err := ParseFromReader(strings.NewReader(content), nil)
assert.NilError(t, err)
expectedLines := []string{
"foo=bar",
"baz=quux",
"_foobar=foobaz",
"with.dots=working",
"and_underscore=working too",
}
assert.Check(t, is.DeepEqual(lines, expectedLines))
}
// Test ParseFromReader for an empty file
func TestParseFromReaderEmptyFile(t *testing.T) {
lines, err := ParseFromReader(strings.NewReader(""), nil)
assert.NilError(t, err)
assert.Check(t, is.Len(lines, 0))
}
// Test ParseFromReader for a badly formatted file
func TestParseFromReaderBadlyFormattedFile(t *testing.T) {
content := `foo=bar
f =quux
`
_, err := ParseFromReader(strings.NewReader(content), nil)
const expectedMessage = "variable 'f ' contains whitespaces"
assert.Check(t, is.ErrorContains(err, expectedMessage))
}
// Test ParseFromReader for a file with a line exceeding bufio.MaxScanTokenSize
func TestParseFromReaderLineTooLongFile(t *testing.T) {
content := "foo=" + strings.Repeat("a", bufio.MaxScanTokenSize+42)
_, err := ParseFromReader(strings.NewReader(content), nil)
const expectedMessage = "bufio.Scanner: token too long"
assert.Check(t, is.ErrorContains(err, expectedMessage))
}
// ParseEnvFile with a random file, pass through
func TestParseFromReaderRandomFile(t *testing.T) {
content := `first line
another invalid line`
_, err := ParseFromReader(strings.NewReader(content), nil)
const expectedMessage = "variable 'first line' contains whitespaces"
assert.Check(t, is.ErrorContains(err, expectedMessage))
}
// Test ParseFromReader with a lookup function.
func TestParseFromReaderWithLookup(t *testing.T) {
content := `# comment=
VAR=VAR_VALUE
EMPTY_VAR=
UNDEFINED_VAR
DEFINED_VAR
`
vars := map[string]string{
"DEFINED_VAR": "defined-value",
}
lookupFn := func(name string) (string, bool) {
v, ok := vars[name]
return v, ok
}
variables, err := ParseFromReader(strings.NewReader(content), lookupFn)
assert.NilError(t, err)
expectedLines := []string{"VAR=VAR_VALUE", "EMPTY_VAR=", "DEFINED_VAR=defined-value"}
assert.Check(t, is.DeepEqual(variables, expectedLines))
}
// Test ParseFromReader with empty variable name
func TestParseFromReaderWithNoName(t *testing.T) {
content := `# comment=
=blank variable names are an error case
`
_, err := ParseFromReader(strings.NewReader(content), nil)
const expectedMessage = "no variable name on line '=blank variable names are an error case'"
assert.Check(t, is.ErrorContains(err, expectedMessage))
}