From 79a9fd61fdd8974674775a28f1d1211db4de5f5a Mon Sep 17 00:00:00 2001 From: Sebastiaan van Stijn Date: Mon, 21 Jun 2021 12:54:15 +0200 Subject: [PATCH] vendor: github.com/opencontainers/runc v1.0.0-rc95 Signed-off-by: Sebastiaan van Stijn --- vendor.conf | 2 +- .../github.com/opencontainers/runc/README.md | 38 +- vendor/github.com/opencontainers/runc/go.mod | 30 +- .../runc/libcontainer/README.md | 170 +++++---- .../runc/libcontainer/nsenter/cloned_binary.c | 80 ++-- .../runc/libcontainer/nsenter/escape.c | 142 +++++++ .../runc/libcontainer/nsenter/nsexec.c | 353 +++++++++++------- .../runc/libcontainer/nsenter/test/escape.c | 1 + .../runc/libcontainer/nsenter/test/escape.go | 53 +++ .../runc/libcontainer/user/lookup.go | 41 -- .../runc/libcontainer/user/lookup_unix.go | 20 +- .../runc/libcontainer/user/lookup_windows.go | 40 -- .../runc/libcontainer/user/user.go | 52 +-- .../runc/libcontainer/user/user_fuzzer.go | 42 +++ 14 files changed, 662 insertions(+), 402 deletions(-) create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/nsenter/escape.c create mode 120000 vendor/github.com/opencontainers/runc/libcontainer/nsenter/test/escape.c create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/nsenter/test/escape.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/user/lookup.go delete mode 100644 vendor/github.com/opencontainers/runc/libcontainer/user/lookup_windows.go create mode 100644 vendor/github.com/opencontainers/runc/libcontainer/user/user_fuzzer.go diff --git a/vendor.conf b/vendor.conf index 48b9ff1a02..bbdd145a8e 100755 --- a/vendor.conf +++ b/vendor.conf @@ -55,7 +55,7 @@ github.com/modern-go/reflect2 94122c33edd36123c84d5368cfb2 github.com/morikuni/aec 39771216ff4c63d11f5e604076f9c45e8be1067b # v1.0.0 github.com/opencontainers/go-digest ea51bea511f75cfa3ef6098cc253c5c3609b037a # v1.0.0 github.com/opencontainers/image-spec d60099175f88c47cd379c4738d158884749ed235 # v1.0.1 -github.com/opencontainers/runc ff819c7e9184c13b7c2607fe6c30ae19403a7aff # v1.0.0-rc92 +github.com/opencontainers/runc b9ee9c6314599f1b4a7f497e1f1f856fe433d3b7 # v1.0.0-rc95 github.com/opentracing/opentracing-go d34af3eaa63c4d08ab54863a4bdd0daa45212e12 # v1.2.0 github.com/pkg/errors 614d223910a179a466c1767a985424175c39b465 # v0.9.1 github.com/prometheus/client_golang 6edbbd9e560190e318cdc5b4d3e630b442858380 # v1.6.0 diff --git a/vendor/github.com/opencontainers/runc/README.md b/vendor/github.com/opencontainers/runc/README.md index aeef59895b..23106d303c 100644 --- a/vendor/github.com/opencontainers/runc/README.md +++ b/vendor/github.com/opencontainers/runc/README.md @@ -1,9 +1,10 @@ # runc -[![Build Status](https://travis-ci.org/opencontainers/runc.svg?branch=master)](https://travis-ci.org/opencontainers/runc) [![Go Report Card](https://goreportcard.com/badge/github.com/opencontainers/runc)](https://goreportcard.com/report/github.com/opencontainers/runc) [![GoDoc](https://godoc.org/github.com/opencontainers/runc?status.svg)](https://godoc.org/github.com/opencontainers/runc) [![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/588/badge)](https://bestpractices.coreinfrastructure.org/projects/588) +[![gha/validate](https://github.com/opencontainers/runc/workflows/validate/badge.svg)](https://github.com/opencontainers/runc/actions?query=workflow%3Avalidate) +[![gha/ci](https://github.com/opencontainers/runc/workflows/ci/badge.svg)](https://github.com/opencontainers/runc/actions?query=workflow%3Aci) ## Introduction @@ -17,10 +18,6 @@ This 
means that `runc` 1.0.0 should implement the 1.0 version of the specificati You can find official releases of `runc` on the [release](https://github.com/opencontainers/runc/releases) page. -Currently, the following features are not considered to be production-ready: - -* [Support for cgroup v2](./docs/cgroup-v2.md) - ## Security The reporting process and disclosure communications are outlined [here](https://github.com/opencontainers/org/blob/master/SECURITY.md). @@ -64,19 +61,20 @@ sudo make install with some of them enabled by default (see `BUILDTAGS` in top-level `Makefile`). To change build tags from the default, set the `BUILDTAGS` variable for make, -e.g. +e.g. to disable seccomp: ```bash -make BUILDTAGS='seccomp apparmor' +make BUILDTAGS="" ``` | Build Tag | Feature | Enabled by default | Dependency | |-----------|------------------------------------|--------------------|------------| | seccomp | Syscall filtering | yes | libseccomp | -| selinux | selinux process and mount labeling | yes | | -| apparmor | apparmor profile support | yes | | -| nokmem | disable kernel memory accounting | no | | +The following build tags were used earlier, but are now obsoleted: + - **nokmem** (since runc v1.0.0-rc94 kernel memory settings are ignored) + - **apparmor** (since runc v1.0.0-rc93 the feature is always enabled) + - **selinux** (since runc v1.0.0-rc93 the feature is always enabled) ### Running the test suite @@ -128,6 +126,14 @@ make verify-dependencies ## Using runc +Please note that runc is a low level tool not designed with an end user +in mind. It is mostly employed by other higher level container software. + +Therefore, unless there is some specific use case that prevents the use +of tools like Docker or Podman, it is not recommended to use runc directly. + +If you still want to use runc, here's how. + ### Creating an OCI Bundle In order to use runc you must have your container in the format of an OCI bundle. @@ -169,7 +175,9 @@ If you used the unmodified `runc spec` template this should give you a `sh` sess The second way to start a container is using the specs lifecycle operations. This gives you more power over how the container is created and managed while it is running. -This will also launch the container in the background so you will have to edit the `config.json` to remove the `terminal` setting for the simple examples here. +This will also launch the container in the background so you will have to edit +the `config.json` to remove the `terminal` setting for the simple examples +below (see more details about [runc terminal handling](docs/terminals.md)). Your process field in the `config.json` should look like this below with `"terminal": false` and `"args": ["sleep", "5"]`. @@ -292,8 +300,12 @@ PIDFile=/run/mycontainerid.pid WantedBy=multi-user.target ``` -#### cgroup v2 -See [`./docs/cgroup-v2.md`](./docs/cgroup-v2.md). 
+## More documentation + +* [cgroup v2](./docs/cgroup-v2.md) +* [Checkpoint and restore](./docs/checkpoint-restore.md) +* [systemd cgroup driver](./docs/systemd.md) +* [Terminals and standard IO](./docs/terminals.md) ## License diff --git a/vendor/github.com/opencontainers/runc/go.mod b/vendor/github.com/opencontainers/runc/go.mod index fcf068dfae..41cd5aa385 100644 --- a/vendor/github.com/opencontainers/runc/go.mod +++ b/vendor/github.com/opencontainers/runc/go.mod @@ -1,26 +1,28 @@ module github.com/opencontainers/runc -go 1.14 +go 1.13 require ( - github.com/checkpoint-restore/go-criu/v4 v4.1.0 - github.com/cilium/ebpf v0.0.0-20200702112145-1c8d4c9ef775 - github.com/containerd/console v1.0.0 - github.com/coreos/go-systemd/v22 v22.1.0 + github.com/checkpoint-restore/go-criu/v5 v5.0.0 + github.com/cilium/ebpf v0.5.0 + github.com/containerd/console v1.0.2 + github.com/coreos/go-systemd/v22 v22.3.1 github.com/cyphar/filepath-securejoin v0.2.2 github.com/docker/go-units v0.4.0 - github.com/godbus/dbus/v5 v5.0.3 - github.com/golang/protobuf v1.4.2 - github.com/moby/sys/mountinfo v0.1.3 - github.com/mrunalp/fileutils v0.0.0-20200520151820-abd8a0e76976 - github.com/opencontainers/runtime-spec v1.0.3-0.20200728170252-4d89ac9fbff6 - github.com/opencontainers/selinux v1.6.0 + github.com/godbus/dbus/v5 v5.0.4 + github.com/moby/sys/mountinfo v0.4.1 + github.com/mrunalp/fileutils v0.5.0 + github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 + github.com/opencontainers/selinux v1.8.0 github.com/pkg/errors v0.9.1 github.com/seccomp/libseccomp-golang v0.9.1 - github.com/sirupsen/logrus v1.6.0 - github.com/syndtr/gocapability v0.0.0-20180916011248-d98352740cb2 + github.com/sirupsen/logrus v1.7.0 + github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 // NOTE: urfave/cli must be <= v1.22.1 due to a regression: https://github.com/urfave/cli/issues/1092 github.com/urfave/cli v1.22.1 github.com/vishvananda/netlink v1.1.0 - golang.org/x/sys v0.0.0-20200728102440-3e129f6d46b1 + github.com/willf/bitset v1.1.11 + golang.org/x/net v0.0.0-20201224014010-6772e930b67b + golang.org/x/sys v0.0.0-20210426230700-d19ff857e887 + google.golang.org/protobuf v1.25.0 ) diff --git a/vendor/github.com/opencontainers/runc/libcontainer/README.md b/vendor/github.com/opencontainers/runc/libcontainer/README.md index 6803ef56c5..13eee49d4b 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/README.md +++ b/vendor/github.com/opencontainers/runc/libcontainer/README.md @@ -57,90 +57,94 @@ struct describing how the container is to be created. 
A sample would look simila ```go defaultMountFlags := unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV +var devices []*configs.DeviceRule +for _, device := range specconv.AllowedDevices { + devices = append(devices, &device.Rule) +} config := &configs.Config{ Rootfs: "/your/path/to/rootfs", Capabilities: &configs.Capabilities{ - Bounding: []string{ - "CAP_CHOWN", - "CAP_DAC_OVERRIDE", - "CAP_FSETID", - "CAP_FOWNER", - "CAP_MKNOD", - "CAP_NET_RAW", - "CAP_SETGID", - "CAP_SETUID", - "CAP_SETFCAP", - "CAP_SETPCAP", - "CAP_NET_BIND_SERVICE", - "CAP_SYS_CHROOT", - "CAP_KILL", - "CAP_AUDIT_WRITE", - }, - Effective: []string{ - "CAP_CHOWN", - "CAP_DAC_OVERRIDE", - "CAP_FSETID", - "CAP_FOWNER", - "CAP_MKNOD", - "CAP_NET_RAW", - "CAP_SETGID", - "CAP_SETUID", - "CAP_SETFCAP", - "CAP_SETPCAP", - "CAP_NET_BIND_SERVICE", - "CAP_SYS_CHROOT", - "CAP_KILL", - "CAP_AUDIT_WRITE", - }, - Inheritable: []string{ - "CAP_CHOWN", - "CAP_DAC_OVERRIDE", - "CAP_FSETID", - "CAP_FOWNER", - "CAP_MKNOD", - "CAP_NET_RAW", - "CAP_SETGID", - "CAP_SETUID", - "CAP_SETFCAP", - "CAP_SETPCAP", - "CAP_NET_BIND_SERVICE", - "CAP_SYS_CHROOT", - "CAP_KILL", - "CAP_AUDIT_WRITE", - }, - Permitted: []string{ - "CAP_CHOWN", - "CAP_DAC_OVERRIDE", - "CAP_FSETID", - "CAP_FOWNER", - "CAP_MKNOD", - "CAP_NET_RAW", - "CAP_SETGID", - "CAP_SETUID", - "CAP_SETFCAP", - "CAP_SETPCAP", - "CAP_NET_BIND_SERVICE", - "CAP_SYS_CHROOT", - "CAP_KILL", - "CAP_AUDIT_WRITE", - }, - Ambient: []string{ - "CAP_CHOWN", - "CAP_DAC_OVERRIDE", - "CAP_FSETID", - "CAP_FOWNER", - "CAP_MKNOD", - "CAP_NET_RAW", - "CAP_SETGID", - "CAP_SETUID", - "CAP_SETFCAP", - "CAP_SETPCAP", - "CAP_NET_BIND_SERVICE", - "CAP_SYS_CHROOT", - "CAP_KILL", - "CAP_AUDIT_WRITE", - }, - }, + Bounding: []string{ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE", + }, + Effective: []string{ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE", + }, + Inheritable: []string{ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE", + }, + Permitted: []string{ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE", + }, + Ambient: []string{ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE", + }, + }, Namespaces: configs.Namespaces([]configs.Namespace{ {Type: configs.NEWNS}, {Type: configs.NEWUTS}, @@ -155,7 +159,7 @@ config := &configs.Config{ Parent: "system", Resources: &configs.Resources{ MemorySwappiness: nil, - Devices: specconv.AllowedDevices, + Devices: devices, }, }, MaskPaths: []string{ @@ -313,7 +317,7 @@ state, err := container.State() #### Checkpoint & Restore libcontainer now integrates [CRIU](http://criu.org/) for checkpointing and 
restoring containers. -This let's you save the state of a process running inside a container to disk, and then restore +This lets you save the state of a process running inside a container to disk, and then restore that state into a new process, on the same machine or on another machine. `criu` version 1.5.2 or higher is required to use checkpoint and restore. diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/cloned_binary.c b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/cloned_binary.c index 24cc8c6e16..4268ebda9e 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/cloned_binary.c +++ b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/cloned_binary.c @@ -59,14 +59,38 @@ #include /* Use our own wrapper for memfd_create. */ -#if !defined(SYS_memfd_create) && defined(__NR_memfd_create) -# define SYS_memfd_create __NR_memfd_create +#ifndef SYS_memfd_create +# ifdef __NR_memfd_create +# define SYS_memfd_create __NR_memfd_create +# else +/* These values come from . */ +# warning "libc is outdated -- using hard-coded SYS_memfd_create" +# if defined(__x86_64__) +# define SYS_memfd_create 319 +# elif defined(__i386__) +# define SYS_memfd_create 356 +# elif defined(__ia64__) +# define SYS_memfd_create 1340 +# elif defined(__arm__) +# define SYS_memfd_create 385 +# elif defined(__aarch64__) +# define SYS_memfd_create 279 +# elif defined(__ppc__) || defined(__PPC64__) || defined(__powerpc64__) +# define SYS_memfd_create 360 +# elif defined(__s390__) || defined(__s390x__) +# define SYS_memfd_create 350 +# else +# warning "unknown architecture -- cannot hard-code SYS_memfd_create" +# endif +# endif #endif + /* memfd_create(2) flags -- copied from . */ #ifndef MFD_CLOEXEC # define MFD_CLOEXEC 0x0001U # define MFD_ALLOW_SEALING 0x0002U #endif + int memfd_create(const char *name, unsigned int flags) { #ifdef SYS_memfd_create @@ -77,7 +101,6 @@ int memfd_create(const char *name, unsigned int flags) #endif } - /* This comes directly from . */ #ifndef F_LINUX_SPECIFIC_BASE # define F_LINUX_SPECIFIC_BASE 1024 @@ -103,7 +126,7 @@ static void *must_realloc(void *ptr, size_t size) void *old = ptr; do { ptr = realloc(old, size); - } while(!ptr); + } while (!ptr); return ptr; } @@ -115,10 +138,10 @@ static void *must_realloc(void *ptr, size_t size) static int is_self_cloned(void) { int fd, ret, is_cloned = 0; - struct stat statbuf = {}; - struct statfs fsbuf = {}; + struct stat statbuf = { }; + struct statfs fsbuf = { }; - fd = open("/proc/self/exe", O_RDONLY|O_CLOEXEC); + fd = open("/proc/self/exe", O_RDONLY | O_CLOEXEC); if (fd < 0) { fprintf(stderr, "you have no read access to runc binary file\n"); return -ENOTRECOVERABLE; @@ -274,7 +297,7 @@ enum { static int make_execfd(int *fdtype) { int fd = -1; - char template[PATH_MAX] = {0}; + char template[PATH_MAX] = { 0 }; char *prefix = getenv("_LIBCONTAINER_STATEDIR"); if (!prefix || *prefix != '/') @@ -303,7 +326,7 @@ static int make_execfd(int *fdtype) *fdtype = EFD_FILE; fd = open(prefix, O_TMPFILE | O_EXCL | O_RDWR | O_CLOEXEC, 0700); if (fd >= 0) { - struct stat statbuf = {}; + struct stat statbuf = { }; bool working_otmpfile = false; /* @@ -348,27 +371,27 @@ static int seal_execfd(int *fd, int fdtype) switch (fdtype) { case EFD_MEMFD: return fcntl(*fd, F_ADD_SEALS, RUNC_MEMFD_SEALS); - case EFD_FILE: { - /* Need to re-open our pseudo-memfd as an O_PATH to avoid execve(2) giving -ETXTBSY. 
*/ - int newfd; - char fdpath[PATH_MAX] = {0}; + case EFD_FILE:{ + /* Need to re-open our pseudo-memfd as an O_PATH to avoid execve(2) giving -ETXTBSY. */ + int newfd; + char fdpath[PATH_MAX] = { 0 }; - if (fchmod(*fd, 0100) < 0) - return -1; + if (fchmod(*fd, 0100) < 0) + return -1; - if (snprintf(fdpath, sizeof(fdpath), "/proc/self/fd/%d", *fd) < 0) - return -1; + if (snprintf(fdpath, sizeof(fdpath), "/proc/self/fd/%d", *fd) < 0) + return -1; - newfd = open(fdpath, O_PATH | O_CLOEXEC); - if (newfd < 0) - return -1; + newfd = open(fdpath, O_PATH | O_CLOEXEC); + if (newfd < 0) + return -1; - close(*fd); - *fd = newfd; - return 0; - } + close(*fd); + *fd = newfd; + return 0; + } default: - break; + break; } return -1; } @@ -376,7 +399,7 @@ static int seal_execfd(int *fd, int fdtype) static int try_bindfd(void) { int fd, ret = -1; - char template[PATH_MAX] = {0}; + char template[PATH_MAX] = { 0 }; char *prefix = getenv("_LIBCONTAINER_STATEDIR"); if (!prefix || *prefix != '/') @@ -404,7 +427,6 @@ static int try_bindfd(void) if (mount("", template, "", MS_REMOUNT | MS_BIND | MS_RDONLY, "") < 0) goto out_umount; - /* Get read-only handle that we're sure can't be made read-write. */ ret = open(template, O_PATH | O_CLOEXEC); @@ -448,7 +470,7 @@ static ssize_t fd_to_fd(int outfd, int infd) if (n < 0) return -1; nwritten += n; - } while(nwritten < nread); + } while (nwritten < nread); total += nwritten; } @@ -459,7 +481,7 @@ static ssize_t fd_to_fd(int outfd, int infd) static int clone_binary(void) { int binfd, execfd; - struct stat statbuf = {}; + struct stat statbuf = { }; size_t sent = 0; int fdtype = EFD_NONE; diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/escape.c b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/escape.c new file mode 100644 index 0000000000..78e7e9f489 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/escape.c @@ -0,0 +1,142 @@ +#include +#include + +#ifdef ESCAPE_TEST +# include +# define test_assert(arg) assert(arg) +#else +# define test_assert(arg) +#endif + +#define DEL '\x7f' + +/* + * Poor man version of itoa with base=16 and input number from 0 to 15, + * represented by a char. Converts it to a single hex digit ('0' to 'f'). + */ +static char hex(char i) +{ + test_assert(i >= 0 && i < 16); + + if (i >= 0 && i < 10) { + return '0' + i; + } + if (i >= 10 && i < 16) { + return 'a' + i - 10; + } + return '?'; +} + +/* + * Given the character, tells how many _extra_ characters are needed + * to JSON-escape it. If 0 is returned, the character does not need to + * be escaped. + */ +static int need_escape(char c) +{ + switch (c) { + case '\\': + case '"': + case '\b': + case '\n': + case '\r': + case '\t': + case '\f': + return 1; + case DEL: // -> \u007f + return 5; + default: + if (c > 0 && c < ' ') { + // ASCII decimal 01 to 31 -> \u00xx + return 5; + } + return 0; + } +} + +/* + * Escape the string so it can be used as a JSON string (per RFC4627, + * section 2.5 minimal requirements, plus the DEL (0x7f) character). + * + * It is expected that the argument is a string allocated via malloc. + * In case no escaping is needed, the original string is returned as is; + * otherwise, the original string is free'd, and the newly allocated + * escaped string is returned. Thus, in any case, the value returned + * need to be free'd by the caller. 
+ */ +char *escape_json_string(char *s) +{ + int i, j, len; + char *c, *out; + + /* + * First, check if escaping is at all needed -- if not, we can avoid + * malloc and return the argument as is. While at it, count how much + * extra space is required. + * + * XXX: the counting code must be in sync with the escaping code + * (checked by test_assert()s below). + */ + for (i = j = 0; s[i] != '\0'; i++) { + j += need_escape(s[i]); + } + if (j == 0) { + // nothing to escape + return s; + } + + len = i + j + 1; + out = malloc(len); + if (!out) { + free(s); + // As malloc failed, strdup can fail, too, so in the worst case + // scenario NULL will be returned from here. + return strdup("escape_json_string: out of memory"); + } + for (c = s, j = 0; *c != '\0'; c++) { + switch (*c) { + case '"': + case '\\': + test_assert(need_escape(*c) == 1); + out[j++] = '\\'; + out[j++] = *c; + continue; + } + if ((*c < 0 || *c >= ' ') && (*c != DEL)) { + // no escape needed + test_assert(need_escape(*c) == 0); + out[j++] = *c; + continue; + } + out[j++] = '\\'; + switch (*c) { + case '\b': + out[j++] = 'b'; + break; + case '\n': + out[j++] = 'n'; + break; + case '\r': + out[j++] = 'r'; + break; + case '\t': + out[j++] = 't'; + break; + case '\f': + out[j++] = 'f'; + break; + default: + test_assert(need_escape(*c) == 5); + out[j++] = 'u'; + out[j++] = '0'; + out[j++] = '0'; + out[j++] = hex(*c >> 4); + out[j++] = hex(*c & 0x0f); + } + } + test_assert(j + 1 == len); + out[j] = '\0'; + + free(s); + return out; +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c index a33f2fcc37..bee0042942 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c +++ b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c @@ -29,6 +29,8 @@ /* Get all of the CLONE_NEW* flags. */ #include "namespace.h" +extern char *escape_json_string(char *str); + /* Synchronisation values. */ enum sync_t { SYNC_USERMAP_PLS = 0x40, /* Request parent to map our users. */ @@ -36,7 +38,7 @@ enum sync_t { SYNC_RECVPID_PLS = 0x42, /* Tell parent we're sending the PID. */ SYNC_RECVPID_ACK = 0x43, /* PID was correctly received by parent. */ SYNC_GRANDCHILD = 0x44, /* The grandchild is ready to run. */ - SYNC_CHILD_READY = 0x45, /* The child or grandchild is ready to return. */ + SYNC_CHILD_FINISH = 0x45, /* The child or grandchild has finished. */ }; /* @@ -45,10 +47,14 @@ enum sync_t { */ #define CREATECGROUPNS 0x80 +#define STAGE_SETUP -1 /* longjmp() arguments. */ -#define JUMP_PARENT 0x00 -#define JUMP_CHILD 0xA0 -#define JUMP_INIT 0xA1 +#define STAGE_PARENT 0 +#define STAGE_CHILD 1 +#define STAGE_INIT 2 + +/* Stores the current stage of nsexec. */ +int current_stage = STAGE_SETUP; /* Assume the stack grows down, so arguments should be above it. */ struct clone_t { @@ -56,7 +62,7 @@ struct clone_t { * Reserve some space for clone() to locate arguments * and retcode in this place */ - char stack[4096] __attribute__ ((aligned(16))); + char stack[4096] __attribute__((aligned(16))); char stack_ptr[0]; /* There's two children. This is used to execute the different code. */ @@ -102,31 +108,31 @@ static int logfd = -1; * List of netlink message types sent to us as part of bootstrapping the init. * These constants are defined in libcontainer/message_linux.go. 
*/ -#define INIT_MSG 62000 +#define INIT_MSG 62000 #define CLONE_FLAGS_ATTR 27281 #define NS_PATHS_ATTR 27282 -#define UIDMAP_ATTR 27283 -#define GIDMAP_ATTR 27284 +#define UIDMAP_ATTR 27283 +#define GIDMAP_ATTR 27284 #define SETGROUP_ATTR 27285 #define OOM_SCORE_ADJ_ATTR 27286 #define ROOTLESS_EUID_ATTR 27287 -#define UIDMAPPATH_ATTR 27288 -#define GIDMAPPATH_ATTR 27289 +#define UIDMAPPATH_ATTR 27288 +#define GIDMAPPATH_ATTR 27289 /* * Use the raw syscall for versions of glibc which don't include a function for * it, namely (glibc 2.12). */ #if __GLIBC__ == 2 && __GLIBC_MINOR__ < 14 -# define _GNU_SOURCE -# include "syscall.h" -# if !defined(SYS_setns) && defined(__NR_setns) -# define SYS_setns __NR_setns -# endif +# define _GNU_SOURCE +# include "syscall.h" +# if !defined(SYS_setns) && defined(__NR_setns) +# define SYS_setns __NR_setns +# endif -#ifndef SYS_setns -# error "setns(2) syscall not supported by glibc version" -#endif +# ifndef SYS_setns +# error "setns(2) syscall not supported by glibc version" +# endif int setns(int fd, int nstype) { @@ -134,33 +140,43 @@ int setns(int fd, int nstype) } #endif -static void write_log_with_info(const char *level, const char *function, int line, const char *format, ...) +static void write_log(const char *level, const char *format, ...) { - char message[1024] = {}; - + char *message = NULL, *stage = NULL; va_list args; + int ret; if (logfd < 0 || level == NULL) - return; + goto out; va_start(args, format); - if (vsnprintf(message, sizeof(message), format, args) < 0) - goto done; - - dprintf(logfd, "{\"level\":\"%s\", \"msg\": \"%s:%d %s\"}\n", level, function, line, message); -done: + ret = vasprintf(&message, format, args); va_end(args); -} + if (ret < 0) + goto out; -#define write_log(level, fmt, ...) \ - write_log_with_info((level), __FUNCTION__, __LINE__, (fmt), ##__VA_ARGS__) + message = escape_json_string(message); + + if (current_stage == STAGE_SETUP) + stage = strdup("nsexec"); + else + ret = asprintf(&stage, "nsexec-%d", current_stage); + if (ret < 0) + goto out; + + dprintf(logfd, "{\"level\":\"%s\", \"msg\": \"%s[%d]: %s\"}\n", level, stage, getpid(), message); + +out: + free(message); + free(stage); +} /* XXX: This is ugly. */ static int syncfd = -1; #define bail(fmt, ...) \ do { \ - write_log(FATAL, "nsenter: " fmt ": %m", ##__VA_ARGS__); \ + write_log(FATAL, fmt ": %m", ##__VA_ARGS__); \ exit(1); \ } while(0) @@ -187,7 +203,7 @@ static int write_file(char *data, size_t data_len, char *pathfmt, ...) 
goto out; } - out: +out: close(fd); return ret; } @@ -297,9 +313,11 @@ static void update_uidmap(const char *path, int pid, char *map, size_t map_len) if (map == NULL || map_len <= 0) return; + write_log(DEBUG, "update /proc/%d/uid_map to '%s'", pid, map); if (write_file(map, map_len, "/proc/%d/uid_map", pid) < 0) { if (errno != EPERM) bail("failed to update /proc/%d/uid_map", pid); + write_log(DEBUG, "update /proc/%d/uid_map got -EPERM (trying %s)", pid, path); if (try_mapping_tool(path, pid, map, map_len)) bail("failed to use newuid map on %d", pid); } @@ -310,9 +328,11 @@ static void update_gidmap(const char *path, int pid, char *map, size_t map_len) if (map == NULL || map_len <= 0) return; + write_log(DEBUG, "update /proc/%d/gid_map to '%s'", pid, map); if (write_file(map, map_len, "/proc/%d/gid_map", pid) < 0) { if (errno != EPERM) bail("failed to update /proc/%d/gid_map", pid); + write_log(DEBUG, "update /proc/%d/gid_map got -EPERM (trying %s)", pid, path); if (try_mapping_tool(path, pid, map, map_len)) bail("failed to use newgid map on %d", pid); } @@ -323,19 +343,20 @@ static void update_oom_score_adj(char *data, size_t len) if (data == NULL || len <= 0) return; + write_log(DEBUG, "update /proc/self/oom_score_adj to '%s'", data); if (write_file(data, len, "/proc/self/oom_score_adj") < 0) bail("failed to update /proc/self/oom_score_adj"); } /* A dummy function that just jumps to the given jumpval. */ -static int child_func(void *arg) __attribute__ ((noinline)); +static int child_func(void *arg) __attribute__((noinline)); static int child_func(void *arg) { struct clone_t *ca = (struct clone_t *)arg; longjmp(*ca->env, ca->jmpval); } -static int clone_parent(jmp_buf *env, int jmpval) __attribute__ ((noinline)); +static int clone_parent(jmp_buf *env, int jmpval) __attribute__((noinline)); static int clone_parent(jmp_buf *env, int jmpval) { struct clone_t ca = { @@ -507,7 +528,6 @@ void join_namespaces(char *nslist) char *namespace = strtok_r(nslist, ",", &saveptr); struct namespace_t { int fd; - int ns; char type[PATH_MAX]; char path[PATH_MAX]; } *namespaces = NULL; @@ -542,7 +562,7 @@ void join_namespaces(char *nslist) bail("failed to open %s", path); ns->fd = fd; - ns->ns = nsflag(namespace); + strncpy(ns->type, namespace, PATH_MAX - 1); strncpy(ns->path, path, PATH_MAX - 1); ns->path[PATH_MAX - 1] = '\0'; } while ((namespace = strtok_r(NULL, ",", &saveptr)) != NULL); @@ -555,12 +575,14 @@ void join_namespaces(char *nslist) */ for (i = 0; i < num; i++) { - struct namespace_t ns = namespaces[i]; + struct namespace_t *ns = &namespaces[i]; + int flag = nsflag(ns->type); - if (setns(ns.fd, ns.ns) < 0) - bail("failed to setns to %s", ns.path); + write_log(DEBUG, "setns(%#x) into %s namespace (with path %s)", flag, ns->type, ns->path); + if (setns(ns->fd, flag) < 0) + bail("failed to setns into %s namespace", ns->type); - close(ns.fd); + close(ns->fd); } free(namespaces); @@ -569,6 +591,14 @@ void join_namespaces(char *nslist) /* Defined in cloned_binary.c. */ extern int ensure_cloned_binary(void); +static inline int sane_kill(pid_t pid, int signum) +{ + if (pid > 0) + return kill(pid, signum); + else + return 0; +} + void nsexec(void) { int pipenum; @@ -598,7 +628,14 @@ void nsexec(void) if (ensure_cloned_binary() < 0) bail("could not ensure we are a cloned binary"); - write_log(DEBUG, "nsexec started"); + /* + * Inform the parent we're past initial setup. + * For the other side of this, see initWaiter. 
+ */ + if (write(pipenum, "", 1) != 1) + bail("could not inform the parent we are past initial setup"); + + write_log(DEBUG, "=> nsexec container setup"); /* Parse all of the netlink configuration. */ nl_parse(pipenum, &config); @@ -622,6 +659,7 @@ void nsexec(void) * containers), which is the recommendation from the kernel folks. */ if (config.namespaces) { + write_log(DEBUG, "set process as non-dumpable"); if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0) bail("failed to set process as non-dumpable"); } @@ -686,45 +724,49 @@ void nsexec(void) * -- Aleksa "what has my life come to?" Sarai */ - switch (setjmp(env)) { + current_stage = setjmp(env); + switch (current_stage) { /* * Stage 0: We're in the parent. Our job is just to create a new child - * (stage 1: JUMP_CHILD) process and write its uid_map and + * (stage 1: STAGE_CHILD) process and write its uid_map and * gid_map. That process will go on to create a new process, then * it will send us its PID which we will send to the bootstrap * process. */ - case JUMP_PARENT:{ + case STAGE_PARENT:{ int len; - pid_t child, first_child = -1; - bool ready = false; + pid_t stage1_pid = -1, stage2_pid = -1; + bool stage1_complete, stage2_complete; /* For debugging. */ prctl(PR_SET_NAME, (unsigned long)"runc:[0:PARENT]", 0, 0, 0); + write_log(DEBUG, "~> nsexec stage-0"); /* Start the process of getting a container. */ - child = clone_parent(&env, JUMP_CHILD); - if (child < 0) - bail("unable to fork: child_func"); + write_log(DEBUG, "spawn stage-1"); + stage1_pid = clone_parent(&env, STAGE_CHILD); + if (stage1_pid < 0) + bail("unable to spawn stage-1"); - /* - * State machine for synchronisation with the children. - * - * Father only return when both child and grandchild are - * ready, so we can receive all possible error codes - * generated by children. - */ syncfd = sync_child_pipe[1]; close(sync_child_pipe[0]); - while (!ready) { + /* + * State machine for synchronisation with the children. We only + * return once both the child and grandchild are ready. + */ + write_log(DEBUG, "-> stage-1 synchronisation loop"); + stage1_complete = false; + while (!stage1_complete) { enum sync_t s; if (read(syncfd, &s, sizeof(s)) != sizeof(s)) - bail("failed to sync with child: next state"); + bail("failed to sync with stage-1: next state"); switch (s) { case SYNC_USERMAP_PLS: + write_log(DEBUG, "stage-1 requested userns mappings"); + /* * Enable setgroups(2) if we've been asked to. But we also * have to explicitly disable setgroups(2) if we're @@ -735,70 +777,78 @@ void nsexec(void) * For rootless multi-entry mapping, config.is_setgroup shall be true and * newuidmap/newgidmap shall be used. */ - if (config.is_rootless_euid && !config.is_setgroup) - update_setgroups(child, SETGROUPS_DENY); + update_setgroups(stage1_pid, SETGROUPS_DENY); /* Set up mappings. 
*/ - update_uidmap(config.uidmappath, child, config.uidmap, config.uidmap_len); - update_gidmap(config.gidmappath, child, config.gidmap, config.gidmap_len); + update_uidmap(config.uidmappath, stage1_pid, config.uidmap, config.uidmap_len); + update_gidmap(config.gidmappath, stage1_pid, config.gidmap, config.gidmap_len); s = SYNC_USERMAP_ACK; if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { - kill(child, SIGKILL); - bail("failed to sync with child: write(SYNC_USERMAP_ACK)"); + sane_kill(stage1_pid, SIGKILL); + sane_kill(stage2_pid, SIGKILL); + bail("failed to sync with stage-1: write(SYNC_USERMAP_ACK)"); } break; - case SYNC_RECVPID_PLS:{ - first_child = child; + case SYNC_RECVPID_PLS: + write_log(DEBUG, "stage-1 requested pid to be forwarded"); - /* Get the init_func pid. */ - if (read(syncfd, &child, sizeof(child)) != sizeof(child)) { - kill(first_child, SIGKILL); - bail("failed to sync with child: read(childpid)"); - } + /* Get the stage-2 pid. */ + if (read(syncfd, &stage2_pid, sizeof(stage2_pid)) != sizeof(stage2_pid)) { + sane_kill(stage1_pid, SIGKILL); + sane_kill(stage2_pid, SIGKILL); + bail("failed to sync with stage-1: read(stage2_pid)"); + } - /* Send ACK. */ - s = SYNC_RECVPID_ACK; - if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { - kill(first_child, SIGKILL); - kill(child, SIGKILL); - bail("failed to sync with child: write(SYNC_RECVPID_ACK)"); - } + /* Send ACK. */ + s = SYNC_RECVPID_ACK; + if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { + sane_kill(stage1_pid, SIGKILL); + sane_kill(stage2_pid, SIGKILL); + bail("failed to sync with stage-1: write(SYNC_RECVPID_ACK)"); + } - /* Send the init_func pid back to our parent. - * - * Send the init_func pid and the pid of the first child back to our parent. - * We need to send both back because we can't reap the first child we created (CLONE_PARENT). - * It becomes the responsibility of our parent to reap the first child. - */ - len = dprintf(pipenum, "{\"pid\": %d, \"pid_first\": %d}\n", child, first_child); - if (len < 0) { - kill(child, SIGKILL); - bail("unable to generate JSON for child pid"); - } + /* + * Send both the stage-1 and stage-2 pids back to runc. + * runc needs the stage-2 to continue process management, + * but because stage-1 was spawned with CLONE_PARENT we + * cannot reap it within stage-0 and thus we need to ask + * runc to reap the zombie for us. + */ + write_log(DEBUG, "forward stage-1 (%d) and stage-2 (%d) pids to runc", + stage1_pid, stage2_pid); + len = + dprintf(pipenum, "{\"stage1_pid\":%d,\"stage2_pid\":%d}\n", stage1_pid, + stage2_pid); + if (len < 0) { + sane_kill(stage1_pid, SIGKILL); + sane_kill(stage2_pid, SIGKILL); + bail("failed to sync with runc: write(pid-JSON)"); } break; - case SYNC_CHILD_READY: - ready = true; + case SYNC_CHILD_FINISH: + write_log(DEBUG, "stage-1 complete"); + stage1_complete = true; break; default: bail("unexpected sync value: %u", s); } } + write_log(DEBUG, "<- stage-1 synchronisation loop"); /* Now sync with grandchild. 
*/ - syncfd = sync_grandchild_pipe[1]; close(sync_grandchild_pipe[0]); - - ready = false; - while (!ready) { + write_log(DEBUG, "-> stage-2 synchronisation loop"); + stage2_complete = false; + while (!stage2_complete) { enum sync_t s; + write_log(DEBUG, "signalling stage-2 to run"); s = SYNC_GRANDCHILD; if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { - kill(child, SIGKILL); + sane_kill(stage2_pid, SIGKILL); bail("failed to sync with child: write(SYNC_GRANDCHILD)"); } @@ -806,27 +856,31 @@ void nsexec(void) bail("failed to sync with child: next state"); switch (s) { - case SYNC_CHILD_READY: - ready = true; + case SYNC_CHILD_FINISH: + write_log(DEBUG, "stage-2 complete"); + stage2_complete = true; break; default: bail("unexpected sync value: %u", s); } } + write_log(DEBUG, "<- stage-2 synchronisation loop"); + write_log(DEBUG, "<~ nsexec stage-0"); exit(0); } + break; /* * Stage 1: We're in the first child process. Our job is to join any - * provided namespaces in the netlink payload and unshare all - * of the requested namespaces. If we've been asked to - * CLONE_NEWUSER, we will ask our parent (stage 0) to set up - * our user mappings for us. Then, we create a new child - * (stage 2: JUMP_INIT) for PID namespace. We then send the - * child's PID to our parent (stage 0). + * provided namespaces in the netlink payload and unshare all of + * the requested namespaces. If we've been asked to CLONE_NEWUSER, + * we will ask our parent (stage 0) to set up our user mappings + * for us. Then, we create a new child (stage 2: STAGE_INIT) for + * PID namespace. We then send the child's PID to our parent + * (stage 0). */ - case JUMP_CHILD:{ - pid_t child; + case STAGE_CHILD:{ + pid_t stage2_pid = -1; enum sync_t s; /* We're in a child and thus need to tell the parent if we die. */ @@ -835,11 +889,12 @@ void nsexec(void) /* For debugging. */ prctl(PR_SET_NAME, (unsigned long)"runc:[1:CHILD]", 0, 0, 0); + write_log(DEBUG, "~> nsexec stage-1"); /* * We need to setns first. We cannot do this earlier (in stage 0) * because of the fact that we forked to get here (the PID of - * [stage 2: JUMP_INIT]) would be meaningless). We could send it + * [stage 2: STAGE_INIT]) would be meaningless). We could send it * using cmsg(3) but that's just annoying. */ if (config.namespaces) @@ -865,40 +920,50 @@ void nsexec(void) * problem. */ if (config.cloneflags & CLONE_NEWUSER) { + write_log(DEBUG, "unshare user namespace"); if (unshare(CLONE_NEWUSER) < 0) bail("failed to unshare user namespace"); config.cloneflags &= ~CLONE_NEWUSER; /* - * We don't have the privileges to do any mapping here (see the - * clone_parent rant). So signal our parent to hook us up. + * We need to set ourselves as dumpable temporarily so that the + * parent process can write to our procfs files. */ - - /* Switching is only necessary if we joined namespaces. */ if (config.namespaces) { + write_log(DEBUG, "temporarily set process as dumpable"); if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) < 0) - bail("failed to set process as dumpable"); + bail("failed to temporarily set process as dumpable"); } + + /* + * We don't have the privileges to do any mapping here (see the + * clone_parent rant). So signal stage-0 to do the mapping for + * us. + */ + write_log(DEBUG, "request stage-0 to map user namespace"); s = SYNC_USERMAP_PLS; if (write(syncfd, &s, sizeof(s)) != sizeof(s)) bail("failed to sync with parent: write(SYNC_USERMAP_PLS)"); /* ... wait for mapping ... 
*/ - + write_log(DEBUG, "request stage-0 to map user namespace"); if (read(syncfd, &s, sizeof(s)) != sizeof(s)) bail("failed to sync with parent: read(SYNC_USERMAP_ACK)"); if (s != SYNC_USERMAP_ACK) bail("failed to sync with parent: SYNC_USERMAP_ACK: got %u", s); - /* Switching is only necessary if we joined namespaces. */ + + /* Revert temporary re-dumpable setting. */ if (config.namespaces) { + write_log(DEBUG, "re-set process as non-dumpable"); if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0) - bail("failed to set process as dumpable"); + bail("failed to re-set process as non-dumpable"); } /* Become root in the namespace proper. */ if (setresuid(0, 0, 0) < 0) bail("failed to become root in user namespace"); } + /* * Unshare all of the namespaces. Now, it should be noted that this * ordering might break in the future (especially with rootless @@ -909,8 +974,9 @@ void nsexec(void) * some old kernel versions where clone(CLONE_PARENT | CLONE_NEWPID) * was broken, so we'll just do it the long way anyway. */ + write_log(DEBUG, "unshare remaining namespace (except cgroupns)"); if (unshare(config.cloneflags & ~CLONE_NEWCGROUP) < 0) - bail("failed to unshare namespaces"); + bail("failed to unshare remaining namespaces (except cgroupns)"); /* * TODO: What about non-namespace clone flags that we're dropping here? @@ -921,41 +987,45 @@ void nsexec(void) * which would break many applications and libraries, so we must fork * to actually enter the new PID namespace. */ - child = clone_parent(&env, JUMP_INIT); - if (child < 0) - bail("unable to fork: init_func"); + write_log(DEBUG, "spawn stage-2"); + stage2_pid = clone_parent(&env, STAGE_INIT); + if (stage2_pid < 0) + bail("unable to spawn stage-2"); /* Send the child to our parent, which knows what it's doing. */ + write_log(DEBUG, "request stage-0 to forward stage-2 pid (%d)", stage2_pid); s = SYNC_RECVPID_PLS; if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { - kill(child, SIGKILL); + sane_kill(stage2_pid, SIGKILL); bail("failed to sync with parent: write(SYNC_RECVPID_PLS)"); } - if (write(syncfd, &child, sizeof(child)) != sizeof(child)) { - kill(child, SIGKILL); - bail("failed to sync with parent: write(childpid)"); + if (write(syncfd, &stage2_pid, sizeof(stage2_pid)) != sizeof(stage2_pid)) { + sane_kill(stage2_pid, SIGKILL); + bail("failed to sync with parent: write(stage2_pid)"); } /* ... wait for parent to get the pid ... */ - if (read(syncfd, &s, sizeof(s)) != sizeof(s)) { - kill(child, SIGKILL); + sane_kill(stage2_pid, SIGKILL); bail("failed to sync with parent: read(SYNC_RECVPID_ACK)"); } if (s != SYNC_RECVPID_ACK) { - kill(child, SIGKILL); + sane_kill(stage2_pid, SIGKILL); bail("failed to sync with parent: SYNC_RECVPID_ACK: got %u", s); } - s = SYNC_CHILD_READY; + write_log(DEBUG, "signal completion to stage-0"); + s = SYNC_CHILD_FINISH; if (write(syncfd, &s, sizeof(s)) != sizeof(s)) { - kill(child, SIGKILL); - bail("failed to sync with parent: write(SYNC_CHILD_READY)"); + sane_kill(stage2_pid, SIGKILL); + bail("failed to sync with parent: write(SYNC_CHILD_FINISH)"); } - /* Our work is done. [Stage 2: JUMP_INIT] is doing the rest of the work. */ + /* Our work is done. [Stage 2: STAGE_INIT] is doing the rest of the work. */ + write_log(DEBUG, "<~ nsexec stage-1"); exit(0); } + break; /* * Stage 2: We're the final child process, and the only process that will @@ -963,7 +1033,7 @@ void nsexec(void) * final cleanup steps and then return to the Go runtime to allow * init_linux.go to run. 
*/ - case JUMP_INIT:{ + case STAGE_INIT:{ /* * We're inside the child now, having jumped from the * start_child() code after forking in the parent. @@ -978,6 +1048,7 @@ void nsexec(void) /* For debugging. */ prctl(PR_SET_NAME, (unsigned long)"runc:[2:INIT]", 0, 0, 0); + write_log(DEBUG, "~> nsexec stage-2"); if (read(syncfd, &s, sizeof(s)) != sizeof(s)) bail("failed to sync with parent: read(SYNC_GRANDCHILD)"); @@ -998,21 +1069,30 @@ void nsexec(void) bail("setgroups failed"); } - /* ... wait until our topmost parent has finished cgroup setup in p.manager.Apply() ... */ + /* + * Wait until our topmost parent has finished cgroup setup in + * p.manager.Apply(). + * + * TODO(cyphar): Check if this code is actually needed because we + * should be in the cgroup even from stage-0, so + * waiting until now might not make sense. + */ if (config.cloneflags & CLONE_NEWCGROUP) { uint8_t value; if (read(pipenum, &value, sizeof(value)) != sizeof(value)) bail("read synchronisation value failed"); if (value == CREATECGROUPNS) { + write_log(DEBUG, "unshare cgroup namespace"); if (unshare(CLONE_NEWCGROUP) < 0) bail("failed to unshare cgroup namespace"); } else bail("received unknown synchronisation value"); } - s = SYNC_CHILD_READY; + write_log(DEBUG, "signal completion to stage-0"); + s = SYNC_CHILD_FINISH; if (write(syncfd, &s, sizeof(s)) != sizeof(s)) - bail("failed to sync with patent: write(SYNC_CHILD_READY)"); + bail("failed to sync with patent: write(SYNC_CHILD_FINISH)"); /* Close sync pipes. */ close(sync_grandchild_pipe[0]); @@ -1021,10 +1101,13 @@ void nsexec(void) nl_free(&config); /* Finish executing, let the Go runtime take over. */ + write_log(DEBUG, "<= nsexec container setup"); + write_log(DEBUG, "booting up go runtime ..."); return; } + break; default: - bail("unexpected jump value"); + bail("unknown stage '%d' for jump value", current_stage); } /* Should never be reached. */ diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/test/escape.c b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/test/escape.c new file mode 120000 index 0000000000..c53e316a22 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/test/escape.c @@ -0,0 +1 @@ +../escape.c \ No newline at end of file diff --git a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/test/escape.go b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/test/escape.go new file mode 100644 index 0000000000..4accf967a4 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/test/escape.go @@ -0,0 +1,53 @@ +package escapetest + +// This file is part of escape_json_string unit test. +// It is in a separate package so cgo can be used together +// with go test. 
+ +// #include +// extern char *escape_json_string(char *str); +// #cgo CFLAGS: -DESCAPE_TEST=1 +import "C" + +import ( + "testing" + "unsafe" +) + +func testEscapeJsonString(t *testing.T, input, want string) { + in := C.CString(input) + out := C.escape_json_string(in) + got := C.GoString(out) + C.free(unsafe.Pointer(out)) + t.Logf("input: %q, output: %q", input, got) + if got != want { + t.Errorf("Failed on input: %q, want %q, got %q", input, want, got) + } +} + +func testEscapeJson(t *testing.T) { + testCases := []struct { + input, output string + }{ + {"", ""}, + {"abcdef", "abcdef"}, + {`\\\\\\`, `\\\\\\\\\\\\`}, + {`with"quote`, `with\"quote`}, + {"\n\r\b\t\f\\", `\n\r\b\t\f\\`}, + {"\007", "\\u0007"}, + {"\017 \020 \037", "\\u000f \\u0010 \\u001f"}, + {"\033", "\\u001b"}, + {`<->`, `<->`}, + {"\176\177\200", "~\\u007f\200"}, + {"\000", ""}, + {"a\x7fxc", "a\\u007fxc"}, + {"a\033xc", "a\\u001bxc"}, + {"a\nxc", "a\\nxc"}, + {"a\\xc", "a\\\\xc"}, + {"Barney B\303\244r", "Barney B\303\244r"}, + } + + for _, tc := range testCases { + testEscapeJsonString(t, tc.input, tc.output) + } +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup.go b/vendor/github.com/opencontainers/runc/libcontainer/user/lookup.go deleted file mode 100644 index 6fd8dd0d44..0000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup.go +++ /dev/null @@ -1,41 +0,0 @@ -package user - -import ( - "errors" -) - -var ( - // The current operating system does not provide the required data for user lookups. - ErrUnsupported = errors.New("user lookup: operating system does not provide passwd-formatted data") - // No matching entries found in file. - ErrNoPasswdEntries = errors.New("no matching entries in passwd file") - ErrNoGroupEntries = errors.New("no matching entries in group file") -) - -// LookupUser looks up a user by their username in /etc/passwd. If the user -// cannot be found (or there is no /etc/passwd file on the filesystem), then -// LookupUser returns an error. -func LookupUser(username string) (User, error) { - return lookupUser(username) -} - -// LookupUid looks up a user by their user id in /etc/passwd. If the user cannot -// be found (or there is no /etc/passwd file on the filesystem), then LookupId -// returns an error. -func LookupUid(uid int) (User, error) { - return lookupUid(uid) -} - -// LookupGroup looks up a group by its name in /etc/group. If the group cannot -// be found (or there is no /etc/group file on the filesystem), then LookupGroup -// returns an error. -func LookupGroup(groupname string) (Group, error) { - return lookupGroup(groupname) -} - -// LookupGid looks up a group by its group id in /etc/group. If the group cannot -// be found (or there is no /etc/group file on the filesystem), then LookupGid -// returns an error. -func LookupGid(gid int) (Group, error) { - return lookupGid(gid) -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go b/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go index 92b5ae8de0..967717a1b1 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_unix.go @@ -16,13 +16,19 @@ const ( unixGroupPath = "/etc/group" ) -func lookupUser(username string) (User, error) { +// LookupUser looks up a user by their username in /etc/passwd. If the user +// cannot be found (or there is no /etc/passwd file on the filesystem), then +// LookupUser returns an error. 
+func LookupUser(username string) (User, error) { return lookupUserFunc(func(u User) bool { return u.Name == username }) } -func lookupUid(uid int) (User, error) { +// LookupUid looks up a user by their user id in /etc/passwd. If the user cannot +// be found (or there is no /etc/passwd file on the filesystem), then LookupId +// returns an error. +func LookupUid(uid int) (User, error) { return lookupUserFunc(func(u User) bool { return u.Uid == uid }) @@ -51,13 +57,19 @@ func lookupUserFunc(filter func(u User) bool) (User, error) { return users[0], nil } -func lookupGroup(groupname string) (Group, error) { +// LookupGroup looks up a group by its name in /etc/group. If the group cannot +// be found (or there is no /etc/group file on the filesystem), then LookupGroup +// returns an error. +func LookupGroup(groupname string) (Group, error) { return lookupGroupFunc(func(g Group) bool { return g.Name == groupname }) } -func lookupGid(gid int) (Group, error) { +// LookupGid looks up a group by its group id in /etc/group. If the group cannot +// be found (or there is no /etc/group file on the filesystem), then LookupGid +// returns an error. +func LookupGid(gid int) (Group, error) { return lookupGroupFunc(func(g Group) bool { return g.Gid == gid }) diff --git a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_windows.go b/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_windows.go deleted file mode 100644 index 65cd40e928..0000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/user/lookup_windows.go +++ /dev/null @@ -1,40 +0,0 @@ -// +build windows - -package user - -import ( - "fmt" - "os/user" -) - -func lookupUser(username string) (User, error) { - u, err := user.Lookup(username) - if err != nil { - return User{}, err - } - return userFromOS(u) -} - -func lookupUid(uid int) (User, error) { - u, err := user.LookupId(fmt.Sprintf("%d", uid)) - if err != nil { - return User{}, err - } - return userFromOS(u) -} - -func lookupGroup(groupname string) (Group, error) { - g, err := user.LookupGroup(groupname) - if err != nil { - return Group{}, err - } - return groupFromOS(g) -} - -func lookupGid(gid int) (Group, error) { - g, err := user.LookupGroupId(fmt.Sprintf("%d", gid)) - if err != nil { - return Group{}, err - } - return groupFromOS(g) -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/user/user.go b/vendor/github.com/opencontainers/runc/libcontainer/user/user.go index 4b89dad737..68da4400d4 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/user/user.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/user/user.go @@ -2,10 +2,10 @@ package user import ( "bufio" + "errors" "fmt" "io" "os" - "os/user" "strconv" "strings" ) @@ -16,6 +16,13 @@ const ( ) var ( + // The current operating system does not provide the required data for user lookups. + ErrUnsupported = errors.New("user lookup: operating system does not provide passwd-formatted data") + + // No matching entries found in file. 
+ ErrNoPasswdEntries = errors.New("no matching entries in passwd file") + ErrNoGroupEntries = errors.New("no matching entries in group file") + ErrRange = fmt.Errorf("uids and gids must be in range %d-%d", minId, maxId) ) @@ -29,28 +36,6 @@ type User struct { Shell string } -// userFromOS converts an os/user.(*User) to local User -// -// (This does not include Pass, Shell or Gecos) -func userFromOS(u *user.User) (User, error) { - newUser := User{ - Name: u.Username, - Home: u.HomeDir, - } - id, err := strconv.Atoi(u.Uid) - if err != nil { - return newUser, err - } - newUser.Uid = id - - id, err = strconv.Atoi(u.Gid) - if err != nil { - return newUser, err - } - newUser.Gid = id - return newUser, nil -} - type Group struct { Name string Pass string @@ -58,23 +43,6 @@ type Group struct { List []string } -// groupFromOS converts an os/user.(*Group) to local Group -// -// (This does not include Pass or List) -func groupFromOS(g *user.Group) (Group, error) { - newGroup := Group{ - Name: g.Name, - } - - id, err := strconv.Atoi(g.Gid) - if err != nil { - return newGroup, err - } - newGroup.Gid = id - - return newGroup, nil -} - // SubID represents an entry in /etc/sub{u,g}id type SubID struct { Name string @@ -466,7 +434,7 @@ func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, err // we asked for a group but didn't find it. let's check to see // if we wanted a numeric group if !found { - gid, err := strconv.Atoi(ag) + gid, err := strconv.ParseInt(ag, 10, 64) if err != nil { return nil, fmt.Errorf("Unable to find group %s", ag) } @@ -474,7 +442,7 @@ func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, err if gid < minId || gid > maxId { return nil, ErrRange } - gidMap[gid] = struct{}{} + gidMap[int(gid)] = struct{}{} } } gids := []int{} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/user/user_fuzzer.go b/vendor/github.com/opencontainers/runc/libcontainer/user/user_fuzzer.go new file mode 100644 index 0000000000..8c9bb5df39 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/user/user_fuzzer.go @@ -0,0 +1,42 @@ +// +build gofuzz + +package user + +import ( + "io" + "strings" +) + +func IsDivisbleBy(n int, divisibleby int) bool { + return (n % divisibleby) == 0 +} + +func FuzzUser(data []byte) int { + if len(data) == 0 { + return -1 + } + if !IsDivisbleBy(len(data), 5) { + return -1 + } + + var divided [][]byte + + chunkSize := len(data) / 5 + + for i := 0; i < len(data); i += chunkSize { + end := i + chunkSize + + divided = append(divided, data[i:end]) + } + + _, _ = ParsePasswdFilter(strings.NewReader(string(divided[0])), nil) + + var passwd, group io.Reader + + group = strings.NewReader(string(divided[1])) + _, _ = GetAdditionalGroups([]string{string(divided[2])}, group) + + passwd = strings.NewReader(string(divided[3])) + _, _ = GetExecUser(string(divided[4]), nil, passwd, group) + return 1 +}
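
For reviewers of the `libcontainer/user` changes above: the unexported `lookupUser`/`lookupUid`/`lookupGroup`/`lookupGid` wrappers and the Windows implementation are removed, the exported `LookupUser`, `LookupUid`, `LookupGroup` and `LookupGid` functions now live directly in `lookup_unix.go`, and the `ErrUnsupported`/`ErrNoPasswdEntries`/`ErrNoGroupEntries` sentinels moved into `user.go`. Below is a minimal sketch (not part of the vendored patch) of how a caller might exercise the updated API; the username, gid, and group-file contents are illustrative placeholders only.

```go
package main

import (
	"fmt"
	"strings"

	"github.com/opencontainers/runc/libcontainer/user"
)

func main() {
	// Exported lookups now resolve directly against /etc/passwd and
	// /etc/group; there is no longer a Windows fallback in this package.
	u, err := user.LookupUser("root") // username is illustrative
	if err != nil {
		fmt.Println("user lookup failed:", err)
		return
	}
	fmt.Printf("uid=%d gid=%d home=%s\n", u.Uid, u.Gid, u.Home)

	g, err := user.LookupGid(0) // gid is illustrative
	if err != nil {
		fmt.Println("group lookup failed:", err)
		return
	}
	fmt.Println("group:", g.Name)

	// GetAdditionalGroups resolves names or numeric ids against a
	// group-file reader; with this bump numeric ids are parsed via
	// ParseInt instead of Atoi, and must still fall in the minId..maxId range.
	groups := strings.NewReader("wheel:x:10:root\n")
	gids, err := user.GetAdditionalGroups([]string{"wheel", "11"}, groups)
	if err != nil {
		fmt.Println("additional groups failed:", err)
		return
	}
	fmt.Println("supplementary gids:", gids)
}
```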