mirror of https://github.com/docker/cli.git

commit 79a9fd61fd (parent 579279ce09)

vendor: github.com/opencontainers/runc v1.0.0-rc95

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
@@ -55,7 +55,7 @@ github.com/modern-go/reflect2 94122c33edd36123c84d5368cfb2
github.com/morikuni/aec 39771216ff4c63d11f5e604076f9c45e8be1067b # v1.0.0
github.com/opencontainers/go-digest ea51bea511f75cfa3ef6098cc253c5c3609b037a # v1.0.0
github.com/opencontainers/image-spec d60099175f88c47cd379c4738d158884749ed235 # v1.0.1
-github.com/opencontainers/runc ff819c7e9184c13b7c2607fe6c30ae19403a7aff # v1.0.0-rc92
+github.com/opencontainers/runc b9ee9c6314599f1b4a7f497e1f1f856fe433d3b7 # v1.0.0-rc95
github.com/opentracing/opentracing-go d34af3eaa63c4d08ab54863a4bdd0daa45212e12 # v1.2.0
github.com/pkg/errors 614d223910a179a466c1767a985424175c39b465 # v0.9.1
github.com/prometheus/client_golang 6edbbd9e560190e318cdc5b4d3e630b442858380 # v1.6.0
@@ -1,9 +1,10 @@
# runc

-[![Build Status](https://travis-ci.org/opencontainers/runc.svg?branch=master)](https://travis-ci.org/opencontainers/runc)
[![Go Report Card](https://goreportcard.com/badge/github.com/opencontainers/runc)](https://goreportcard.com/report/github.com/opencontainers/runc)
[![GoDoc](https://godoc.org/github.com/opencontainers/runc?status.svg)](https://godoc.org/github.com/opencontainers/runc)
[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/588/badge)](https://bestpractices.coreinfrastructure.org/projects/588)
+[![gha/validate](https://github.com/opencontainers/runc/workflows/validate/badge.svg)](https://github.com/opencontainers/runc/actions?query=workflow%3Avalidate)
+[![gha/ci](https://github.com/opencontainers/runc/workflows/ci/badge.svg)](https://github.com/opencontainers/runc/actions?query=workflow%3Aci)

## Introduction

@@ -17,10 +18,6 @@ This means that `runc` 1.0.0 should implement the 1.0 version of the specificati

You can find official releases of `runc` on the [release](https://github.com/opencontainers/runc/releases) page.

-Currently, the following features are not considered to be production-ready:
-
-* [Support for cgroup v2](./docs/cgroup-v2.md)
-
## Security

The reporting process and disclosure communications are outlined [here](https://github.com/opencontainers/org/blob/master/SECURITY.md).
@@ -64,19 +61,20 @@ sudo make install

with some of them enabled by default (see `BUILDTAGS` in top-level `Makefile`).

To change build tags from the default, set the `BUILDTAGS` variable for make,
-e.g.
+e.g. to disable seccomp:

```bash
-make BUILDTAGS='seccomp apparmor'
+make BUILDTAGS=""
```

| Build Tag | Feature                            | Enabled by default | Dependency |
|-----------|------------------------------------|--------------------|------------|
| seccomp   | Syscall filtering                  | yes                | libseccomp |
-| selinux   | selinux process and mount labeling | yes                | <none>     |
-| apparmor  | apparmor profile support           | yes                | <none>     |
-| nokmem    | disable kernel memory accounting   | no                 | <none>     |
+
+The following build tags were used earlier, but are now obsoleted:
+- **nokmem** (since runc v1.0.0-rc94 kernel memory settings are ignored)
+- **apparmor** (since runc v1.0.0-rc93 the feature is always enabled)
+- **selinux** (since runc v1.0.0-rc93 the feature is always enabled)
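These tags are ordinary Go build constraints, so a feature is compiled in only when its tag is passed through `BUILDTAGS`. Purely as an illustration (a hypothetical file, not taken from the runc tree), a tag-gated source file looks like this:

```go
// +build seccomp

// Hypothetical illustration of a build-tag-gated file: it is only compiled
// when the "seccomp" tag is set (e.g. `make BUILDTAGS='seccomp'`, which ends
// up as `go build -tags seccomp`). Without the tag, a stub variant of the
// same function from an untagged file would be compiled instead.
package seccompexample

// InitSeccomp stands in for the libseccomp-backed implementation.
func InitSeccomp() error {
	return nil
}
```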

### Running the test suite
@@ -128,6 +126,14 @@ make verify-dependencies

## Using runc

+Please note that runc is a low level tool not designed with an end user
+in mind. It is mostly employed by other higher level container software.
+
+Therefore, unless there is some specific use case that prevents the use
+of tools like Docker or Podman, it is not recommended to use runc directly.
+
+If you still want to use runc, here's how.
+
### Creating an OCI Bundle

In order to use runc you must have your container in the format of an OCI bundle.
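The bundle itself is just a directory containing a root filesystem and a `config.json`; normally `runc spec` generates the default config. Purely as a hedged sketch, the same file can be produced from Go with the runtime-spec types. The values below are placeholders and the spec is deliberately incomplete (a real bundle needs the full default: mounts, namespaces, capabilities):

```go
package main

import (
	"encoding/json"
	"os"

	specs "github.com/opencontainers/runtime-spec/specs-go"
)

// Writes a minimal, illustrative config.json for an OCI bundle. In practice
// `runc spec` generates a complete default; this only shows the shape.
func main() {
	spec := specs.Spec{
		Version: specs.Version,
		Root:    &specs.Root{Path: "rootfs", Readonly: true},
		Process: &specs.Process{
			Terminal: true,
			Cwd:      "/",
			Args:     []string{"sh"},
		},
		Hostname: "runc-example", // placeholder
	}

	f, err := os.Create("config.json")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	enc := json.NewEncoder(f)
	enc.SetIndent("", "\t")
	if err := enc.Encode(&spec); err != nil {
		panic(err)
	}
}
```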
@@ -169,7 +175,9 @@ If you used the unmodified `runc spec` template this should give you a `sh` sessi

The second way to start a container is using the specs lifecycle operations.
This gives you more power over how the container is created and managed while it is running.
-This will also launch the container in the background so you will have to edit the `config.json` to remove the `terminal` setting for the simple examples here.
+This will also launch the container in the background so you will have to edit
+the `config.json` to remove the `terminal` setting for the simple examples
+below (see more details about [runc terminal handling](docs/terminals.md)).
Your process field in the `config.json` should look like this below with `"terminal": false` and `"args": ["sleep", "5"]`.
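The lifecycle commands can also be scripted. As a rough sketch (the container id and bundle path are placeholders, and this assumes runc is on PATH with sufficient privileges for the spec in use), driving `runc create`/`start`/`state`/`delete` from Go might look like:

```go
package main

import (
	"fmt"
	"os/exec"
)

// Runs a single runc subcommand and prints its combined output.
func run(args ...string) error {
	out, err := exec.Command("runc", args...).CombinedOutput()
	fmt.Printf("runc %v: %s", args, out)
	return err
}

func main() {
	bundle := "/mycontainer" // placeholder: directory with config.json and rootfs/
	steps := [][]string{
		{"create", "--bundle", bundle, "mycontainerid"},
		{"start", "mycontainerid"},
		{"state", "mycontainerid"},
		// --force so the sketch also works while the process is still running;
		// a real flow would wait for the container to stop first.
		{"delete", "--force", "mycontainerid"},
	}
	for _, step := range steps {
		if err := run(step...); err != nil {
			fmt.Println("step failed:", err)
			return
		}
	}
}
```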
@@ -292,8 +300,12 @@ PIDFile=/run/mycontainerid.pid
WantedBy=multi-user.target
```

-#### cgroup v2
-See [`./docs/cgroup-v2.md`](./docs/cgroup-v2.md).
+## More documentation
+
+* [cgroup v2](./docs/cgroup-v2.md)
+* [Checkpoint and restore](./docs/checkpoint-restore.md)
+* [systemd cgroup driver](./docs/systemd.md)
+* [Terminals and standard IO](./docs/terminals.md)

## License
@@ -1,26 +1,28 @@
module github.com/opencontainers/runc

-go 1.14
+go 1.13

require (
-	github.com/checkpoint-restore/go-criu/v4 v4.1.0
-	github.com/cilium/ebpf v0.0.0-20200702112145-1c8d4c9ef775
-	github.com/containerd/console v1.0.0
-	github.com/coreos/go-systemd/v22 v22.1.0
+	github.com/checkpoint-restore/go-criu/v5 v5.0.0
+	github.com/cilium/ebpf v0.5.0
+	github.com/containerd/console v1.0.2
+	github.com/coreos/go-systemd/v22 v22.3.1
	github.com/cyphar/filepath-securejoin v0.2.2
	github.com/docker/go-units v0.4.0
-	github.com/godbus/dbus/v5 v5.0.3
-	github.com/golang/protobuf v1.4.2
-	github.com/moby/sys/mountinfo v0.1.3
-	github.com/mrunalp/fileutils v0.0.0-20200520151820-abd8a0e76976
-	github.com/opencontainers/runtime-spec v1.0.3-0.20200728170252-4d89ac9fbff6
-	github.com/opencontainers/selinux v1.6.0
+	github.com/godbus/dbus/v5 v5.0.4
+	github.com/moby/sys/mountinfo v0.4.1
+	github.com/mrunalp/fileutils v0.5.0
+	github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417
+	github.com/opencontainers/selinux v1.8.0
	github.com/pkg/errors v0.9.1
	github.com/seccomp/libseccomp-golang v0.9.1
-	github.com/sirupsen/logrus v1.6.0
-	github.com/syndtr/gocapability v0.0.0-20180916011248-d98352740cb2
+	github.com/sirupsen/logrus v1.7.0
+	github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635
	// NOTE: urfave/cli must be <= v1.22.1 due to a regression: https://github.com/urfave/cli/issues/1092
	github.com/urfave/cli v1.22.1
	github.com/vishvananda/netlink v1.1.0
-	golang.org/x/sys v0.0.0-20200728102440-3e129f6d46b1
+	github.com/willf/bitset v1.1.11
+	golang.org/x/net v0.0.0-20201224014010-6772e930b67b
+	golang.org/x/sys v0.0.0-20210426230700-d19ff857e887
+	google.golang.org/protobuf v1.25.0
)
|
@ -57,90 +57,94 @@ struct describing how the container is to be created. A sample would look simila
|
|||
|
||||
```go
|
||||
defaultMountFlags := unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
|
||||
var devices []*configs.DeviceRule
|
||||
for _, device := range specconv.AllowedDevices {
|
||||
devices = append(devices, &device.Rule)
|
||||
}
|
||||
config := &configs.Config{
|
||||
Rootfs: "/your/path/to/rootfs",
|
||||
Capabilities: &configs.Capabilities{
|
||||
Bounding: []string{
|
||||
"CAP_CHOWN",
|
||||
"CAP_DAC_OVERRIDE",
|
||||
"CAP_FSETID",
|
||||
"CAP_FOWNER",
|
||||
"CAP_MKNOD",
|
||||
"CAP_NET_RAW",
|
||||
"CAP_SETGID",
|
||||
"CAP_SETUID",
|
||||
"CAP_SETFCAP",
|
||||
"CAP_SETPCAP",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
"CAP_SYS_CHROOT",
|
||||
"CAP_KILL",
|
||||
"CAP_AUDIT_WRITE",
|
||||
},
|
||||
Effective: []string{
|
||||
"CAP_CHOWN",
|
||||
"CAP_DAC_OVERRIDE",
|
||||
"CAP_FSETID",
|
||||
"CAP_FOWNER",
|
||||
"CAP_MKNOD",
|
||||
"CAP_NET_RAW",
|
||||
"CAP_SETGID",
|
||||
"CAP_SETUID",
|
||||
"CAP_SETFCAP",
|
||||
"CAP_SETPCAP",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
"CAP_SYS_CHROOT",
|
||||
"CAP_KILL",
|
||||
"CAP_AUDIT_WRITE",
|
||||
},
|
||||
Inheritable: []string{
|
||||
"CAP_CHOWN",
|
||||
"CAP_DAC_OVERRIDE",
|
||||
"CAP_FSETID",
|
||||
"CAP_FOWNER",
|
||||
"CAP_MKNOD",
|
||||
"CAP_NET_RAW",
|
||||
"CAP_SETGID",
|
||||
"CAP_SETUID",
|
||||
"CAP_SETFCAP",
|
||||
"CAP_SETPCAP",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
"CAP_SYS_CHROOT",
|
||||
"CAP_KILL",
|
||||
"CAP_AUDIT_WRITE",
|
||||
},
|
||||
Permitted: []string{
|
||||
"CAP_CHOWN",
|
||||
"CAP_DAC_OVERRIDE",
|
||||
"CAP_FSETID",
|
||||
"CAP_FOWNER",
|
||||
"CAP_MKNOD",
|
||||
"CAP_NET_RAW",
|
||||
"CAP_SETGID",
|
||||
"CAP_SETUID",
|
||||
"CAP_SETFCAP",
|
||||
"CAP_SETPCAP",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
"CAP_SYS_CHROOT",
|
||||
"CAP_KILL",
|
||||
"CAP_AUDIT_WRITE",
|
||||
},
|
||||
Ambient: []string{
|
||||
"CAP_CHOWN",
|
||||
"CAP_DAC_OVERRIDE",
|
||||
"CAP_FSETID",
|
||||
"CAP_FOWNER",
|
||||
"CAP_MKNOD",
|
||||
"CAP_NET_RAW",
|
||||
"CAP_SETGID",
|
||||
"CAP_SETUID",
|
||||
"CAP_SETFCAP",
|
||||
"CAP_SETPCAP",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
"CAP_SYS_CHROOT",
|
||||
"CAP_KILL",
|
||||
"CAP_AUDIT_WRITE",
|
||||
},
|
||||
},
|
||||
Bounding: []string{
|
||||
"CAP_CHOWN",
|
||||
"CAP_DAC_OVERRIDE",
|
||||
"CAP_FSETID",
|
||||
"CAP_FOWNER",
|
||||
"CAP_MKNOD",
|
||||
"CAP_NET_RAW",
|
||||
"CAP_SETGID",
|
||||
"CAP_SETUID",
|
||||
"CAP_SETFCAP",
|
||||
"CAP_SETPCAP",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
"CAP_SYS_CHROOT",
|
||||
"CAP_KILL",
|
||||
"CAP_AUDIT_WRITE",
|
||||
},
|
||||
Effective: []string{
|
||||
"CAP_CHOWN",
|
||||
"CAP_DAC_OVERRIDE",
|
||||
"CAP_FSETID",
|
||||
"CAP_FOWNER",
|
||||
"CAP_MKNOD",
|
||||
"CAP_NET_RAW",
|
||||
"CAP_SETGID",
|
||||
"CAP_SETUID",
|
||||
"CAP_SETFCAP",
|
||||
"CAP_SETPCAP",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
"CAP_SYS_CHROOT",
|
||||
"CAP_KILL",
|
||||
"CAP_AUDIT_WRITE",
|
||||
},
|
||||
Inheritable: []string{
|
||||
"CAP_CHOWN",
|
||||
"CAP_DAC_OVERRIDE",
|
||||
"CAP_FSETID",
|
||||
"CAP_FOWNER",
|
||||
"CAP_MKNOD",
|
||||
"CAP_NET_RAW",
|
||||
"CAP_SETGID",
|
||||
"CAP_SETUID",
|
||||
"CAP_SETFCAP",
|
||||
"CAP_SETPCAP",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
"CAP_SYS_CHROOT",
|
||||
"CAP_KILL",
|
||||
"CAP_AUDIT_WRITE",
|
||||
},
|
||||
Permitted: []string{
|
||||
"CAP_CHOWN",
|
||||
"CAP_DAC_OVERRIDE",
|
||||
"CAP_FSETID",
|
||||
"CAP_FOWNER",
|
||||
"CAP_MKNOD",
|
||||
"CAP_NET_RAW",
|
||||
"CAP_SETGID",
|
||||
"CAP_SETUID",
|
||||
"CAP_SETFCAP",
|
||||
"CAP_SETPCAP",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
"CAP_SYS_CHROOT",
|
||||
"CAP_KILL",
|
||||
"CAP_AUDIT_WRITE",
|
||||
},
|
||||
Ambient: []string{
|
||||
"CAP_CHOWN",
|
||||
"CAP_DAC_OVERRIDE",
|
||||
"CAP_FSETID",
|
||||
"CAP_FOWNER",
|
||||
"CAP_MKNOD",
|
||||
"CAP_NET_RAW",
|
||||
"CAP_SETGID",
|
||||
"CAP_SETUID",
|
||||
"CAP_SETFCAP",
|
||||
"CAP_SETPCAP",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
"CAP_SYS_CHROOT",
|
||||
"CAP_KILL",
|
||||
"CAP_AUDIT_WRITE",
|
||||
},
|
||||
},
|
||||
Namespaces: configs.Namespaces([]configs.Namespace{
|
||||
{Type: configs.NEWNS},
|
||||
{Type: configs.NEWUTS},
|
||||
|
@@ -155,7 +159,7 @@ config := &configs.Config{
		Parent: "system",
		Resources: &configs.Resources{
			MemorySwappiness: nil,
-			Devices: specconv.AllowedDevices,
+			Devices: devices,
		},
	},
	MaskPaths: []string{
@@ -313,7 +317,7 @@ state, err := container.State()

#### Checkpoint & Restore

libcontainer now integrates [CRIU](http://criu.org/) for checkpointing and restoring containers.
-This let's you save the state of a process running inside a container to disk, and then restore
+This lets you save the state of a process running inside a container to disk, and then restore
that state into a new process, on the same machine or on another machine.

`criu` version 1.5.2 or higher is required to use checkpoint and restore.
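On the libcontainer API side, a rough sketch of checkpointing a loaded container and restoring it afterwards is below. The state root, container id, and the `CriuOpts` field names are written from memory of the libcontainer godoc and may differ between versions, so treat this as an outline rather than the exact vendored API:

```go
package main

import (
	"os"

	"github.com/opencontainers/runc/libcontainer"
)

// Sketch: checkpoint an existing container to an images directory, then
// restore the saved state into a fresh init process.
func main() {
	factory, err := libcontainer.New("/run/runc", libcontainer.Cgroupfs) // state root is a placeholder
	if err != nil {
		panic(err)
	}
	container, err := factory.Load("mycontainerid") // placeholder id
	if err != nil {
		panic(err)
	}

	opts := &libcontainer.CriuOpts{
		ImagesDirectory: "/tmp/criu-images", // where CRIU writes/reads its image files
		LeaveRunning:    false,
	}
	if err := container.Checkpoint(opts); err != nil {
		panic(err)
	}

	// Restore the saved state into a new process on the same machine.
	process := &libcontainer.Process{
		Args:   []string{"sleep", "5"},
		Env:    []string{"PATH=/bin"},
		Stdout: os.Stdout,
		Stderr: os.Stderr,
		Init:   true,
	}
	if err := container.Restore(process, opts); err != nil {
		panic(err)
	}
}
```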
|
@ -59,14 +59,38 @@
|
|||
#include <sys/syscall.h>
|
||||
|
||||
/* Use our own wrapper for memfd_create. */
|
||||
#if !defined(SYS_memfd_create) && defined(__NR_memfd_create)
|
||||
# define SYS_memfd_create __NR_memfd_create
|
||||
#ifndef SYS_memfd_create
|
||||
# ifdef __NR_memfd_create
|
||||
# define SYS_memfd_create __NR_memfd_create
|
||||
# else
|
||||
/* These values come from <https://fedora.juszkiewicz.com.pl/syscalls.html>. */
|
||||
# warning "libc is outdated -- using hard-coded SYS_memfd_create"
|
||||
# if defined(__x86_64__)
|
||||
# define SYS_memfd_create 319
|
||||
# elif defined(__i386__)
|
||||
# define SYS_memfd_create 356
|
||||
# elif defined(__ia64__)
|
||||
# define SYS_memfd_create 1340
|
||||
# elif defined(__arm__)
|
||||
# define SYS_memfd_create 385
|
||||
# elif defined(__aarch64__)
|
||||
# define SYS_memfd_create 279
|
||||
# elif defined(__ppc__) || defined(__PPC64__) || defined(__powerpc64__)
|
||||
# define SYS_memfd_create 360
|
||||
# elif defined(__s390__) || defined(__s390x__)
|
||||
# define SYS_memfd_create 350
|
||||
# else
|
||||
# warning "unknown architecture -- cannot hard-code SYS_memfd_create"
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* memfd_create(2) flags -- copied from <linux/memfd.h>. */
|
||||
#ifndef MFD_CLOEXEC
|
||||
# define MFD_CLOEXEC 0x0001U
|
||||
# define MFD_ALLOW_SEALING 0x0002U
|
||||
#endif
|
||||
|
||||
int memfd_create(const char *name, unsigned int flags)
|
||||
{
|
||||
#ifdef SYS_memfd_create
|
||||
|
@ -77,7 +101,6 @@ int memfd_create(const char *name, unsigned int flags)
|
|||
#endif
|
||||
}
|
||||
|
||||
|
||||
/* This comes directly from <linux/fcntl.h>. */
|
||||
#ifndef F_LINUX_SPECIFIC_BASE
|
||||
# define F_LINUX_SPECIFIC_BASE 1024
|
||||
|
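The block above only hard-codes `SYS_memfd_create` when libc does not provide the wrapper. For comparison, the same syscall is reachable from Go through `golang.org/x/sys/unix` (to the best of my knowledge of that package); a small sketch:

```go
package main

import (
	"fmt"
	"os"

	"golang.org/x/sys/unix"
)

// Minimal sketch: create an anonymous, sealable in-memory file with
// memfd_create(2), wrap the fd in an *os.File, and write to it.
func main() {
	fd, err := unix.MemfdCreate("runc-example", unix.MFD_CLOEXEC|unix.MFD_ALLOW_SEALING)
	if err != nil {
		panic(err)
	}
	f := os.NewFile(uintptr(fd), "memfd:runc-example")
	defer f.Close()

	if _, err := f.WriteString("hello"); err != nil {
		panic(err)
	}
	fmt.Println("wrote to", f.Name())
}
```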
@ -103,7 +126,7 @@ static void *must_realloc(void *ptr, size_t size)
|
|||
void *old = ptr;
|
||||
do {
|
||||
ptr = realloc(old, size);
|
||||
} while(!ptr);
|
||||
} while (!ptr);
|
||||
return ptr;
|
||||
}
|
||||
|
||||
|
@ -115,10 +138,10 @@ static void *must_realloc(void *ptr, size_t size)
|
|||
static int is_self_cloned(void)
|
||||
{
|
||||
int fd, ret, is_cloned = 0;
|
||||
struct stat statbuf = {};
|
||||
struct statfs fsbuf = {};
|
||||
struct stat statbuf = { };
|
||||
struct statfs fsbuf = { };
|
||||
|
||||
fd = open("/proc/self/exe", O_RDONLY|O_CLOEXEC);
|
||||
fd = open("/proc/self/exe", O_RDONLY | O_CLOEXEC);
|
||||
if (fd < 0) {
|
||||
fprintf(stderr, "you have no read access to runc binary file\n");
|
||||
return -ENOTRECOVERABLE;
|
||||
|
@ -274,7 +297,7 @@ enum {
|
|||
static int make_execfd(int *fdtype)
|
||||
{
|
||||
int fd = -1;
|
||||
char template[PATH_MAX] = {0};
|
||||
char template[PATH_MAX] = { 0 };
|
||||
char *prefix = getenv("_LIBCONTAINER_STATEDIR");
|
||||
|
||||
if (!prefix || *prefix != '/')
|
||||
|
@ -303,7 +326,7 @@ static int make_execfd(int *fdtype)
|
|||
*fdtype = EFD_FILE;
|
||||
fd = open(prefix, O_TMPFILE | O_EXCL | O_RDWR | O_CLOEXEC, 0700);
|
||||
if (fd >= 0) {
|
||||
struct stat statbuf = {};
|
||||
struct stat statbuf = { };
|
||||
bool working_otmpfile = false;
|
||||
|
||||
/*
|
||||
|
@ -348,27 +371,27 @@ static int seal_execfd(int *fd, int fdtype)
|
|||
switch (fdtype) {
|
||||
case EFD_MEMFD:
|
||||
return fcntl(*fd, F_ADD_SEALS, RUNC_MEMFD_SEALS);
|
||||
case EFD_FILE: {
|
||||
/* Need to re-open our pseudo-memfd as an O_PATH to avoid execve(2) giving -ETXTBSY. */
|
||||
int newfd;
|
||||
char fdpath[PATH_MAX] = {0};
|
||||
case EFD_FILE:{
|
||||
/* Need to re-open our pseudo-memfd as an O_PATH to avoid execve(2) giving -ETXTBSY. */
|
||||
int newfd;
|
||||
char fdpath[PATH_MAX] = { 0 };
|
||||
|
||||
if (fchmod(*fd, 0100) < 0)
|
||||
return -1;
|
||||
if (fchmod(*fd, 0100) < 0)
|
||||
return -1;
|
||||
|
||||
if (snprintf(fdpath, sizeof(fdpath), "/proc/self/fd/%d", *fd) < 0)
|
||||
return -1;
|
||||
if (snprintf(fdpath, sizeof(fdpath), "/proc/self/fd/%d", *fd) < 0)
|
||||
return -1;
|
||||
|
||||
newfd = open(fdpath, O_PATH | O_CLOEXEC);
|
||||
if (newfd < 0)
|
||||
return -1;
|
||||
newfd = open(fdpath, O_PATH | O_CLOEXEC);
|
||||
if (newfd < 0)
|
||||
return -1;
|
||||
|
||||
close(*fd);
|
||||
*fd = newfd;
|
||||
return 0;
|
||||
}
|
||||
close(*fd);
|
||||
*fd = newfd;
|
||||
return 0;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
break;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
@ -376,7 +399,7 @@ static int seal_execfd(int *fd, int fdtype)
|
|||
static int try_bindfd(void)
|
||||
{
|
||||
int fd, ret = -1;
|
||||
char template[PATH_MAX] = {0};
|
||||
char template[PATH_MAX] = { 0 };
|
||||
char *prefix = getenv("_LIBCONTAINER_STATEDIR");
|
||||
|
||||
if (!prefix || *prefix != '/')
|
||||
|
@ -404,7 +427,6 @@ static int try_bindfd(void)
|
|||
if (mount("", template, "", MS_REMOUNT | MS_BIND | MS_RDONLY, "") < 0)
|
||||
goto out_umount;
|
||||
|
||||
|
||||
/* Get read-only handle that we're sure can't be made read-write. */
|
||||
ret = open(template, O_PATH | O_CLOEXEC);
|
||||
|
||||
|
@ -448,7 +470,7 @@ static ssize_t fd_to_fd(int outfd, int infd)
|
|||
if (n < 0)
|
||||
return -1;
|
||||
nwritten += n;
|
||||
} while(nwritten < nread);
|
||||
} while (nwritten < nread);
|
||||
|
||||
total += nwritten;
|
||||
}
|
||||
|
@ -459,7 +481,7 @@ static ssize_t fd_to_fd(int outfd, int infd)
|
|||
static int clone_binary(void)
|
||||
{
|
||||
int binfd, execfd;
|
||||
struct stat statbuf = {};
|
||||
struct stat statbuf = { };
|
||||
size_t sent = 0;
|
||||
int fdtype = EFD_NONE;
|
||||
|
||||
|
|
vendor/github.com/opencontainers/runc/libcontainer/nsenter/escape.c (generated, vendored, new file, 142 lines)
@ -0,0 +1,142 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef ESCAPE_TEST
|
||||
# include <assert.h>
|
||||
# define test_assert(arg) assert(arg)
|
||||
#else
|
||||
# define test_assert(arg)
|
||||
#endif
|
||||
|
||||
#define DEL '\x7f'
|
||||
|
||||
/*
|
||||
* Poor man version of itoa with base=16 and input number from 0 to 15,
|
||||
* represented by a char. Converts it to a single hex digit ('0' to 'f').
|
||||
*/
|
||||
static char hex(char i)
|
||||
{
|
||||
test_assert(i >= 0 && i < 16);
|
||||
|
||||
if (i >= 0 && i < 10) {
|
||||
return '0' + i;
|
||||
}
|
||||
if (i >= 10 && i < 16) {
|
||||
return 'a' + i - 10;
|
||||
}
|
||||
return '?';
|
||||
}
|
||||
|
||||
/*
|
||||
* Given the character, tells how many _extra_ characters are needed
|
||||
* to JSON-escape it. If 0 is returned, the character does not need to
|
||||
* be escaped.
|
||||
*/
|
||||
static int need_escape(char c)
|
||||
{
|
||||
switch (c) {
|
||||
case '\\':
|
||||
case '"':
|
||||
case '\b':
|
||||
case '\n':
|
||||
case '\r':
|
||||
case '\t':
|
||||
case '\f':
|
||||
return 1;
|
||||
case DEL: // -> \u007f
|
||||
return 5;
|
||||
default:
|
||||
if (c > 0 && c < ' ') {
|
||||
// ASCII decimal 01 to 31 -> \u00xx
|
||||
return 5;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Escape the string so it can be used as a JSON string (per RFC4627,
|
||||
* section 2.5 minimal requirements, plus the DEL (0x7f) character).
|
||||
*
|
||||
* It is expected that the argument is a string allocated via malloc.
|
||||
* In case no escaping is needed, the original string is returned as is;
|
||||
* otherwise, the original string is free'd, and the newly allocated
|
||||
* escaped string is returned. Thus, in any case, the value returned
|
||||
* need to be free'd by the caller.
|
||||
*/
|
||||
char *escape_json_string(char *s)
|
||||
{
|
||||
int i, j, len;
|
||||
char *c, *out;
|
||||
|
||||
/*
|
||||
* First, check if escaping is at all needed -- if not, we can avoid
|
||||
* malloc and return the argument as is. While at it, count how much
|
||||
* extra space is required.
|
||||
*
|
||||
* XXX: the counting code must be in sync with the escaping code
|
||||
* (checked by test_assert()s below).
|
||||
*/
|
||||
for (i = j = 0; s[i] != '\0'; i++) {
|
||||
j += need_escape(s[i]);
|
||||
}
|
||||
if (j == 0) {
|
||||
// nothing to escape
|
||||
return s;
|
||||
}
|
||||
|
||||
len = i + j + 1;
|
||||
out = malloc(len);
|
||||
if (!out) {
|
||||
free(s);
|
||||
// As malloc failed, strdup can fail, too, so in the worst case
|
||||
// scenario NULL will be returned from here.
|
||||
return strdup("escape_json_string: out of memory");
|
||||
}
|
||||
for (c = s, j = 0; *c != '\0'; c++) {
|
||||
switch (*c) {
|
||||
case '"':
|
||||
case '\\':
|
||||
test_assert(need_escape(*c) == 1);
|
||||
out[j++] = '\\';
|
||||
out[j++] = *c;
|
||||
continue;
|
||||
}
|
||||
if ((*c < 0 || *c >= ' ') && (*c != DEL)) {
|
||||
// no escape needed
|
||||
test_assert(need_escape(*c) == 0);
|
||||
out[j++] = *c;
|
||||
continue;
|
||||
}
|
||||
out[j++] = '\\';
|
||||
switch (*c) {
|
||||
case '\b':
|
||||
out[j++] = 'b';
|
||||
break;
|
||||
case '\n':
|
||||
out[j++] = 'n';
|
||||
break;
|
||||
case '\r':
|
||||
out[j++] = 'r';
|
||||
break;
|
||||
case '\t':
|
||||
out[j++] = 't';
|
||||
break;
|
||||
case '\f':
|
||||
out[j++] = 'f';
|
||||
break;
|
||||
default:
|
||||
test_assert(need_escape(*c) == 5);
|
||||
out[j++] = 'u';
|
||||
out[j++] = '0';
|
||||
out[j++] = '0';
|
||||
out[j++] = hex(*c >> 4);
|
||||
out[j++] = hex(*c & 0x0f);
|
||||
}
|
||||
}
|
||||
test_assert(j + 1 == len);
|
||||
out[j] = '\0';
|
||||
|
||||
free(s);
|
||||
return out;
|
||||
}
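Since the escaped output is meant to be embedded in JSON log lines, a quick way to sanity-check strings like the ones this helper produces is to wrap them in quotes and feed them to a JSON decoder. A small illustrative Go check (the `escaped` value below is hand-written to mirror what the helper would emit, not generated by it):

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Sanity check: output escaped per the rules above should parse back to the
// original bytes when wrapped in quotes and decoded as JSON.
func main() {
	original := "tab:\there, quote:\", del:\x7f"
	escaped := `tab:\there, quote:\", del:\u007f` // hand-written expected escaping

	var decoded string
	if err := json.Unmarshal([]byte(`"`+escaped+`"`), &decoded); err != nil {
		panic(err)
	}
	fmt.Println("round-trip ok:", decoded == original)
}
```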
|
|
@ -29,6 +29,8 @@
|
|||
/* Get all of the CLONE_NEW* flags. */
|
||||
#include "namespace.h"
|
||||
|
||||
extern char *escape_json_string(char *str);
|
||||
|
||||
/* Synchronisation values. */
|
||||
enum sync_t {
|
||||
SYNC_USERMAP_PLS = 0x40, /* Request parent to map our users. */
|
||||
|
@ -36,7 +38,7 @@ enum sync_t {
|
|||
SYNC_RECVPID_PLS = 0x42, /* Tell parent we're sending the PID. */
|
||||
SYNC_RECVPID_ACK = 0x43, /* PID was correctly received by parent. */
|
||||
SYNC_GRANDCHILD = 0x44, /* The grandchild is ready to run. */
|
||||
SYNC_CHILD_READY = 0x45, /* The child or grandchild is ready to return. */
|
||||
SYNC_CHILD_FINISH = 0x45, /* The child or grandchild has finished. */
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -45,10 +47,14 @@ enum sync_t {
|
|||
*/
|
||||
#define CREATECGROUPNS 0x80
|
||||
|
||||
#define STAGE_SETUP -1
|
||||
/* longjmp() arguments. */
|
||||
#define JUMP_PARENT 0x00
|
||||
#define JUMP_CHILD 0xA0
|
||||
#define JUMP_INIT 0xA1
|
||||
#define STAGE_PARENT 0
|
||||
#define STAGE_CHILD 1
|
||||
#define STAGE_INIT 2
|
||||
|
||||
/* Stores the current stage of nsexec. */
|
||||
int current_stage = STAGE_SETUP;
|
||||
|
||||
/* Assume the stack grows down, so arguments should be above it. */
|
||||
struct clone_t {
|
||||
|
@ -56,7 +62,7 @@ struct clone_t {
|
|||
* Reserve some space for clone() to locate arguments
|
||||
* and retcode in this place
|
||||
*/
|
||||
char stack[4096] __attribute__ ((aligned(16)));
|
||||
char stack[4096] __attribute__((aligned(16)));
|
||||
char stack_ptr[0];
|
||||
|
||||
/* There's two children. This is used to execute the different code. */
|
||||
|
@ -102,31 +108,31 @@ static int logfd = -1;
|
|||
* List of netlink message types sent to us as part of bootstrapping the init.
|
||||
* These constants are defined in libcontainer/message_linux.go.
|
||||
*/
|
||||
#define INIT_MSG 62000
|
||||
#define INIT_MSG 62000
|
||||
#define CLONE_FLAGS_ATTR 27281
|
||||
#define NS_PATHS_ATTR 27282
|
||||
#define UIDMAP_ATTR 27283
|
||||
#define GIDMAP_ATTR 27284
|
||||
#define UIDMAP_ATTR 27283
|
||||
#define GIDMAP_ATTR 27284
|
||||
#define SETGROUP_ATTR 27285
|
||||
#define OOM_SCORE_ADJ_ATTR 27286
|
||||
#define ROOTLESS_EUID_ATTR 27287
|
||||
#define UIDMAPPATH_ATTR 27288
|
||||
#define GIDMAPPATH_ATTR 27289
|
||||
#define UIDMAPPATH_ATTR 27288
|
||||
#define GIDMAPPATH_ATTR 27289
|
||||
|
||||
/*
|
||||
* Use the raw syscall for versions of glibc which don't include a function for
|
||||
* it, namely (glibc 2.12).
|
||||
*/
|
||||
#if __GLIBC__ == 2 && __GLIBC_MINOR__ < 14
|
||||
# define _GNU_SOURCE
|
||||
# include "syscall.h"
|
||||
# if !defined(SYS_setns) && defined(__NR_setns)
|
||||
# define SYS_setns __NR_setns
|
||||
# endif
|
||||
# define _GNU_SOURCE
|
||||
# include "syscall.h"
|
||||
# if !defined(SYS_setns) && defined(__NR_setns)
|
||||
# define SYS_setns __NR_setns
|
||||
# endif
|
||||
|
||||
#ifndef SYS_setns
|
||||
# error "setns(2) syscall not supported by glibc version"
|
||||
#endif
|
||||
# ifndef SYS_setns
|
||||
# error "setns(2) syscall not supported by glibc version"
|
||||
# endif
|
||||
|
||||
int setns(int fd, int nstype)
|
||||
{
|
||||
|
@ -134,33 +140,43 @@ int setns(int fd, int nstype)
|
|||
}
|
||||
#endif
|
||||
|
||||
static void write_log_with_info(const char *level, const char *function, int line, const char *format, ...)
|
||||
static void write_log(const char *level, const char *format, ...)
|
||||
{
|
||||
char message[1024] = {};
|
||||
|
||||
char *message = NULL, *stage = NULL;
|
||||
va_list args;
|
||||
int ret;
|
||||
|
||||
if (logfd < 0 || level == NULL)
|
||||
return;
|
||||
goto out;
|
||||
|
||||
va_start(args, format);
|
||||
if (vsnprintf(message, sizeof(message), format, args) < 0)
|
||||
goto done;
|
||||
|
||||
dprintf(logfd, "{\"level\":\"%s\", \"msg\": \"%s:%d %s\"}\n", level, function, line, message);
|
||||
done:
|
||||
ret = vasprintf(&message, format, args);
|
||||
va_end(args);
|
||||
}
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
#define write_log(level, fmt, ...) \
|
||||
write_log_with_info((level), __FUNCTION__, __LINE__, (fmt), ##__VA_ARGS__)
|
||||
message = escape_json_string(message);
|
||||
|
||||
if (current_stage == STAGE_SETUP)
|
||||
stage = strdup("nsexec");
|
||||
else
|
||||
ret = asprintf(&stage, "nsexec-%d", current_stage);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
dprintf(logfd, "{\"level\":\"%s\", \"msg\": \"%s[%d]: %s\"}\n", level, stage, getpid(), message);
|
||||
|
||||
out:
|
||||
free(message);
|
||||
free(stage);
|
||||
}
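Each line written to `logfd` here is a self-contained JSON object, which is what lets the Go side of runc forward nsexec output into its own logger. A simplified, illustrative decoder is sketched below (it is a stand-in for runc's own log-forwarding code, and the level string "debug" is assumed to be what the `DEBUG` macro expands to):

```go
package main

import (
	"bufio"
	"encoding/json"
	"fmt"
	"strings"
)

// One JSON object per line, matching the dprintf format used by write_log().
type nsexecLogEntry struct {
	Level string `json:"level"`
	Msg   string `json:"msg"`
}

func main() {
	// Stand-in for the log pipe; the lines mimic the nsexec-%d stage prefix.
	pipe := strings.NewReader(
		`{"level":"debug", "msg": "nsexec-1[1234]: spawn stage-2"}` + "\n" +
			`{"level":"debug", "msg": "nsexec-2[1235]: signal completion to stage-0"}` + "\n")

	scanner := bufio.NewScanner(pipe)
	for scanner.Scan() {
		var entry nsexecLogEntry
		if err := json.Unmarshal(scanner.Bytes(), &entry); err != nil {
			fmt.Println("skipping malformed line:", err)
			continue
		}
		fmt.Printf("[%s] %s\n", entry.Level, entry.Msg)
	}
}
```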
|
||||
|
||||
/* XXX: This is ugly. */
|
||||
static int syncfd = -1;
|
||||
|
||||
#define bail(fmt, ...) \
|
||||
do { \
|
||||
write_log(FATAL, "nsenter: " fmt ": %m", ##__VA_ARGS__); \
|
||||
write_log(FATAL, fmt ": %m", ##__VA_ARGS__); \
|
||||
exit(1); \
|
||||
} while(0)
|
||||
|
||||
|
@ -187,7 +203,7 @@ static int write_file(char *data, size_t data_len, char *pathfmt, ...)
|
|||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
out:
|
||||
close(fd);
|
||||
return ret;
|
||||
}
|
||||
|
@ -297,9 +313,11 @@ static void update_uidmap(const char *path, int pid, char *map, size_t map_len)
|
|||
if (map == NULL || map_len <= 0)
|
||||
return;
|
||||
|
||||
write_log(DEBUG, "update /proc/%d/uid_map to '%s'", pid, map);
|
||||
if (write_file(map, map_len, "/proc/%d/uid_map", pid) < 0) {
|
||||
if (errno != EPERM)
|
||||
bail("failed to update /proc/%d/uid_map", pid);
|
||||
write_log(DEBUG, "update /proc/%d/uid_map got -EPERM (trying %s)", pid, path);
|
||||
if (try_mapping_tool(path, pid, map, map_len))
|
||||
bail("failed to use newuid map on %d", pid);
|
||||
}
|
||||
|
@ -310,9 +328,11 @@ static void update_gidmap(const char *path, int pid, char *map, size_t map_len)
|
|||
if (map == NULL || map_len <= 0)
|
||||
return;
|
||||
|
||||
write_log(DEBUG, "update /proc/%d/gid_map to '%s'", pid, map);
|
||||
if (write_file(map, map_len, "/proc/%d/gid_map", pid) < 0) {
|
||||
if (errno != EPERM)
|
||||
bail("failed to update /proc/%d/gid_map", pid);
|
||||
write_log(DEBUG, "update /proc/%d/gid_map got -EPERM (trying %s)", pid, path);
|
||||
if (try_mapping_tool(path, pid, map, map_len))
|
||||
bail("failed to use newgid map on %d", pid);
|
||||
}
|
||||
|
@ -323,19 +343,20 @@ static void update_oom_score_adj(char *data, size_t len)
|
|||
if (data == NULL || len <= 0)
|
||||
return;
|
||||
|
||||
write_log(DEBUG, "update /proc/self/oom_score_adj to '%s'", data);
|
||||
if (write_file(data, len, "/proc/self/oom_score_adj") < 0)
|
||||
bail("failed to update /proc/self/oom_score_adj");
|
||||
}
|
||||
|
||||
/* A dummy function that just jumps to the given jumpval. */
|
||||
static int child_func(void *arg) __attribute__ ((noinline));
|
||||
static int child_func(void *arg) __attribute__((noinline));
|
||||
static int child_func(void *arg)
|
||||
{
|
||||
struct clone_t *ca = (struct clone_t *)arg;
|
||||
longjmp(*ca->env, ca->jmpval);
|
||||
}
|
||||
|
||||
static int clone_parent(jmp_buf *env, int jmpval) __attribute__ ((noinline));
|
||||
static int clone_parent(jmp_buf *env, int jmpval) __attribute__((noinline));
|
||||
static int clone_parent(jmp_buf *env, int jmpval)
|
||||
{
|
||||
struct clone_t ca = {
|
||||
|
@ -507,7 +528,6 @@ void join_namespaces(char *nslist)
|
|||
char *namespace = strtok_r(nslist, ",", &saveptr);
|
||||
struct namespace_t {
|
||||
int fd;
|
||||
int ns;
|
||||
char type[PATH_MAX];
|
||||
char path[PATH_MAX];
|
||||
} *namespaces = NULL;
|
||||
|
@ -542,7 +562,7 @@ void join_namespaces(char *nslist)
|
|||
bail("failed to open %s", path);
|
||||
|
||||
ns->fd = fd;
|
||||
ns->ns = nsflag(namespace);
|
||||
strncpy(ns->type, namespace, PATH_MAX - 1);
|
||||
strncpy(ns->path, path, PATH_MAX - 1);
|
||||
ns->path[PATH_MAX - 1] = '\0';
|
||||
} while ((namespace = strtok_r(NULL, ",", &saveptr)) != NULL);
|
||||
|
@ -555,12 +575,14 @@ void join_namespaces(char *nslist)
|
|||
*/
|
||||
|
||||
for (i = 0; i < num; i++) {
|
||||
struct namespace_t ns = namespaces[i];
|
||||
struct namespace_t *ns = &namespaces[i];
|
||||
int flag = nsflag(ns->type);
|
||||
|
||||
if (setns(ns.fd, ns.ns) < 0)
|
||||
bail("failed to setns to %s", ns.path);
|
||||
write_log(DEBUG, "setns(%#x) into %s namespace (with path %s)", flag, ns->type, ns->path);
|
||||
if (setns(ns->fd, flag) < 0)
|
||||
bail("failed to setns into %s namespace", ns->type);
|
||||
|
||||
close(ns.fd);
|
||||
close(ns->fd);
|
||||
}
|
||||
|
||||
free(namespaces);
|
||||
|
@ -569,6 +591,14 @@ void join_namespaces(char *nslist)
|
|||
/* Defined in cloned_binary.c. */
|
||||
extern int ensure_cloned_binary(void);
|
||||
|
||||
static inline int sane_kill(pid_t pid, int signum)
|
||||
{
|
||||
if (pid > 0)
|
||||
return kill(pid, signum);
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
void nsexec(void)
|
||||
{
|
||||
int pipenum;
|
||||
|
@ -598,7 +628,14 @@ void nsexec(void)
|
|||
if (ensure_cloned_binary() < 0)
|
||||
bail("could not ensure we are a cloned binary");
|
||||
|
||||
write_log(DEBUG, "nsexec started");
|
||||
/*
|
||||
* Inform the parent we're past initial setup.
|
||||
* For the other side of this, see initWaiter.
|
||||
*/
|
||||
if (write(pipenum, "", 1) != 1)
|
||||
bail("could not inform the parent we are past initial setup");
|
||||
|
||||
write_log(DEBUG, "=> nsexec container setup");
|
||||
|
||||
/* Parse all of the netlink configuration. */
|
||||
nl_parse(pipenum, &config);
|
||||
|
@ -622,6 +659,7 @@ void nsexec(void)
|
|||
* containers), which is the recommendation from the kernel folks.
|
||||
*/
|
||||
if (config.namespaces) {
|
||||
write_log(DEBUG, "set process as non-dumpable");
|
||||
if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0)
|
||||
bail("failed to set process as non-dumpable");
|
||||
}
|
||||
|
@ -686,45 +724,49 @@ void nsexec(void)
|
|||
* -- Aleksa "what has my life come to?" Sarai
|
||||
*/
|
||||
|
||||
switch (setjmp(env)) {
|
||||
current_stage = setjmp(env);
|
||||
switch (current_stage) {
|
||||
/*
|
||||
* Stage 0: We're in the parent. Our job is just to create a new child
|
||||
* (stage 1: JUMP_CHILD) process and write its uid_map and
|
||||
* (stage 1: STAGE_CHILD) process and write its uid_map and
|
||||
* gid_map. That process will go on to create a new process, then
|
||||
* it will send us its PID which we will send to the bootstrap
|
||||
* process.
|
||||
*/
|
||||
case JUMP_PARENT:{
|
||||
case STAGE_PARENT:{
|
||||
int len;
|
||||
pid_t child, first_child = -1;
|
||||
bool ready = false;
|
||||
pid_t stage1_pid = -1, stage2_pid = -1;
|
||||
bool stage1_complete, stage2_complete;
|
||||
|
||||
/* For debugging. */
|
||||
prctl(PR_SET_NAME, (unsigned long)"runc:[0:PARENT]", 0, 0, 0);
|
||||
write_log(DEBUG, "~> nsexec stage-0");
|
||||
|
||||
/* Start the process of getting a container. */
|
||||
child = clone_parent(&env, JUMP_CHILD);
|
||||
if (child < 0)
|
||||
bail("unable to fork: child_func");
|
||||
write_log(DEBUG, "spawn stage-1");
|
||||
stage1_pid = clone_parent(&env, STAGE_CHILD);
|
||||
if (stage1_pid < 0)
|
||||
bail("unable to spawn stage-1");
|
||||
|
||||
/*
|
||||
* State machine for synchronisation with the children.
|
||||
*
|
||||
* Father only return when both child and grandchild are
|
||||
* ready, so we can receive all possible error codes
|
||||
* generated by children.
|
||||
*/
|
||||
syncfd = sync_child_pipe[1];
|
||||
close(sync_child_pipe[0]);
|
||||
|
||||
while (!ready) {
|
||||
/*
|
||||
* State machine for synchronisation with the children. We only
|
||||
* return once both the child and grandchild are ready.
|
||||
*/
|
||||
write_log(DEBUG, "-> stage-1 synchronisation loop");
|
||||
stage1_complete = false;
|
||||
while (!stage1_complete) {
|
||||
enum sync_t s;
|
||||
|
||||
if (read(syncfd, &s, sizeof(s)) != sizeof(s))
|
||||
bail("failed to sync with child: next state");
|
||||
bail("failed to sync with stage-1: next state");
|
||||
|
||||
switch (s) {
|
||||
case SYNC_USERMAP_PLS:
|
||||
write_log(DEBUG, "stage-1 requested userns mappings");
|
||||
|
||||
/*
|
||||
* Enable setgroups(2) if we've been asked to. But we also
|
||||
* have to explicitly disable setgroups(2) if we're
|
||||
|
@ -735,70 +777,78 @@ void nsexec(void)
|
|||
* For rootless multi-entry mapping, config.is_setgroup shall be true and
|
||||
* newuidmap/newgidmap shall be used.
|
||||
*/
|
||||
|
||||
if (config.is_rootless_euid && !config.is_setgroup)
|
||||
update_setgroups(child, SETGROUPS_DENY);
|
||||
update_setgroups(stage1_pid, SETGROUPS_DENY);
|
||||
|
||||
/* Set up mappings. */
|
||||
update_uidmap(config.uidmappath, child, config.uidmap, config.uidmap_len);
|
||||
update_gidmap(config.gidmappath, child, config.gidmap, config.gidmap_len);
|
||||
update_uidmap(config.uidmappath, stage1_pid, config.uidmap, config.uidmap_len);
|
||||
update_gidmap(config.gidmappath, stage1_pid, config.gidmap, config.gidmap_len);
|
||||
|
||||
s = SYNC_USERMAP_ACK;
|
||||
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
|
||||
kill(child, SIGKILL);
|
||||
bail("failed to sync with child: write(SYNC_USERMAP_ACK)");
|
||||
sane_kill(stage1_pid, SIGKILL);
|
||||
sane_kill(stage2_pid, SIGKILL);
|
||||
bail("failed to sync with stage-1: write(SYNC_USERMAP_ACK)");
|
||||
}
|
||||
break;
|
||||
case SYNC_RECVPID_PLS:{
|
||||
first_child = child;
|
||||
case SYNC_RECVPID_PLS:
|
||||
write_log(DEBUG, "stage-1 requested pid to be forwarded");
|
||||
|
||||
/* Get the init_func pid. */
|
||||
if (read(syncfd, &child, sizeof(child)) != sizeof(child)) {
|
||||
kill(first_child, SIGKILL);
|
||||
bail("failed to sync with child: read(childpid)");
|
||||
}
|
||||
/* Get the stage-2 pid. */
|
||||
if (read(syncfd, &stage2_pid, sizeof(stage2_pid)) != sizeof(stage2_pid)) {
|
||||
sane_kill(stage1_pid, SIGKILL);
|
||||
sane_kill(stage2_pid, SIGKILL);
|
||||
bail("failed to sync with stage-1: read(stage2_pid)");
|
||||
}
|
||||
|
||||
/* Send ACK. */
|
||||
s = SYNC_RECVPID_ACK;
|
||||
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
|
||||
kill(first_child, SIGKILL);
|
||||
kill(child, SIGKILL);
|
||||
bail("failed to sync with child: write(SYNC_RECVPID_ACK)");
|
||||
}
|
||||
/* Send ACK. */
|
||||
s = SYNC_RECVPID_ACK;
|
||||
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
|
||||
sane_kill(stage1_pid, SIGKILL);
|
||||
sane_kill(stage2_pid, SIGKILL);
|
||||
bail("failed to sync with stage-1: write(SYNC_RECVPID_ACK)");
|
||||
}
|
||||
|
||||
/* Send the init_func pid back to our parent.
|
||||
*
|
||||
* Send the init_func pid and the pid of the first child back to our parent.
|
||||
* We need to send both back because we can't reap the first child we created (CLONE_PARENT).
|
||||
* It becomes the responsibility of our parent to reap the first child.
|
||||
*/
|
||||
len = dprintf(pipenum, "{\"pid\": %d, \"pid_first\": %d}\n", child, first_child);
|
||||
if (len < 0) {
|
||||
kill(child, SIGKILL);
|
||||
bail("unable to generate JSON for child pid");
|
||||
}
|
||||
/*
|
||||
* Send both the stage-1 and stage-2 pids back to runc.
|
||||
* runc needs the stage-2 to continue process management,
|
||||
* but because stage-1 was spawned with CLONE_PARENT we
|
||||
* cannot reap it within stage-0 and thus we need to ask
|
||||
* runc to reap the zombie for us.
|
||||
*/
|
||||
write_log(DEBUG, "forward stage-1 (%d) and stage-2 (%d) pids to runc",
|
||||
stage1_pid, stage2_pid);
|
||||
len =
|
||||
dprintf(pipenum, "{\"stage1_pid\":%d,\"stage2_pid\":%d}\n", stage1_pid,
|
||||
stage2_pid);
|
||||
if (len < 0) {
|
||||
sane_kill(stage1_pid, SIGKILL);
|
||||
sane_kill(stage2_pid, SIGKILL);
|
||||
bail("failed to sync with runc: write(pid-JSON)");
|
||||
}
|
||||
break;
|
||||
case SYNC_CHILD_READY:
|
||||
ready = true;
|
||||
case SYNC_CHILD_FINISH:
|
||||
write_log(DEBUG, "stage-1 complete");
|
||||
stage1_complete = true;
|
||||
break;
|
||||
default:
|
||||
bail("unexpected sync value: %u", s);
|
||||
}
|
||||
}
|
||||
write_log(DEBUG, "<- stage-1 synchronisation loop");
|
||||
|
||||
/* Now sync with grandchild. */
|
||||
|
||||
syncfd = sync_grandchild_pipe[1];
|
||||
close(sync_grandchild_pipe[0]);
|
||||
|
||||
ready = false;
|
||||
while (!ready) {
|
||||
write_log(DEBUG, "-> stage-2 synchronisation loop");
|
||||
stage2_complete = false;
|
||||
while (!stage2_complete) {
|
||||
enum sync_t s;
|
||||
|
||||
write_log(DEBUG, "signalling stage-2 to run");
|
||||
s = SYNC_GRANDCHILD;
|
||||
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
|
||||
kill(child, SIGKILL);
|
||||
sane_kill(stage2_pid, SIGKILL);
|
||||
bail("failed to sync with child: write(SYNC_GRANDCHILD)");
|
||||
}
|
||||
|
||||
|
@ -806,27 +856,31 @@ void nsexec(void)
|
|||
bail("failed to sync with child: next state");
|
||||
|
||||
switch (s) {
|
||||
case SYNC_CHILD_READY:
|
||||
ready = true;
|
||||
case SYNC_CHILD_FINISH:
|
||||
write_log(DEBUG, "stage-2 complete");
|
||||
stage2_complete = true;
|
||||
break;
|
||||
default:
|
||||
bail("unexpected sync value: %u", s);
|
||||
}
|
||||
}
|
||||
write_log(DEBUG, "<- stage-2 synchronisation loop");
|
||||
write_log(DEBUG, "<~ nsexec stage-0");
|
||||
exit(0);
|
||||
}
|
||||
break;
|
||||
|
||||
/*
|
||||
* Stage 1: We're in the first child process. Our job is to join any
|
||||
* provided namespaces in the netlink payload and unshare all
|
||||
* of the requested namespaces. If we've been asked to
|
||||
* CLONE_NEWUSER, we will ask our parent (stage 0) to set up
|
||||
* our user mappings for us. Then, we create a new child
|
||||
* (stage 2: JUMP_INIT) for PID namespace. We then send the
|
||||
* child's PID to our parent (stage 0).
|
||||
* provided namespaces in the netlink payload and unshare all of
|
||||
* the requested namespaces. If we've been asked to CLONE_NEWUSER,
|
||||
* we will ask our parent (stage 0) to set up our user mappings
|
||||
* for us. Then, we create a new child (stage 2: STAGE_INIT) for
|
||||
* PID namespace. We then send the child's PID to our parent
|
||||
* (stage 0).
|
||||
*/
|
||||
case JUMP_CHILD:{
|
||||
pid_t child;
|
||||
case STAGE_CHILD:{
|
||||
pid_t stage2_pid = -1;
|
||||
enum sync_t s;
|
||||
|
||||
/* We're in a child and thus need to tell the parent if we die. */
|
||||
|
@ -835,11 +889,12 @@ void nsexec(void)
|
|||
|
||||
/* For debugging. */
|
||||
prctl(PR_SET_NAME, (unsigned long)"runc:[1:CHILD]", 0, 0, 0);
|
||||
write_log(DEBUG, "~> nsexec stage-1");
|
||||
|
||||
/*
|
||||
* We need to setns first. We cannot do this earlier (in stage 0)
|
||||
* because of the fact that we forked to get here (the PID of
|
||||
* [stage 2: JUMP_INIT]) would be meaningless). We could send it
|
||||
* [stage 2: STAGE_INIT]) would be meaningless). We could send it
|
||||
* using cmsg(3) but that's just annoying.
|
||||
*/
|
||||
if (config.namespaces)
|
||||
|
@ -865,40 +920,50 @@ void nsexec(void)
|
|||
* problem.
|
||||
*/
|
||||
if (config.cloneflags & CLONE_NEWUSER) {
|
||||
write_log(DEBUG, "unshare user namespace");
|
||||
if (unshare(CLONE_NEWUSER) < 0)
|
||||
bail("failed to unshare user namespace");
|
||||
config.cloneflags &= ~CLONE_NEWUSER;
|
||||
|
||||
/*
|
||||
* We don't have the privileges to do any mapping here (see the
|
||||
* clone_parent rant). So signal our parent to hook us up.
|
||||
* We need to set ourselves as dumpable temporarily so that the
|
||||
* parent process can write to our procfs files.
|
||||
*/
|
||||
|
||||
/* Switching is only necessary if we joined namespaces. */
|
||||
if (config.namespaces) {
|
||||
write_log(DEBUG, "temporarily set process as dumpable");
|
||||
if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) < 0)
|
||||
bail("failed to set process as dumpable");
|
||||
bail("failed to temporarily set process as dumpable");
|
||||
}
|
||||
|
||||
/*
|
||||
* We don't have the privileges to do any mapping here (see the
|
||||
* clone_parent rant). So signal stage-0 to do the mapping for
|
||||
* us.
|
||||
*/
|
||||
write_log(DEBUG, "request stage-0 to map user namespace");
|
||||
s = SYNC_USERMAP_PLS;
|
||||
if (write(syncfd, &s, sizeof(s)) != sizeof(s))
|
||||
bail("failed to sync with parent: write(SYNC_USERMAP_PLS)");
|
||||
|
||||
/* ... wait for mapping ... */
|
||||
|
||||
write_log(DEBUG, "request stage-0 to map user namespace");
|
||||
if (read(syncfd, &s, sizeof(s)) != sizeof(s))
|
||||
bail("failed to sync with parent: read(SYNC_USERMAP_ACK)");
|
||||
if (s != SYNC_USERMAP_ACK)
|
||||
bail("failed to sync with parent: SYNC_USERMAP_ACK: got %u", s);
|
||||
/* Switching is only necessary if we joined namespaces. */
|
||||
|
||||
/* Revert temporary re-dumpable setting. */
|
||||
if (config.namespaces) {
|
||||
write_log(DEBUG, "re-set process as non-dumpable");
|
||||
if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0)
|
||||
bail("failed to set process as dumpable");
|
||||
bail("failed to re-set process as non-dumpable");
|
||||
}
|
||||
|
||||
/* Become root in the namespace proper. */
|
||||
if (setresuid(0, 0, 0) < 0)
|
||||
bail("failed to become root in user namespace");
|
||||
}
|
||||
|
||||
/*
|
||||
* Unshare all of the namespaces. Now, it should be noted that this
|
||||
* ordering might break in the future (especially with rootless
|
||||
|
@ -909,8 +974,9 @@ void nsexec(void)
|
|||
* some old kernel versions where clone(CLONE_PARENT | CLONE_NEWPID)
|
||||
* was broken, so we'll just do it the long way anyway.
|
||||
*/
|
||||
write_log(DEBUG, "unshare remaining namespace (except cgroupns)");
|
||||
if (unshare(config.cloneflags & ~CLONE_NEWCGROUP) < 0)
|
||||
bail("failed to unshare namespaces");
|
||||
bail("failed to unshare remaining namespaces (except cgroupns)");
|
||||
|
||||
/*
|
||||
* TODO: What about non-namespace clone flags that we're dropping here?
|
||||
|
@ -921,41 +987,45 @@ void nsexec(void)
|
|||
* which would break many applications and libraries, so we must fork
|
||||
* to actually enter the new PID namespace.
|
||||
*/
|
||||
child = clone_parent(&env, JUMP_INIT);
|
||||
if (child < 0)
|
||||
bail("unable to fork: init_func");
|
||||
write_log(DEBUG, "spawn stage-2");
|
||||
stage2_pid = clone_parent(&env, STAGE_INIT);
|
||||
if (stage2_pid < 0)
|
||||
bail("unable to spawn stage-2");
|
||||
|
||||
/* Send the child to our parent, which knows what it's doing. */
|
||||
write_log(DEBUG, "request stage-0 to forward stage-2 pid (%d)", stage2_pid);
|
||||
s = SYNC_RECVPID_PLS;
|
||||
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
|
||||
kill(child, SIGKILL);
|
||||
sane_kill(stage2_pid, SIGKILL);
|
||||
bail("failed to sync with parent: write(SYNC_RECVPID_PLS)");
|
||||
}
|
||||
if (write(syncfd, &child, sizeof(child)) != sizeof(child)) {
|
||||
kill(child, SIGKILL);
|
||||
bail("failed to sync with parent: write(childpid)");
|
||||
if (write(syncfd, &stage2_pid, sizeof(stage2_pid)) != sizeof(stage2_pid)) {
|
||||
sane_kill(stage2_pid, SIGKILL);
|
||||
bail("failed to sync with parent: write(stage2_pid)");
|
||||
}
|
||||
|
||||
/* ... wait for parent to get the pid ... */
|
||||
|
||||
if (read(syncfd, &s, sizeof(s)) != sizeof(s)) {
|
||||
kill(child, SIGKILL);
|
||||
sane_kill(stage2_pid, SIGKILL);
|
||||
bail("failed to sync with parent: read(SYNC_RECVPID_ACK)");
|
||||
}
|
||||
if (s != SYNC_RECVPID_ACK) {
|
||||
kill(child, SIGKILL);
|
||||
sane_kill(stage2_pid, SIGKILL);
|
||||
bail("failed to sync with parent: SYNC_RECVPID_ACK: got %u", s);
|
||||
}
|
||||
|
||||
s = SYNC_CHILD_READY;
|
||||
write_log(DEBUG, "signal completion to stage-0");
|
||||
s = SYNC_CHILD_FINISH;
|
||||
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
|
||||
kill(child, SIGKILL);
|
||||
bail("failed to sync with parent: write(SYNC_CHILD_READY)");
|
||||
sane_kill(stage2_pid, SIGKILL);
|
||||
bail("failed to sync with parent: write(SYNC_CHILD_FINISH)");
|
||||
}
|
||||
|
||||
/* Our work is done. [Stage 2: JUMP_INIT] is doing the rest of the work. */
|
||||
/* Our work is done. [Stage 2: STAGE_INIT] is doing the rest of the work. */
|
||||
write_log(DEBUG, "<~ nsexec stage-1");
|
||||
exit(0);
|
||||
}
|
||||
break;
|
||||
|
||||
/*
|
||||
* Stage 2: We're the final child process, and the only process that will
|
||||
|
@ -963,7 +1033,7 @@ void nsexec(void)
|
|||
* final cleanup steps and then return to the Go runtime to allow
|
||||
* init_linux.go to run.
|
||||
*/
|
||||
case JUMP_INIT:{
|
||||
case STAGE_INIT:{
|
||||
/*
|
||||
* We're inside the child now, having jumped from the
|
||||
* start_child() code after forking in the parent.
|
||||
|
@ -978,6 +1048,7 @@ void nsexec(void)
|
|||
|
||||
/* For debugging. */
|
||||
prctl(PR_SET_NAME, (unsigned long)"runc:[2:INIT]", 0, 0, 0);
|
||||
write_log(DEBUG, "~> nsexec stage-2");
|
||||
|
||||
if (read(syncfd, &s, sizeof(s)) != sizeof(s))
|
||||
bail("failed to sync with parent: read(SYNC_GRANDCHILD)");
|
||||
|
@ -998,21 +1069,30 @@ void nsexec(void)
|
|||
bail("setgroups failed");
|
||||
}
|
||||
|
||||
/* ... wait until our topmost parent has finished cgroup setup in p.manager.Apply() ... */
|
||||
/*
|
||||
* Wait until our topmost parent has finished cgroup setup in
|
||||
* p.manager.Apply().
|
||||
*
|
||||
* TODO(cyphar): Check if this code is actually needed because we
|
||||
* should be in the cgroup even from stage-0, so
|
||||
* waiting until now might not make sense.
|
||||
*/
|
||||
if (config.cloneflags & CLONE_NEWCGROUP) {
|
||||
uint8_t value;
|
||||
if (read(pipenum, &value, sizeof(value)) != sizeof(value))
|
||||
bail("read synchronisation value failed");
|
||||
if (value == CREATECGROUPNS) {
|
||||
write_log(DEBUG, "unshare cgroup namespace");
|
||||
if (unshare(CLONE_NEWCGROUP) < 0)
|
||||
bail("failed to unshare cgroup namespace");
|
||||
} else
|
||||
bail("received unknown synchronisation value");
|
||||
}
|
||||
|
||||
s = SYNC_CHILD_READY;
|
||||
write_log(DEBUG, "signal completion to stage-0");
|
||||
s = SYNC_CHILD_FINISH;
|
||||
if (write(syncfd, &s, sizeof(s)) != sizeof(s))
|
||||
bail("failed to sync with patent: write(SYNC_CHILD_READY)");
|
||||
bail("failed to sync with patent: write(SYNC_CHILD_FINISH)");
|
||||
|
||||
/* Close sync pipes. */
|
||||
close(sync_grandchild_pipe[0]);
|
||||
|
@ -1021,10 +1101,13 @@ void nsexec(void)
|
|||
nl_free(&config);
|
||||
|
||||
/* Finish executing, let the Go runtime take over. */
|
||||
write_log(DEBUG, "<= nsexec container setup");
|
||||
write_log(DEBUG, "booting up go runtime ...");
|
||||
return;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
bail("unexpected jump value");
|
||||
bail("unknown stage '%d' for jump value", current_stage);
|
||||
}
|
||||
|
||||
/* Should never be reached. */
|
||||
|
|
vendor/github.com/opencontainers/runc/libcontainer/nsenter/test/escape.c (generated, vendored, new symbolic link)
@@ -0,0 +1 @@
+../escape.c
|
vendor/github.com/opencontainers/runc/libcontainer/nsenter/test/escape.go (generated, vendored, new file, 53 lines)
|
@ -0,0 +1,53 @@
|
|||
package escapetest
|
||||
|
||||
// This file is part of escape_json_string unit test.
|
||||
// It is in a separate package so cgo can be used together
|
||||
// with go test.
|
||||
|
||||
// #include <stdlib.h>
|
||||
// extern char *escape_json_string(char *str);
|
||||
// #cgo CFLAGS: -DESCAPE_TEST=1
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
func testEscapeJsonString(t *testing.T, input, want string) {
|
||||
in := C.CString(input)
|
||||
out := C.escape_json_string(in)
|
||||
got := C.GoString(out)
|
||||
C.free(unsafe.Pointer(out))
|
||||
t.Logf("input: %q, output: %q", input, got)
|
||||
if got != want {
|
||||
t.Errorf("Failed on input: %q, want %q, got %q", input, want, got)
|
||||
}
|
||||
}
|
||||
|
||||
func testEscapeJson(t *testing.T) {
|
||||
testCases := []struct {
|
||||
input, output string
|
||||
}{
|
||||
{"", ""},
|
||||
{"abcdef", "abcdef"},
|
||||
{`\\\\\\`, `\\\\\\\\\\\\`},
|
||||
{`with"quote`, `with\"quote`},
|
||||
{"\n\r\b\t\f\\", `\n\r\b\t\f\\`},
|
||||
{"\007", "\\u0007"},
|
||||
{"\017 \020 \037", "\\u000f \\u0010 \\u001f"},
|
||||
{"\033", "\\u001b"},
|
||||
{`<->`, `<->`},
|
||||
{"\176\177\200", "~\\u007f\200"},
|
||||
{"\000", ""},
|
||||
{"a\x7fxc", "a\\u007fxc"},
|
||||
{"a\033xc", "a\\u001bxc"},
|
||||
{"a\nxc", "a\\nxc"},
|
||||
{"a\\xc", "a\\\\xc"},
|
||||
{"Barney B\303\244r", "Barney B\303\244r"},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
testEscapeJsonString(t, tc.input, tc.output)
|
||||
}
|
||||
}
|
|
@ -1,41 +0,0 @@
|
|||
package user
|
||||
|
||||
import (
|
||||
"errors"
|
||||
)
|
||||
|
||||
var (
|
||||
// The current operating system does not provide the required data for user lookups.
|
||||
ErrUnsupported = errors.New("user lookup: operating system does not provide passwd-formatted data")
|
||||
// No matching entries found in file.
|
||||
ErrNoPasswdEntries = errors.New("no matching entries in passwd file")
|
||||
ErrNoGroupEntries = errors.New("no matching entries in group file")
|
||||
)
|
||||
|
||||
// LookupUser looks up a user by their username in /etc/passwd. If the user
|
||||
// cannot be found (or there is no /etc/passwd file on the filesystem), then
|
||||
// LookupUser returns an error.
|
||||
func LookupUser(username string) (User, error) {
|
||||
return lookupUser(username)
|
||||
}
|
||||
|
||||
// LookupUid looks up a user by their user id in /etc/passwd. If the user cannot
|
||||
// be found (or there is no /etc/passwd file on the filesystem), then LookupId
|
||||
// returns an error.
|
||||
func LookupUid(uid int) (User, error) {
|
||||
return lookupUid(uid)
|
||||
}
|
||||
|
||||
// LookupGroup looks up a group by its name in /etc/group. If the group cannot
|
||||
// be found (or there is no /etc/group file on the filesystem), then LookupGroup
|
||||
// returns an error.
|
||||
func LookupGroup(groupname string) (Group, error) {
|
||||
return lookupGroup(groupname)
|
||||
}
|
||||
|
||||
// LookupGid looks up a group by its group id in /etc/group. If the group cannot
|
||||
// be found (or there is no /etc/group file on the filesystem), then LookupGid
|
||||
// returns an error.
|
||||
func LookupGid(gid int) (Group, error) {
|
||||
return lookupGid(gid)
|
||||
}
|
|
@ -16,13 +16,19 @@ const (
|
|||
unixGroupPath = "/etc/group"
|
||||
)
|
||||
|
||||
func lookupUser(username string) (User, error) {
|
||||
// LookupUser looks up a user by their username in /etc/passwd. If the user
|
||||
// cannot be found (or there is no /etc/passwd file on the filesystem), then
|
||||
// LookupUser returns an error.
|
||||
func LookupUser(username string) (User, error) {
|
||||
return lookupUserFunc(func(u User) bool {
|
||||
return u.Name == username
|
||||
})
|
||||
}
|
||||
|
||||
func lookupUid(uid int) (User, error) {
|
||||
// LookupUid looks up a user by their user id in /etc/passwd. If the user cannot
|
||||
// be found (or there is no /etc/passwd file on the filesystem), then LookupId
|
||||
// returns an error.
|
||||
func LookupUid(uid int) (User, error) {
|
||||
return lookupUserFunc(func(u User) bool {
|
||||
return u.Uid == uid
|
||||
})
|
||||
|
@ -51,13 +57,19 @@ func lookupUserFunc(filter func(u User) bool) (User, error) {
|
|||
return users[0], nil
|
||||
}
|
||||
|
||||
func lookupGroup(groupname string) (Group, error) {
|
||||
// LookupGroup looks up a group by its name in /etc/group. If the group cannot
|
||||
// be found (or there is no /etc/group file on the filesystem), then LookupGroup
|
||||
// returns an error.
|
||||
func LookupGroup(groupname string) (Group, error) {
|
||||
return lookupGroupFunc(func(g Group) bool {
|
||||
return g.Name == groupname
|
||||
})
|
||||
}
|
||||
|
||||
func lookupGid(gid int) (Group, error) {
|
||||
// LookupGid looks up a group by its group id in /etc/group. If the group cannot
|
||||
// be found (or there is no /etc/group file on the filesystem), then LookupGid
|
||||
// returns an error.
|
||||
func LookupGid(gid int) (Group, error) {
|
||||
return lookupGroupFunc(func(g Group) bool {
|
||||
return g.Gid == gid
|
||||
})
|
||||
|
|
|
@ -1,40 +0,0 @@
|
|||
// +build windows
|
||||
|
||||
package user
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os/user"
|
||||
)
|
||||
|
||||
func lookupUser(username string) (User, error) {
|
||||
u, err := user.Lookup(username)
|
||||
if err != nil {
|
||||
return User{}, err
|
||||
}
|
||||
return userFromOS(u)
|
||||
}
|
||||
|
||||
func lookupUid(uid int) (User, error) {
|
||||
u, err := user.LookupId(fmt.Sprintf("%d", uid))
|
||||
if err != nil {
|
||||
return User{}, err
|
||||
}
|
||||
return userFromOS(u)
|
||||
}
|
||||
|
||||
func lookupGroup(groupname string) (Group, error) {
|
||||
g, err := user.LookupGroup(groupname)
|
||||
if err != nil {
|
||||
return Group{}, err
|
||||
}
|
||||
return groupFromOS(g)
|
||||
}
|
||||
|
||||
func lookupGid(gid int) (Group, error) {
|
||||
g, err := user.LookupGroupId(fmt.Sprintf("%d", gid))
|
||||
if err != nil {
|
||||
return Group{}, err
|
||||
}
|
||||
return groupFromOS(g)
|
||||
}
|
|
@ -2,10 +2,10 @@ package user
|
|||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"os/user"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
@ -16,6 +16,13 @@ const (
|
|||
)
|
||||
|
||||
var (
|
||||
// The current operating system does not provide the required data for user lookups.
|
||||
ErrUnsupported = errors.New("user lookup: operating system does not provide passwd-formatted data")
|
||||
|
||||
// No matching entries found in file.
|
||||
ErrNoPasswdEntries = errors.New("no matching entries in passwd file")
|
||||
ErrNoGroupEntries = errors.New("no matching entries in group file")
|
||||
|
||||
ErrRange = fmt.Errorf("uids and gids must be in range %d-%d", minId, maxId)
|
||||
)
|
||||
|
||||
|
@ -29,28 +36,6 @@ type User struct {
|
|||
Shell string
|
||||
}
|
||||
|
||||
// userFromOS converts an os/user.(*User) to local User
|
||||
//
|
||||
// (This does not include Pass, Shell or Gecos)
|
||||
func userFromOS(u *user.User) (User, error) {
|
||||
newUser := User{
|
||||
Name: u.Username,
|
||||
Home: u.HomeDir,
|
||||
}
|
||||
id, err := strconv.Atoi(u.Uid)
|
||||
if err != nil {
|
||||
return newUser, err
|
||||
}
|
||||
newUser.Uid = id
|
||||
|
||||
id, err = strconv.Atoi(u.Gid)
|
||||
if err != nil {
|
||||
return newUser, err
|
||||
}
|
||||
newUser.Gid = id
|
||||
return newUser, nil
|
||||
}
|
||||
|
||||
type Group struct {
|
||||
Name string
|
||||
Pass string
|
||||
|
@ -58,23 +43,6 @@ type Group struct {
|
|||
List []string
|
||||
}
|
||||
|
||||
// groupFromOS converts an os/user.(*Group) to local Group
|
||||
//
|
||||
// (This does not include Pass or List)
|
||||
func groupFromOS(g *user.Group) (Group, error) {
|
||||
newGroup := Group{
|
||||
Name: g.Name,
|
||||
}
|
||||
|
||||
id, err := strconv.Atoi(g.Gid)
|
||||
if err != nil {
|
||||
return newGroup, err
|
||||
}
|
||||
newGroup.Gid = id
|
||||
|
||||
return newGroup, nil
|
||||
}
|
||||
|
||||
// SubID represents an entry in /etc/sub{u,g}id
|
||||
type SubID struct {
|
||||
Name string
|
||||
|
@ -466,7 +434,7 @@ func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, err
|
|||
// we asked for a group but didn't find it. let's check to see
|
||||
// if we wanted a numeric group
|
||||
if !found {
|
||||
gid, err := strconv.Atoi(ag)
|
||||
gid, err := strconv.ParseInt(ag, 10, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Unable to find group %s", ag)
|
||||
}
|
||||
|
@ -474,7 +442,7 @@ func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, err
|
|||
if gid < minId || gid > maxId {
|
||||
return nil, ErrRange
|
||||
}
|
||||
gidMap[gid] = struct{}{}
|
||||
gidMap[int(gid)] = struct{}{}
|
||||
}
|
||||
}
|
||||
gids := []int{}
|
||||
|
|
vendor/github.com/opencontainers/runc/libcontainer/user/user_fuzzer.go (generated, vendored, new file, 42 lines)
|
@ -0,0 +1,42 @@
|
|||
// +build gofuzz
|
||||
|
||||
package user
|
||||
|
||||
import (
|
||||
"io"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func IsDivisbleBy(n int, divisibleby int) bool {
|
||||
return (n % divisibleby) == 0
|
||||
}
|
||||
|
||||
func FuzzUser(data []byte) int {
|
||||
if len(data) == 0 {
|
||||
return -1
|
||||
}
|
||||
if !IsDivisbleBy(len(data), 5) {
|
||||
return -1
|
||||
}
|
||||
|
||||
var divided [][]byte
|
||||
|
||||
chunkSize := len(data) / 5
|
||||
|
||||
for i := 0; i < len(data); i += chunkSize {
|
||||
end := i + chunkSize
|
||||
|
||||
divided = append(divided, data[i:end])
|
||||
}
|
||||
|
||||
_, _ = ParsePasswdFilter(strings.NewReader(string(divided[0])), nil)
|
||||
|
||||
var passwd, group io.Reader
|
||||
|
||||
group = strings.NewReader(string(divided[1]))
|
||||
_, _ = GetAdditionalGroups([]string{string(divided[2])}, group)
|
||||
|
||||
passwd = strings.NewReader(string(divided[3]))
|
||||
_, _ = GetExecUser(string(divided[4]), nil, passwd, group)
|
||||
return 1
|
||||
}
|