mirror of https://github.com/docker/cli.git
vendor: github.com/opencontainers/runc v1.0.0-rc95
Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
This commit is contained in:
parent
579279ce09
commit
79a9fd61fd
|
@ -55,7 +55,7 @@ github.com/modern-go/reflect2 94122c33edd36123c84d5368cfb2
|
||||||
github.com/morikuni/aec 39771216ff4c63d11f5e604076f9c45e8be1067b # v1.0.0
|
github.com/morikuni/aec 39771216ff4c63d11f5e604076f9c45e8be1067b # v1.0.0
|
||||||
github.com/opencontainers/go-digest ea51bea511f75cfa3ef6098cc253c5c3609b037a # v1.0.0
|
github.com/opencontainers/go-digest ea51bea511f75cfa3ef6098cc253c5c3609b037a # v1.0.0
|
||||||
github.com/opencontainers/image-spec d60099175f88c47cd379c4738d158884749ed235 # v1.0.1
|
github.com/opencontainers/image-spec d60099175f88c47cd379c4738d158884749ed235 # v1.0.1
|
||||||
github.com/opencontainers/runc ff819c7e9184c13b7c2607fe6c30ae19403a7aff # v1.0.0-rc92
|
github.com/opencontainers/runc b9ee9c6314599f1b4a7f497e1f1f856fe433d3b7 # v1.0.0-rc95
|
||||||
github.com/opentracing/opentracing-go d34af3eaa63c4d08ab54863a4bdd0daa45212e12 # v1.2.0
|
github.com/opentracing/opentracing-go d34af3eaa63c4d08ab54863a4bdd0daa45212e12 # v1.2.0
|
||||||
github.com/pkg/errors 614d223910a179a466c1767a985424175c39b465 # v0.9.1
|
github.com/pkg/errors 614d223910a179a466c1767a985424175c39b465 # v0.9.1
|
||||||
github.com/prometheus/client_golang 6edbbd9e560190e318cdc5b4d3e630b442858380 # v1.6.0
|
github.com/prometheus/client_golang 6edbbd9e560190e318cdc5b4d3e630b442858380 # v1.6.0
|
||||||
|
|
|
@ -1,9 +1,10 @@
|
||||||
# runc
|
# runc
|
||||||
|
|
||||||
[![Build Status](https://travis-ci.org/opencontainers/runc.svg?branch=master)](https://travis-ci.org/opencontainers/runc)
|
|
||||||
[![Go Report Card](https://goreportcard.com/badge/github.com/opencontainers/runc)](https://goreportcard.com/report/github.com/opencontainers/runc)
|
[![Go Report Card](https://goreportcard.com/badge/github.com/opencontainers/runc)](https://goreportcard.com/report/github.com/opencontainers/runc)
|
||||||
[![GoDoc](https://godoc.org/github.com/opencontainers/runc?status.svg)](https://godoc.org/github.com/opencontainers/runc)
|
[![GoDoc](https://godoc.org/github.com/opencontainers/runc?status.svg)](https://godoc.org/github.com/opencontainers/runc)
|
||||||
[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/588/badge)](https://bestpractices.coreinfrastructure.org/projects/588)
|
[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/588/badge)](https://bestpractices.coreinfrastructure.org/projects/588)
|
||||||
|
[![gha/validate](https://github.com/opencontainers/runc/workflows/validate/badge.svg)](https://github.com/opencontainers/runc/actions?query=workflow%3Avalidate)
|
||||||
|
[![gha/ci](https://github.com/opencontainers/runc/workflows/ci/badge.svg)](https://github.com/opencontainers/runc/actions?query=workflow%3Aci)
|
||||||
|
|
||||||
## Introduction
|
## Introduction
|
||||||
|
|
||||||
|
@ -17,10 +18,6 @@ This means that `runc` 1.0.0 should implement the 1.0 version of the specificati
|
||||||
|
|
||||||
You can find official releases of `runc` on the [release](https://github.com/opencontainers/runc/releases) page.
|
You can find official releases of `runc` on the [release](https://github.com/opencontainers/runc/releases) page.
|
||||||
|
|
||||||
Currently, the following features are not considered to be production-ready:
|
|
||||||
|
|
||||||
* [Support for cgroup v2](./docs/cgroup-v2.md)
|
|
||||||
|
|
||||||
## Security
|
## Security
|
||||||
|
|
||||||
The reporting process and disclosure communications are outlined [here](https://github.com/opencontainers/org/blob/master/SECURITY.md).
|
The reporting process and disclosure communications are outlined [here](https://github.com/opencontainers/org/blob/master/SECURITY.md).
|
||||||
|
@ -64,19 +61,20 @@ sudo make install
|
||||||
with some of them enabled by default (see `BUILDTAGS` in top-level `Makefile`).
|
with some of them enabled by default (see `BUILDTAGS` in top-level `Makefile`).
|
||||||
|
|
||||||
To change build tags from the default, set the `BUILDTAGS` variable for make,
|
To change build tags from the default, set the `BUILDTAGS` variable for make,
|
||||||
e.g.
|
e.g. to disable seccomp:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
make BUILDTAGS='seccomp apparmor'
|
make BUILDTAGS=""
|
||||||
```
|
```
|
||||||
|
|
||||||
| Build Tag | Feature | Enabled by default | Dependency |
|
| Build Tag | Feature | Enabled by default | Dependency |
|
||||||
|-----------|------------------------------------|--------------------|------------|
|
|-----------|------------------------------------|--------------------|------------|
|
||||||
| seccomp | Syscall filtering | yes | libseccomp |
|
| seccomp | Syscall filtering | yes | libseccomp |
|
||||||
| selinux | selinux process and mount labeling | yes | <none> |
|
|
||||||
| apparmor | apparmor profile support | yes | <none> |
|
|
||||||
| nokmem | disable kernel memory accounting | no | <none> |
|
|
||||||
|
|
||||||
|
The following build tags were used earlier, but are now obsolete:
|
||||||
|
- **nokmem** (since runc v1.0.0-rc94 kernel memory settings are ignored)
|
||||||
|
- **apparmor** (since runc v1.0.0-rc93 the feature is always enabled)
|
||||||
|
- **selinux** (since runc v1.0.0-rc93 the feature is always enabled)
|
||||||
|
|
||||||
### Running the test suite
|
### Running the test suite
|
||||||
|
|
||||||
|
@ -128,6 +126,14 @@ make verify-dependencies
|
||||||
|
|
||||||
## Using runc
|
## Using runc
|
||||||
|
|
||||||
|
Please note that runc is a low-level tool not designed with an end user
|
||||||
|
in mind. It is mostly employed by other higher-level container software.
|
||||||
|
|
||||||
|
Therefore, unless there is some specific use case that prevents the use
|
||||||
|
of tools like Docker or Podman, it is not recommended to use runc directly.
|
||||||
|
|
||||||
|
If you still want to use runc, here's how.
|
||||||
|
|
||||||
### Creating an OCI Bundle
|
### Creating an OCI Bundle
|
||||||
|
|
||||||
In order to use runc you must have your container in the format of an OCI bundle.
|
In order to use runc you must have your container in the format of an OCI bundle.
|
||||||
|
@ -169,7 +175,9 @@ If you used the unmodified `runc spec` template this should give you a `sh` sess
|
||||||
|
|
||||||
The second way to start a container is using the spec's lifecycle operations.
|
The second way to start a container is using the spec's lifecycle operations.
|
||||||
This gives you more power over how the container is created and managed while it is running.
|
This gives you more power over how the container is created and managed while it is running.
|
||||||
This will also launch the container in the background so you will have to edit the `config.json` to remove the `terminal` setting for the simple examples here.
|
This will also launch the container in the background so you will have to edit
|
||||||
|
the `config.json` to remove the `terminal` setting for the simple examples
|
||||||
|
below (see more details about [runc terminal handling](docs/terminals.md)).
|
||||||
Your process field in the `config.json` should look like the example below, with `"terminal": false` and `"args": ["sleep", "5"]`.
|
Your process field in the `config.json` should look like the example below, with `"terminal": false` and `"args": ["sleep", "5"]`.
|
||||||
|
|
||||||
|
|
||||||
|
@ -292,8 +300,12 @@ PIDFile=/run/mycontainerid.pid
|
||||||
WantedBy=multi-user.target
|
WantedBy=multi-user.target
|
||||||
```
|
```
|
||||||
|
|
||||||
#### cgroup v2
|
## More documentation
|
||||||
See [`./docs/cgroup-v2.md`](./docs/cgroup-v2.md).
|
|
||||||
|
* [cgroup v2](./docs/cgroup-v2.md)
|
||||||
|
* [Checkpoint and restore](./docs/checkpoint-restore.md)
|
||||||
|
* [systemd cgroup driver](./docs/systemd.md)
|
||||||
|
* [Terminals and standard IO](./docs/terminals.md)
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
|
|
|
@ -1,26 +1,28 @@
|
||||||
module github.com/opencontainers/runc
|
module github.com/opencontainers/runc
|
||||||
|
|
||||||
go 1.14
|
go 1.13
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/checkpoint-restore/go-criu/v4 v4.1.0
|
github.com/checkpoint-restore/go-criu/v5 v5.0.0
|
||||||
github.com/cilium/ebpf v0.0.0-20200702112145-1c8d4c9ef775
|
github.com/cilium/ebpf v0.5.0
|
||||||
github.com/containerd/console v1.0.0
|
github.com/containerd/console v1.0.2
|
||||||
github.com/coreos/go-systemd/v22 v22.1.0
|
github.com/coreos/go-systemd/v22 v22.3.1
|
||||||
github.com/cyphar/filepath-securejoin v0.2.2
|
github.com/cyphar/filepath-securejoin v0.2.2
|
||||||
github.com/docker/go-units v0.4.0
|
github.com/docker/go-units v0.4.0
|
||||||
github.com/godbus/dbus/v5 v5.0.3
|
github.com/godbus/dbus/v5 v5.0.4
|
||||||
github.com/golang/protobuf v1.4.2
|
github.com/moby/sys/mountinfo v0.4.1
|
||||||
github.com/moby/sys/mountinfo v0.1.3
|
github.com/mrunalp/fileutils v0.5.0
|
||||||
github.com/mrunalp/fileutils v0.0.0-20200520151820-abd8a0e76976
|
github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417
|
||||||
github.com/opencontainers/runtime-spec v1.0.3-0.20200728170252-4d89ac9fbff6
|
github.com/opencontainers/selinux v1.8.0
|
||||||
github.com/opencontainers/selinux v1.6.0
|
|
||||||
github.com/pkg/errors v0.9.1
|
github.com/pkg/errors v0.9.1
|
||||||
github.com/seccomp/libseccomp-golang v0.9.1
|
github.com/seccomp/libseccomp-golang v0.9.1
|
||||||
github.com/sirupsen/logrus v1.6.0
|
github.com/sirupsen/logrus v1.7.0
|
||||||
github.com/syndtr/gocapability v0.0.0-20180916011248-d98352740cb2
|
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635
|
||||||
// NOTE: urfave/cli must be <= v1.22.1 due to a regression: https://github.com/urfave/cli/issues/1092
|
// NOTE: urfave/cli must be <= v1.22.1 due to a regression: https://github.com/urfave/cli/issues/1092
|
||||||
github.com/urfave/cli v1.22.1
|
github.com/urfave/cli v1.22.1
|
||||||
github.com/vishvananda/netlink v1.1.0
|
github.com/vishvananda/netlink v1.1.0
|
||||||
golang.org/x/sys v0.0.0-20200728102440-3e129f6d46b1
|
github.com/willf/bitset v1.1.11
|
||||||
|
golang.org/x/net v0.0.0-20201224014010-6772e930b67b
|
||||||
|
golang.org/x/sys v0.0.0-20210426230700-d19ff857e887
|
||||||
|
google.golang.org/protobuf v1.25.0
|
||||||
)
|
)
|
||||||
|
|
|
@ -57,6 +57,10 @@ struct describing how the container is to be created. A sample would look simila
|
||||||
|
|
||||||
```go
|
```go
|
||||||
defaultMountFlags := unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
|
defaultMountFlags := unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
|
||||||
|
var devices []*configs.DeviceRule
|
||||||
|
for _, device := range specconv.AllowedDevices {
|
||||||
|
devices = append(devices, &device.Rule)
|
||||||
|
}
|
||||||
config := &configs.Config{
|
config := &configs.Config{
|
||||||
Rootfs: "/your/path/to/rootfs",
|
Rootfs: "/your/path/to/rootfs",
|
||||||
Capabilities: &configs.Capabilities{
|
Capabilities: &configs.Capabilities{
|
||||||
|
@ -155,7 +159,7 @@ config := &configs.Config{
|
||||||
Parent: "system",
|
Parent: "system",
|
||||||
Resources: &configs.Resources{
|
Resources: &configs.Resources{
|
||||||
MemorySwappiness: nil,
|
MemorySwappiness: nil,
|
||||||
Devices: specconv.AllowedDevices,
|
Devices: devices,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
MaskPaths: []string{
|
MaskPaths: []string{
|
||||||
|
@ -313,7 +317,7 @@ state, err := container.State()
|
||||||
#### Checkpoint & Restore
|
#### Checkpoint & Restore
|
||||||
|
|
||||||
libcontainer now integrates [CRIU](http://criu.org/) for checkpointing and restoring containers.
|
libcontainer now integrates [CRIU](http://criu.org/) for checkpointing and restoring containers.
|
||||||
This let's you save the state of a process running inside a container to disk, and then restore
|
This lets you save the state of a process running inside a container to disk, and then restore
|
||||||
that state into a new process, on the same machine or on another machine.
|
that state into a new process, on the same machine or on another machine.
|
||||||
|
|
||||||
`criu` version 1.5.2 or higher is required to use checkpoint and restore.
|
`criu` version 1.5.2 or higher is required to use checkpoint and restore.
|
||||||
|
|
|
@ -59,14 +59,38 @@
|
||||||
#include <sys/syscall.h>
|
#include <sys/syscall.h>
|
||||||
|
|
||||||
/* Use our own wrapper for memfd_create. */
|
/* Use our own wrapper for memfd_create. */
|
||||||
#if !defined(SYS_memfd_create) && defined(__NR_memfd_create)
|
#ifndef SYS_memfd_create
|
||||||
|
# ifdef __NR_memfd_create
|
||||||
# define SYS_memfd_create __NR_memfd_create
|
# define SYS_memfd_create __NR_memfd_create
|
||||||
|
# else
|
||||||
|
/* These values come from <https://fedora.juszkiewicz.com.pl/syscalls.html>. */
|
||||||
|
# warning "libc is outdated -- using hard-coded SYS_memfd_create"
|
||||||
|
# if defined(__x86_64__)
|
||||||
|
# define SYS_memfd_create 319
|
||||||
|
# elif defined(__i386__)
|
||||||
|
# define SYS_memfd_create 356
|
||||||
|
# elif defined(__ia64__)
|
||||||
|
# define SYS_memfd_create 1340
|
||||||
|
# elif defined(__arm__)
|
||||||
|
# define SYS_memfd_create 385
|
||||||
|
# elif defined(__aarch64__)
|
||||||
|
# define SYS_memfd_create 279
|
||||||
|
# elif defined(__ppc__) || defined(__PPC64__) || defined(__powerpc64__)
|
||||||
|
# define SYS_memfd_create 360
|
||||||
|
# elif defined(__s390__) || defined(__s390x__)
|
||||||
|
# define SYS_memfd_create 350
|
||||||
|
# else
|
||||||
|
# warning "unknown architecture -- cannot hard-code SYS_memfd_create"
|
||||||
|
# endif
|
||||||
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* memfd_create(2) flags -- copied from <linux/memfd.h>. */
|
/* memfd_create(2) flags -- copied from <linux/memfd.h>. */
|
||||||
#ifndef MFD_CLOEXEC
|
#ifndef MFD_CLOEXEC
|
||||||
# define MFD_CLOEXEC 0x0001U
|
# define MFD_CLOEXEC 0x0001U
|
||||||
# define MFD_ALLOW_SEALING 0x0002U
|
# define MFD_ALLOW_SEALING 0x0002U
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int memfd_create(const char *name, unsigned int flags)
|
int memfd_create(const char *name, unsigned int flags)
|
||||||
{
|
{
|
||||||
#ifdef SYS_memfd_create
|
#ifdef SYS_memfd_create
|
||||||
|
@ -77,7 +101,6 @@ int memfd_create(const char *name, unsigned int flags)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* This comes directly from <linux/fcntl.h>. */
|
/* This comes directly from <linux/fcntl.h>. */
|
||||||
#ifndef F_LINUX_SPECIFIC_BASE
|
#ifndef F_LINUX_SPECIFIC_BASE
|
||||||
# define F_LINUX_SPECIFIC_BASE 1024
|
# define F_LINUX_SPECIFIC_BASE 1024
|
||||||
|
@ -103,7 +126,7 @@ static void *must_realloc(void *ptr, size_t size)
|
||||||
void *old = ptr;
|
void *old = ptr;
|
||||||
do {
|
do {
|
||||||
ptr = realloc(old, size);
|
ptr = realloc(old, size);
|
||||||
} while(!ptr);
|
} while (!ptr);
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -115,10 +138,10 @@ static void *must_realloc(void *ptr, size_t size)
|
||||||
static int is_self_cloned(void)
|
static int is_self_cloned(void)
|
||||||
{
|
{
|
||||||
int fd, ret, is_cloned = 0;
|
int fd, ret, is_cloned = 0;
|
||||||
struct stat statbuf = {};
|
struct stat statbuf = { };
|
||||||
struct statfs fsbuf = {};
|
struct statfs fsbuf = { };
|
||||||
|
|
||||||
fd = open("/proc/self/exe", O_RDONLY|O_CLOEXEC);
|
fd = open("/proc/self/exe", O_RDONLY | O_CLOEXEC);
|
||||||
if (fd < 0) {
|
if (fd < 0) {
|
||||||
fprintf(stderr, "you have no read access to runc binary file\n");
|
fprintf(stderr, "you have no read access to runc binary file\n");
|
||||||
return -ENOTRECOVERABLE;
|
return -ENOTRECOVERABLE;
|
||||||
|
@ -274,7 +297,7 @@ enum {
|
||||||
static int make_execfd(int *fdtype)
|
static int make_execfd(int *fdtype)
|
||||||
{
|
{
|
||||||
int fd = -1;
|
int fd = -1;
|
||||||
char template[PATH_MAX] = {0};
|
char template[PATH_MAX] = { 0 };
|
||||||
char *prefix = getenv("_LIBCONTAINER_STATEDIR");
|
char *prefix = getenv("_LIBCONTAINER_STATEDIR");
|
||||||
|
|
||||||
if (!prefix || *prefix != '/')
|
if (!prefix || *prefix != '/')
|
||||||
|
@ -303,7 +326,7 @@ static int make_execfd(int *fdtype)
|
||||||
*fdtype = EFD_FILE;
|
*fdtype = EFD_FILE;
|
||||||
fd = open(prefix, O_TMPFILE | O_EXCL | O_RDWR | O_CLOEXEC, 0700);
|
fd = open(prefix, O_TMPFILE | O_EXCL | O_RDWR | O_CLOEXEC, 0700);
|
||||||
if (fd >= 0) {
|
if (fd >= 0) {
|
||||||
struct stat statbuf = {};
|
struct stat statbuf = { };
|
||||||
bool working_otmpfile = false;
|
bool working_otmpfile = false;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -348,10 +371,10 @@ static int seal_execfd(int *fd, int fdtype)
|
||||||
switch (fdtype) {
|
switch (fdtype) {
|
||||||
case EFD_MEMFD:
|
case EFD_MEMFD:
|
||||||
return fcntl(*fd, F_ADD_SEALS, RUNC_MEMFD_SEALS);
|
return fcntl(*fd, F_ADD_SEALS, RUNC_MEMFD_SEALS);
|
||||||
case EFD_FILE: {
|
case EFD_FILE:{
|
||||||
/* Need to re-open our pseudo-memfd as an O_PATH to avoid execve(2) giving -ETXTBSY. */
|
/* Need to re-open our pseudo-memfd as an O_PATH to avoid execve(2) giving -ETXTBSY. */
|
||||||
int newfd;
|
int newfd;
|
||||||
char fdpath[PATH_MAX] = {0};
|
char fdpath[PATH_MAX] = { 0 };
|
||||||
|
|
||||||
if (fchmod(*fd, 0100) < 0)
|
if (fchmod(*fd, 0100) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -376,7 +399,7 @@ static int seal_execfd(int *fd, int fdtype)
|
||||||
static int try_bindfd(void)
|
static int try_bindfd(void)
|
||||||
{
|
{
|
||||||
int fd, ret = -1;
|
int fd, ret = -1;
|
||||||
char template[PATH_MAX] = {0};
|
char template[PATH_MAX] = { 0 };
|
||||||
char *prefix = getenv("_LIBCONTAINER_STATEDIR");
|
char *prefix = getenv("_LIBCONTAINER_STATEDIR");
|
||||||
|
|
||||||
if (!prefix || *prefix != '/')
|
if (!prefix || *prefix != '/')
|
||||||
|
@ -404,7 +427,6 @@ static int try_bindfd(void)
|
||||||
if (mount("", template, "", MS_REMOUNT | MS_BIND | MS_RDONLY, "") < 0)
|
if (mount("", template, "", MS_REMOUNT | MS_BIND | MS_RDONLY, "") < 0)
|
||||||
goto out_umount;
|
goto out_umount;
|
||||||
|
|
||||||
|
|
||||||
/* Get read-only handle that we're sure can't be made read-write. */
|
/* Get read-only handle that we're sure can't be made read-write. */
|
||||||
ret = open(template, O_PATH | O_CLOEXEC);
|
ret = open(template, O_PATH | O_CLOEXEC);
|
||||||
|
|
||||||
|
@ -448,7 +470,7 @@ static ssize_t fd_to_fd(int outfd, int infd)
|
||||||
if (n < 0)
|
if (n < 0)
|
||||||
return -1;
|
return -1;
|
||||||
nwritten += n;
|
nwritten += n;
|
||||||
} while(nwritten < nread);
|
} while (nwritten < nread);
|
||||||
|
|
||||||
total += nwritten;
|
total += nwritten;
|
||||||
}
|
}
|
||||||
|
@ -459,7 +481,7 @@ static ssize_t fd_to_fd(int outfd, int infd)
|
||||||
static int clone_binary(void)
|
static int clone_binary(void)
|
||||||
{
|
{
|
||||||
int binfd, execfd;
|
int binfd, execfd;
|
||||||
struct stat statbuf = {};
|
struct stat statbuf = { };
|
||||||
size_t sent = 0;
|
size_t sent = 0;
|
||||||
int fdtype = EFD_NONE;
|
int fdtype = EFD_NONE;
|
||||||
|
|
||||||
|
|
142
vendor/github.com/opencontainers/runc/libcontainer/nsenter/escape.c
generated
vendored
Normal file
142
vendor/github.com/opencontainers/runc/libcontainer/nsenter/escape.c
generated
vendored
Normal file
|
@ -0,0 +1,142 @@
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#ifdef ESCAPE_TEST
|
||||||
|
# include <assert.h>
|
||||||
|
# define test_assert(arg) assert(arg)
|
||||||
|
#else
|
||||||
|
# define test_assert(arg)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define DEL '\x7f'
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Poor man's version of itoa with base=16 and input number from 0 to 15,
|
||||||
|
* represented by a char. Converts it to a single hex digit ('0' to 'f').
|
||||||
|
*/
|
||||||
|
static char hex(char i)
|
||||||
|
{
|
||||||
|
test_assert(i >= 0 && i < 16);
|
||||||
|
|
||||||
|
if (i >= 0 && i < 10) {
|
||||||
|
return '0' + i;
|
||||||
|
}
|
||||||
|
if (i >= 10 && i < 16) {
|
||||||
|
return 'a' + i - 10;
|
||||||
|
}
|
||||||
|
return '?';
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Given the character, tells how many _extra_ characters are needed
|
||||||
|
* to JSON-escape it. If 0 is returned, the character does not need to
|
||||||
|
* be escaped.
|
||||||
|
*/
|
||||||
|
static int need_escape(char c)
|
||||||
|
{
|
||||||
|
switch (c) {
|
||||||
|
case '\\':
|
||||||
|
case '"':
|
||||||
|
case '\b':
|
||||||
|
case '\n':
|
||||||
|
case '\r':
|
||||||
|
case '\t':
|
||||||
|
case '\f':
|
||||||
|
return 1;
|
||||||
|
case DEL: // -> \u007f
|
||||||
|
return 5;
|
||||||
|
default:
|
||||||
|
if (c > 0 && c < ' ') {
|
||||||
|
// ASCII decimal 01 to 31 -> \u00xx
|
||||||
|
return 5;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Escape the string so it can be used as a JSON string (per RFC4627,
|
||||||
|
* section 2.5 minimal requirements, plus the DEL (0x7f) character).
|
||||||
|
*
|
||||||
|
* It is expected that the argument is a string allocated via malloc.
|
||||||
|
* In case no escaping is needed, the original string is returned as is;
|
||||||
|
* otherwise, the original string is free'd, and the newly allocated
|
||||||
|
* escaped string is returned. Thus, in any case, the value returned
|
||||||
|
* needs to be freed by the caller.
|
||||||
|
*/
|
||||||
|
char *escape_json_string(char *s)
|
||||||
|
{
|
||||||
|
int i, j, len;
|
||||||
|
char *c, *out;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* First, check if escaping is at all needed -- if not, we can avoid
|
||||||
|
* malloc and return the argument as is. While at it, count how much
|
||||||
|
* extra space is required.
|
||||||
|
*
|
||||||
|
* XXX: the counting code must be in sync with the escaping code
|
||||||
|
* (checked by test_assert()s below).
|
||||||
|
*/
|
||||||
|
for (i = j = 0; s[i] != '\0'; i++) {
|
||||||
|
j += need_escape(s[i]);
|
||||||
|
}
|
||||||
|
if (j == 0) {
|
||||||
|
// nothing to escape
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
len = i + j + 1;
|
||||||
|
out = malloc(len);
|
||||||
|
if (!out) {
|
||||||
|
free(s);
|
||||||
|
// As malloc failed, strdup can fail, too, so in the worst case
|
||||||
|
// scenario NULL will be returned from here.
|
||||||
|
return strdup("escape_json_string: out of memory");
|
||||||
|
}
|
||||||
|
for (c = s, j = 0; *c != '\0'; c++) {
|
||||||
|
switch (*c) {
|
||||||
|
case '"':
|
||||||
|
case '\\':
|
||||||
|
test_assert(need_escape(*c) == 1);
|
||||||
|
out[j++] = '\\';
|
||||||
|
out[j++] = *c;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if ((*c < 0 || *c >= ' ') && (*c != DEL)) {
|
||||||
|
// no escape needed
|
||||||
|
test_assert(need_escape(*c) == 0);
|
||||||
|
out[j++] = *c;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
out[j++] = '\\';
|
||||||
|
switch (*c) {
|
||||||
|
case '\b':
|
||||||
|
out[j++] = 'b';
|
||||||
|
break;
|
||||||
|
case '\n':
|
||||||
|
out[j++] = 'n';
|
||||||
|
break;
|
||||||
|
case '\r':
|
||||||
|
out[j++] = 'r';
|
||||||
|
break;
|
||||||
|
case '\t':
|
||||||
|
out[j++] = 't';
|
||||||
|
break;
|
||||||
|
case '\f':
|
||||||
|
out[j++] = 'f';
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
test_assert(need_escape(*c) == 5);
|
||||||
|
out[j++] = 'u';
|
||||||
|
out[j++] = '0';
|
||||||
|
out[j++] = '0';
|
||||||
|
out[j++] = hex(*c >> 4);
|
||||||
|
out[j++] = hex(*c & 0x0f);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
test_assert(j + 1 == len);
|
||||||
|
out[j] = '\0';
|
||||||
|
|
||||||
|
free(s);
|
||||||
|
return out;
|
||||||
|
}
|
|
@ -29,6 +29,8 @@
|
||||||
/* Get all of the CLONE_NEW* flags. */
|
/* Get all of the CLONE_NEW* flags. */
|
||||||
#include "namespace.h"
|
#include "namespace.h"
|
||||||
|
|
||||||
|
extern char *escape_json_string(char *str);
|
||||||
|
|
||||||
/* Synchronisation values. */
|
/* Synchronisation values. */
|
||||||
enum sync_t {
|
enum sync_t {
|
||||||
SYNC_USERMAP_PLS = 0x40, /* Request parent to map our users. */
|
SYNC_USERMAP_PLS = 0x40, /* Request parent to map our users. */
|
||||||
|
@ -36,7 +38,7 @@ enum sync_t {
|
||||||
SYNC_RECVPID_PLS = 0x42, /* Tell parent we're sending the PID. */
|
SYNC_RECVPID_PLS = 0x42, /* Tell parent we're sending the PID. */
|
||||||
SYNC_RECVPID_ACK = 0x43, /* PID was correctly received by parent. */
|
SYNC_RECVPID_ACK = 0x43, /* PID was correctly received by parent. */
|
||||||
SYNC_GRANDCHILD = 0x44, /* The grandchild is ready to run. */
|
SYNC_GRANDCHILD = 0x44, /* The grandchild is ready to run. */
|
||||||
SYNC_CHILD_READY = 0x45, /* The child or grandchild is ready to return. */
|
SYNC_CHILD_FINISH = 0x45, /* The child or grandchild has finished. */
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -45,10 +47,14 @@ enum sync_t {
|
||||||
*/
|
*/
|
||||||
#define CREATECGROUPNS 0x80
|
#define CREATECGROUPNS 0x80
|
||||||
|
|
||||||
|
#define STAGE_SETUP -1
|
||||||
/* longjmp() arguments. */
|
/* longjmp() arguments. */
|
||||||
#define JUMP_PARENT 0x00
|
#define STAGE_PARENT 0
|
||||||
#define JUMP_CHILD 0xA0
|
#define STAGE_CHILD 1
|
||||||
#define JUMP_INIT 0xA1
|
#define STAGE_INIT 2
|
||||||
|
|
||||||
|
/* Stores the current stage of nsexec. */
|
||||||
|
int current_stage = STAGE_SETUP;
|
||||||
|
|
||||||
/* Assume the stack grows down, so arguments should be above it. */
|
/* Assume the stack grows down, so arguments should be above it. */
|
||||||
struct clone_t {
|
struct clone_t {
|
||||||
|
@ -56,7 +62,7 @@ struct clone_t {
|
||||||
* Reserve some space for clone() to locate arguments
|
* Reserve some space for clone() to locate arguments
|
||||||
* and retcode in this place
|
* and retcode in this place
|
||||||
*/
|
*/
|
||||||
char stack[4096] __attribute__ ((aligned(16)));
|
char stack[4096] __attribute__((aligned(16)));
|
||||||
char stack_ptr[0];
|
char stack_ptr[0];
|
||||||
|
|
||||||
/* There are two children. This is used to execute the different code. */
|
/* There are two children. This is used to execute the different code. */
|
||||||
|
@ -124,9 +130,9 @@ static int logfd = -1;
|
||||||
# define SYS_setns __NR_setns
|
# define SYS_setns __NR_setns
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
#ifndef SYS_setns
|
# ifndef SYS_setns
|
||||||
# error "setns(2) syscall not supported by glibc version"
|
# error "setns(2) syscall not supported by glibc version"
|
||||||
#endif
|
# endif
|
||||||
|
|
||||||
int setns(int fd, int nstype)
|
int setns(int fd, int nstype)
|
||||||
{
|
{
|
||||||
|
@ -134,33 +140,43 @@ int setns(int fd, int nstype)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static void write_log_with_info(const char *level, const char *function, int line, const char *format, ...)
|
static void write_log(const char *level, const char *format, ...)
|
||||||
{
|
{
|
||||||
char message[1024] = {};
|
char *message = NULL, *stage = NULL;
|
||||||
|
|
||||||
va_list args;
|
va_list args;
|
||||||
|
int ret;
|
||||||
|
|
||||||
if (logfd < 0 || level == NULL)
|
if (logfd < 0 || level == NULL)
|
||||||
return;
|
goto out;
|
||||||
|
|
||||||
va_start(args, format);
|
va_start(args, format);
|
||||||
if (vsnprintf(message, sizeof(message), format, args) < 0)
|
ret = vasprintf(&message, format, args);
|
||||||
goto done;
|
|
||||||
|
|
||||||
dprintf(logfd, "{\"level\":\"%s\", \"msg\": \"%s:%d %s\"}\n", level, function, line, message);
|
|
||||||
done:
|
|
||||||
va_end(args);
|
va_end(args);
|
||||||
}
|
if (ret < 0)
|
||||||
|
goto out;
|
||||||
|
|
||||||
#define write_log(level, fmt, ...) \
|
message = escape_json_string(message);
|
||||||
write_log_with_info((level), __FUNCTION__, __LINE__, (fmt), ##__VA_ARGS__)
|
|
||||||
|
if (current_stage == STAGE_SETUP)
|
||||||
|
stage = strdup("nsexec");
|
||||||
|
else
|
||||||
|
ret = asprintf(&stage, "nsexec-%d", current_stage);
|
||||||
|
if (ret < 0)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
dprintf(logfd, "{\"level\":\"%s\", \"msg\": \"%s[%d]: %s\"}\n", level, stage, getpid(), message);
|
||||||
|
|
||||||
|
out:
|
||||||
|
free(message);
|
||||||
|
free(stage);
|
||||||
|
}
|
||||||
|
|
||||||
/* XXX: This is ugly. */
|
/* XXX: This is ugly. */
|
||||||
static int syncfd = -1;
|
static int syncfd = -1;
|
||||||
|
|
||||||
#define bail(fmt, ...) \
|
#define bail(fmt, ...) \
|
||||||
do { \
|
do { \
|
||||||
write_log(FATAL, "nsenter: " fmt ": %m", ##__VA_ARGS__); \
|
write_log(FATAL, fmt ": %m", ##__VA_ARGS__); \
|
||||||
exit(1); \
|
exit(1); \
|
||||||
} while(0)
|
} while(0)
|
||||||
|
|
||||||
|
@ -187,7 +203,7 @@ static int write_file(char *data, size_t data_len, char *pathfmt, ...)
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
out:
|
out:
|
||||||
close(fd);
|
close(fd);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -297,9 +313,11 @@ static void update_uidmap(const char *path, int pid, char *map, size_t map_len)
|
||||||
if (map == NULL || map_len <= 0)
|
if (map == NULL || map_len <= 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
write_log(DEBUG, "update /proc/%d/uid_map to '%s'", pid, map);
|
||||||
if (write_file(map, map_len, "/proc/%d/uid_map", pid) < 0) {
|
if (write_file(map, map_len, "/proc/%d/uid_map", pid) < 0) {
|
||||||
if (errno != EPERM)
|
if (errno != EPERM)
|
||||||
bail("failed to update /proc/%d/uid_map", pid);
|
bail("failed to update /proc/%d/uid_map", pid);
|
||||||
|
write_log(DEBUG, "update /proc/%d/uid_map got -EPERM (trying %s)", pid, path);
|
||||||
if (try_mapping_tool(path, pid, map, map_len))
|
if (try_mapping_tool(path, pid, map, map_len))
|
||||||
bail("failed to use newuid map on %d", pid);
|
bail("failed to use newuid map on %d", pid);
|
||||||
}
|
}
|
||||||
|
@ -310,9 +328,11 @@ static void update_gidmap(const char *path, int pid, char *map, size_t map_len)
|
||||||
if (map == NULL || map_len <= 0)
|
if (map == NULL || map_len <= 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
write_log(DEBUG, "update /proc/%d/gid_map to '%s'", pid, map);
|
||||||
if (write_file(map, map_len, "/proc/%d/gid_map", pid) < 0) {
|
if (write_file(map, map_len, "/proc/%d/gid_map", pid) < 0) {
|
||||||
if (errno != EPERM)
|
if (errno != EPERM)
|
||||||
bail("failed to update /proc/%d/gid_map", pid);
|
bail("failed to update /proc/%d/gid_map", pid);
|
||||||
|
write_log(DEBUG, "update /proc/%d/gid_map got -EPERM (trying %s)", pid, path);
|
||||||
if (try_mapping_tool(path, pid, map, map_len))
|
if (try_mapping_tool(path, pid, map, map_len))
|
||||||
bail("failed to use newgid map on %d", pid);
|
bail("failed to use newgid map on %d", pid);
|
||||||
}
|
}
|
||||||
|
@ -323,19 +343,20 @@ static void update_oom_score_adj(char *data, size_t len)
|
||||||
if (data == NULL || len <= 0)
|
if (data == NULL || len <= 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
write_log(DEBUG, "update /proc/self/oom_score_adj to '%s'", data);
|
||||||
if (write_file(data, len, "/proc/self/oom_score_adj") < 0)
|
if (write_file(data, len, "/proc/self/oom_score_adj") < 0)
|
||||||
bail("failed to update /proc/self/oom_score_adj");
|
bail("failed to update /proc/self/oom_score_adj");
|
||||||
}
|
}
|
||||||
|
|
||||||
/* A dummy function that just jumps to the given jumpval. */
|
/* A dummy function that just jumps to the given jumpval. */
|
||||||
static int child_func(void *arg) __attribute__ ((noinline));
|
static int child_func(void *arg) __attribute__((noinline));
|
||||||
static int child_func(void *arg)
|
static int child_func(void *arg)
|
||||||
{
|
{
|
||||||
struct clone_t *ca = (struct clone_t *)arg;
|
struct clone_t *ca = (struct clone_t *)arg;
|
||||||
longjmp(*ca->env, ca->jmpval);
|
longjmp(*ca->env, ca->jmpval);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int clone_parent(jmp_buf *env, int jmpval) __attribute__ ((noinline));
|
static int clone_parent(jmp_buf *env, int jmpval) __attribute__((noinline));
|
||||||
static int clone_parent(jmp_buf *env, int jmpval)
|
static int clone_parent(jmp_buf *env, int jmpval)
|
||||||
{
|
{
|
||||||
struct clone_t ca = {
|
struct clone_t ca = {
|
||||||
|
@ -507,7 +528,6 @@ void join_namespaces(char *nslist)
|
||||||
char *namespace = strtok_r(nslist, ",", &saveptr);
|
char *namespace = strtok_r(nslist, ",", &saveptr);
|
||||||
struct namespace_t {
|
struct namespace_t {
|
||||||
int fd;
|
int fd;
|
||||||
int ns;
|
|
||||||
char type[PATH_MAX];
|
char type[PATH_MAX];
|
||||||
char path[PATH_MAX];
|
char path[PATH_MAX];
|
||||||
} *namespaces = NULL;
|
} *namespaces = NULL;
|
||||||
|
@ -542,7 +562,7 @@ void join_namespaces(char *nslist)
|
||||||
bail("failed to open %s", path);
|
bail("failed to open %s", path);
|
||||||
|
|
||||||
ns->fd = fd;
|
ns->fd = fd;
|
||||||
ns->ns = nsflag(namespace);
|
strncpy(ns->type, namespace, PATH_MAX - 1);
|
||||||
strncpy(ns->path, path, PATH_MAX - 1);
|
strncpy(ns->path, path, PATH_MAX - 1);
|
||||||
ns->path[PATH_MAX - 1] = '\0';
|
ns->path[PATH_MAX - 1] = '\0';
|
||||||
} while ((namespace = strtok_r(NULL, ",", &saveptr)) != NULL);
|
} while ((namespace = strtok_r(NULL, ",", &saveptr)) != NULL);
|
||||||
|
@ -555,12 +575,14 @@ void join_namespaces(char *nslist)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
for (i = 0; i < num; i++) {
|
for (i = 0; i < num; i++) {
|
||||||
struct namespace_t ns = namespaces[i];
|
struct namespace_t *ns = &namespaces[i];
|
||||||
|
int flag = nsflag(ns->type);
|
||||||
|
|
||||||
if (setns(ns.fd, ns.ns) < 0)
|
write_log(DEBUG, "setns(%#x) into %s namespace (with path %s)", flag, ns->type, ns->path);
|
||||||
bail("failed to setns to %s", ns.path);
|
if (setns(ns->fd, flag) < 0)
|
||||||
|
bail("failed to setns into %s namespace", ns->type);
|
||||||
|
|
||||||
close(ns.fd);
|
close(ns->fd);
|
||||||
}
|
}
|
||||||
|
|
||||||
free(namespaces);
|
free(namespaces);
|
||||||
|
@ -569,6 +591,14 @@ void join_namespaces(char *nslist)
|
||||||
/* Defined in cloned_binary.c. */
|
/* Defined in cloned_binary.c. */
|
||||||
extern int ensure_cloned_binary(void);
|
extern int ensure_cloned_binary(void);
|
||||||
|
|
||||||
|
static inline int sane_kill(pid_t pid, int signum)
|
||||||
|
{
|
||||||
|
if (pid > 0)
|
||||||
|
return kill(pid, signum);
|
||||||
|
else
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
void nsexec(void)
|
void nsexec(void)
|
||||||
{
|
{
|
||||||
int pipenum;
|
int pipenum;
|
||||||
|
@ -598,7 +628,14 @@ void nsexec(void)
|
||||||
if (ensure_cloned_binary() < 0)
|
if (ensure_cloned_binary() < 0)
|
||||||
bail("could not ensure we are a cloned binary");
|
bail("could not ensure we are a cloned binary");
|
||||||
|
|
||||||
write_log(DEBUG, "nsexec started");
|
/*
|
||||||
|
* Inform the parent we're past initial setup.
|
||||||
|
* For the other side of this, see initWaiter.
|
||||||
|
*/
|
||||||
|
if (write(pipenum, "", 1) != 1)
|
||||||
|
bail("could not inform the parent we are past initial setup");
|
||||||
|
|
||||||
|
write_log(DEBUG, "=> nsexec container setup");
|
||||||
|
|
||||||
/* Parse all of the netlink configuration. */
|
/* Parse all of the netlink configuration. */
|
||||||
nl_parse(pipenum, &config);
|
nl_parse(pipenum, &config);
|
||||||
|
@ -622,6 +659,7 @@ void nsexec(void)
|
||||||
* containers), which is the recommendation from the kernel folks.
|
* containers), which is the recommendation from the kernel folks.
|
||||||
*/
|
*/
|
||||||
if (config.namespaces) {
|
if (config.namespaces) {
|
||||||
|
write_log(DEBUG, "set process as non-dumpable");
|
||||||
if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0)
|
if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0)
|
||||||
bail("failed to set process as non-dumpable");
|
bail("failed to set process as non-dumpable");
|
||||||
}
|
}
|
||||||
|
@ -686,45 +724,49 @@ void nsexec(void)
|
||||||
* -- Aleksa "what has my life come to?" Sarai
|
* -- Aleksa "what has my life come to?" Sarai
|
||||||
*/
|
*/
|
||||||
|
|
||||||
switch (setjmp(env)) {
|
current_stage = setjmp(env);
|
||||||
|
switch (current_stage) {
|
||||||
/*
|
/*
|
||||||
* Stage 0: We're in the parent. Our job is just to create a new child
|
* Stage 0: We're in the parent. Our job is just to create a new child
|
||||||
* (stage 1: JUMP_CHILD) process and write its uid_map and
|
* (stage 1: STAGE_CHILD) process and write its uid_map and
|
||||||
* gid_map. That process will go on to create a new process, then
|
* gid_map. That process will go on to create a new process, then
|
||||||
* it will send us its PID which we will send to the bootstrap
|
* it will send us its PID which we will send to the bootstrap
|
||||||
* process.
|
* process.
|
||||||
*/
|
*/
|
||||||
case JUMP_PARENT:{
|
case STAGE_PARENT:{
|
||||||
int len;
|
int len;
|
||||||
pid_t child, first_child = -1;
|
pid_t stage1_pid = -1, stage2_pid = -1;
|
||||||
bool ready = false;
|
bool stage1_complete, stage2_complete;
|
||||||
|
|
||||||
/* For debugging. */
|
/* For debugging. */
|
||||||
prctl(PR_SET_NAME, (unsigned long)"runc:[0:PARENT]", 0, 0, 0);
|
prctl(PR_SET_NAME, (unsigned long)"runc:[0:PARENT]", 0, 0, 0);
|
||||||
|
write_log(DEBUG, "~> nsexec stage-0");
|
||||||
|
|
||||||
/* Start the process of getting a container. */
|
/* Start the process of getting a container. */
|
||||||
child = clone_parent(&env, JUMP_CHILD);
|
write_log(DEBUG, "spawn stage-1");
|
||||||
if (child < 0)
|
stage1_pid = clone_parent(&env, STAGE_CHILD);
|
||||||
bail("unable to fork: child_func");
|
if (stage1_pid < 0)
|
||||||
|
bail("unable to spawn stage-1");
|
||||||
|
|
||||||
/*
|
|
||||||
* State machine for synchronisation with the children.
|
|
||||||
*
|
|
||||||
* Father only return when both child and grandchild are
|
|
||||||
* ready, so we can receive all possible error codes
|
|
||||||
* generated by children.
|
|
||||||
*/
|
|
||||||
syncfd = sync_child_pipe[1];
|
syncfd = sync_child_pipe[1];
|
||||||
close(sync_child_pipe[0]);
|
close(sync_child_pipe[0]);
|
||||||
|
|
||||||
while (!ready) {
|
/*
|
||||||
|
* State machine for synchronisation with the children. We only
|
||||||
|
* return once both the child and grandchild are ready.
|
||||||
|
*/
|
||||||
|
write_log(DEBUG, "-> stage-1 synchronisation loop");
|
||||||
|
stage1_complete = false;
|
||||||
|
while (!stage1_complete) {
|
||||||
enum sync_t s;
|
enum sync_t s;
|
||||||
|
|
||||||
if (read(syncfd, &s, sizeof(s)) != sizeof(s))
|
if (read(syncfd, &s, sizeof(s)) != sizeof(s))
|
||||||
bail("failed to sync with child: next state");
|
bail("failed to sync with stage-1: next state");
|
||||||
|
|
||||||
switch (s) {
|
switch (s) {
|
||||||
case SYNC_USERMAP_PLS:
|
case SYNC_USERMAP_PLS:
|
||||||
|
write_log(DEBUG, "stage-1 requested userns mappings");
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Enable setgroups(2) if we've been asked to. But we also
|
* Enable setgroups(2) if we've been asked to. But we also
|
||||||
* have to explicitly disable setgroups(2) if we're
|
* have to explicitly disable setgroups(2) if we're
|
||||||
|
@ -735,70 +777,78 @@ void nsexec(void)
|
||||||
* For rootless multi-entry mapping, config.is_setgroup shall be true and
|
* For rootless multi-entry mapping, config.is_setgroup shall be true and
|
||||||
* newuidmap/newgidmap shall be used.
|
* newuidmap/newgidmap shall be used.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if (config.is_rootless_euid && !config.is_setgroup)
|
if (config.is_rootless_euid && !config.is_setgroup)
|
||||||
update_setgroups(child, SETGROUPS_DENY);
|
update_setgroups(stage1_pid, SETGROUPS_DENY);
|
||||||
|
|
||||||
/* Set up mappings. */
|
/* Set up mappings. */
|
||||||
update_uidmap(config.uidmappath, child, config.uidmap, config.uidmap_len);
|
update_uidmap(config.uidmappath, stage1_pid, config.uidmap, config.uidmap_len);
|
||||||
update_gidmap(config.gidmappath, child, config.gidmap, config.gidmap_len);
|
update_gidmap(config.gidmappath, stage1_pid, config.gidmap, config.gidmap_len);
|
||||||
|
|
||||||
s = SYNC_USERMAP_ACK;
|
s = SYNC_USERMAP_ACK;
|
||||||
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
|
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
|
||||||
kill(child, SIGKILL);
|
sane_kill(stage1_pid, SIGKILL);
|
||||||
bail("failed to sync with child: write(SYNC_USERMAP_ACK)");
|
sane_kill(stage2_pid, SIGKILL);
|
||||||
|
bail("failed to sync with stage-1: write(SYNC_USERMAP_ACK)");
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case SYNC_RECVPID_PLS:{
|
case SYNC_RECVPID_PLS:
|
||||||
first_child = child;
|
write_log(DEBUG, "stage-1 requested pid to be forwarded");
|
||||||
|
|
||||||
/* Get the init_func pid. */
|
/* Get the stage-2 pid. */
|
||||||
if (read(syncfd, &child, sizeof(child)) != sizeof(child)) {
|
if (read(syncfd, &stage2_pid, sizeof(stage2_pid)) != sizeof(stage2_pid)) {
|
||||||
kill(first_child, SIGKILL);
|
sane_kill(stage1_pid, SIGKILL);
|
||||||
bail("failed to sync with child: read(childpid)");
|
sane_kill(stage2_pid, SIGKILL);
|
||||||
|
bail("failed to sync with stage-1: read(stage2_pid)");
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Send ACK. */
|
/* Send ACK. */
|
||||||
s = SYNC_RECVPID_ACK;
|
s = SYNC_RECVPID_ACK;
|
||||||
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
|
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
|
||||||
kill(first_child, SIGKILL);
|
sane_kill(stage1_pid, SIGKILL);
|
||||||
kill(child, SIGKILL);
|
sane_kill(stage2_pid, SIGKILL);
|
||||||
bail("failed to sync with child: write(SYNC_RECVPID_ACK)");
|
bail("failed to sync with stage-1: write(SYNC_RECVPID_ACK)");
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Send the init_func pid back to our parent.
|
/*
|
||||||
*
|
* Send both the stage-1 and stage-2 pids back to runc.
|
||||||
* Send the init_func pid and the pid of the first child back to our parent.
|
* runc needs the stage-2 to continue process management,
|
||||||
* We need to send both back because we can't reap the first child we created (CLONE_PARENT).
|
* but because stage-1 was spawned with CLONE_PARENT we
|
||||||
* It becomes the responsibility of our parent to reap the first child.
|
* cannot reap it within stage-0 and thus we need to ask
|
||||||
|
* runc to reap the zombie for us.
|
||||||
*/
|
*/
|
||||||
len = dprintf(pipenum, "{\"pid\": %d, \"pid_first\": %d}\n", child, first_child);
|
write_log(DEBUG, "forward stage-1 (%d) and stage-2 (%d) pids to runc",
|
||||||
|
stage1_pid, stage2_pid);
|
||||||
|
len =
|
||||||
|
dprintf(pipenum, "{\"stage1_pid\":%d,\"stage2_pid\":%d}\n", stage1_pid,
|
||||||
|
stage2_pid);
|
||||||
if (len < 0) {
|
if (len < 0) {
|
||||||
kill(child, SIGKILL);
|
sane_kill(stage1_pid, SIGKILL);
|
||||||
bail("unable to generate JSON for child pid");
|
sane_kill(stage2_pid, SIGKILL);
|
||||||
}
|
bail("failed to sync with runc: write(pid-JSON)");
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case SYNC_CHILD_READY:
|
case SYNC_CHILD_FINISH:
|
||||||
ready = true;
|
write_log(DEBUG, "stage-1 complete");
|
||||||
|
stage1_complete = true;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
bail("unexpected sync value: %u", s);
|
bail("unexpected sync value: %u", s);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
write_log(DEBUG, "<- stage-1 synchronisation loop");
|
||||||
|
|
||||||
/* Now sync with grandchild. */
|
/* Now sync with grandchild. */
|
||||||
|
|
||||||
syncfd = sync_grandchild_pipe[1];
|
syncfd = sync_grandchild_pipe[1];
|
||||||
close(sync_grandchild_pipe[0]);
|
close(sync_grandchild_pipe[0]);
|
||||||
|
write_log(DEBUG, "-> stage-2 synchronisation loop");
|
||||||
ready = false;
|
stage2_complete = false;
|
||||||
while (!ready) {
|
while (!stage2_complete) {
|
||||||
enum sync_t s;
|
enum sync_t s;
|
||||||
|
|
||||||
|
write_log(DEBUG, "signalling stage-2 to run");
|
||||||
s = SYNC_GRANDCHILD;
|
s = SYNC_GRANDCHILD;
|
||||||
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
|
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
|
||||||
kill(child, SIGKILL);
|
sane_kill(stage2_pid, SIGKILL);
|
||||||
bail("failed to sync with child: write(SYNC_GRANDCHILD)");
|
bail("failed to sync with child: write(SYNC_GRANDCHILD)");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -806,27 +856,31 @@ void nsexec(void)
|
||||||
bail("failed to sync with child: next state");
|
bail("failed to sync with child: next state");
|
||||||
|
|
||||||
switch (s) {
|
switch (s) {
|
||||||
case SYNC_CHILD_READY:
|
case SYNC_CHILD_FINISH:
|
||||||
ready = true;
|
write_log(DEBUG, "stage-2 complete");
|
||||||
|
stage2_complete = true;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
bail("unexpected sync value: %u", s);
|
bail("unexpected sync value: %u", s);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
write_log(DEBUG, "<- stage-2 synchronisation loop");
|
||||||
|
write_log(DEBUG, "<~ nsexec stage-0");
|
||||||
exit(0);
|
exit(0);
|
||||||
}
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Stage 1: We're in the first child process. Our job is to join any
|
* Stage 1: We're in the first child process. Our job is to join any
|
||||||
* provided namespaces in the netlink payload and unshare all
|
* provided namespaces in the netlink payload and unshare all of
|
||||||
* of the requested namespaces. If we've been asked to
|
* the requested namespaces. If we've been asked to CLONE_NEWUSER,
|
||||||
* CLONE_NEWUSER, we will ask our parent (stage 0) to set up
|
* we will ask our parent (stage 0) to set up our user mappings
|
||||||
* our user mappings for us. Then, we create a new child
|
* for us. Then, we create a new child (stage 2: STAGE_INIT) for
|
||||||
* (stage 2: JUMP_INIT) for PID namespace. We then send the
|
* PID namespace. We then send the child's PID to our parent
|
||||||
* child's PID to our parent (stage 0).
|
* (stage 0).
|
||||||
*/
|
*/
|
||||||
case JUMP_CHILD:{
|
case STAGE_CHILD:{
|
||||||
pid_t child;
|
pid_t stage2_pid = -1;
|
||||||
enum sync_t s;
|
enum sync_t s;
|
||||||
|
|
||||||
/* We're in a child and thus need to tell the parent if we die. */
|
/* We're in a child and thus need to tell the parent if we die. */
|
||||||
|
@ -835,11 +889,12 @@ void nsexec(void)
|
||||||
|
|
||||||
/* For debugging. */
|
/* For debugging. */
|
||||||
prctl(PR_SET_NAME, (unsigned long)"runc:[1:CHILD]", 0, 0, 0);
|
prctl(PR_SET_NAME, (unsigned long)"runc:[1:CHILD]", 0, 0, 0);
|
||||||
|
write_log(DEBUG, "~> nsexec stage-1");
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We need to setns first. We cannot do this earlier (in stage 0)
|
* We need to setns first. We cannot do this earlier (in stage 0)
|
||||||
* because of the fact that we forked to get here (the PID of
|
* because of the fact that we forked to get here (the PID of
|
||||||
* [stage 2: JUMP_INIT]) would be meaningless). We could send it
|
* [stage 2: STAGE_INIT]) would be meaningless). We could send it
|
||||||
* using cmsg(3) but that's just annoying.
|
* using cmsg(3) but that's just annoying.
|
||||||
*/
|
*/
|
||||||
if (config.namespaces)
|
if (config.namespaces)
|
||||||
|
@ -865,40 +920,50 @@ void nsexec(void)
|
||||||
* problem.
|
* problem.
|
||||||
*/
|
*/
|
||||||
if (config.cloneflags & CLONE_NEWUSER) {
|
if (config.cloneflags & CLONE_NEWUSER) {
|
||||||
|
write_log(DEBUG, "unshare user namespace");
|
||||||
if (unshare(CLONE_NEWUSER) < 0)
|
if (unshare(CLONE_NEWUSER) < 0)
|
||||||
bail("failed to unshare user namespace");
|
bail("failed to unshare user namespace");
|
||||||
config.cloneflags &= ~CLONE_NEWUSER;
|
config.cloneflags &= ~CLONE_NEWUSER;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We don't have the privileges to do any mapping here (see the
|
* We need to set ourselves as dumpable temporarily so that the
|
||||||
* clone_parent rant). So signal our parent to hook us up.
|
* parent process can write to our procfs files.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* Switching is only necessary if we joined namespaces. */
|
|
||||||
if (config.namespaces) {
|
if (config.namespaces) {
|
||||||
|
write_log(DEBUG, "temporarily set process as dumpable");
|
||||||
if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) < 0)
|
if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) < 0)
|
||||||
bail("failed to set process as dumpable");
|
bail("failed to temporarily set process as dumpable");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We don't have the privileges to do any mapping here (see the
|
||||||
|
* clone_parent rant). So signal stage-0 to do the mapping for
|
||||||
|
* us.
|
||||||
|
*/
|
||||||
|
write_log(DEBUG, "request stage-0 to map user namespace");
|
||||||
s = SYNC_USERMAP_PLS;
|
s = SYNC_USERMAP_PLS;
|
||||||
if (write(syncfd, &s, sizeof(s)) != sizeof(s))
|
if (write(syncfd, &s, sizeof(s)) != sizeof(s))
|
||||||
bail("failed to sync with parent: write(SYNC_USERMAP_PLS)");
|
bail("failed to sync with parent: write(SYNC_USERMAP_PLS)");
|
||||||
|
|
||||||
/* ... wait for mapping ... */
|
/* ... wait for mapping ... */
|
||||||
|
write_log(DEBUG, "request stage-0 to map user namespace");
|
||||||
if (read(syncfd, &s, sizeof(s)) != sizeof(s))
|
if (read(syncfd, &s, sizeof(s)) != sizeof(s))
|
||||||
bail("failed to sync with parent: read(SYNC_USERMAP_ACK)");
|
bail("failed to sync with parent: read(SYNC_USERMAP_ACK)");
|
||||||
if (s != SYNC_USERMAP_ACK)
|
if (s != SYNC_USERMAP_ACK)
|
||||||
bail("failed to sync with parent: SYNC_USERMAP_ACK: got %u", s);
|
bail("failed to sync with parent: SYNC_USERMAP_ACK: got %u", s);
|
||||||
/* Switching is only necessary if we joined namespaces. */
|
|
||||||
|
/* Revert temporary re-dumpable setting. */
|
||||||
if (config.namespaces) {
|
if (config.namespaces) {
|
||||||
|
write_log(DEBUG, "re-set process as non-dumpable");
|
||||||
if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0)
|
if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0)
|
||||||
bail("failed to set process as dumpable");
|
bail("failed to re-set process as non-dumpable");
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Become root in the namespace proper. */
|
/* Become root in the namespace proper. */
|
||||||
if (setresuid(0, 0, 0) < 0)
|
if (setresuid(0, 0, 0) < 0)
|
||||||
bail("failed to become root in user namespace");
|
bail("failed to become root in user namespace");
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Unshare all of the namespaces. Now, it should be noted that this
|
* Unshare all of the namespaces. Now, it should be noted that this
|
||||||
* ordering might break in the future (especially with rootless
|
* ordering might break in the future (especially with rootless
|
||||||
|
@ -909,8 +974,9 @@ void nsexec(void)
|
||||||
* some old kernel versions where clone(CLONE_PARENT | CLONE_NEWPID)
|
* some old kernel versions where clone(CLONE_PARENT | CLONE_NEWPID)
|
||||||
* was broken, so we'll just do it the long way anyway.
|
* was broken, so we'll just do it the long way anyway.
|
||||||
*/
|
*/
|
||||||
|
write_log(DEBUG, "unshare remaining namespace (except cgroupns)");
|
||||||
if (unshare(config.cloneflags & ~CLONE_NEWCGROUP) < 0)
|
if (unshare(config.cloneflags & ~CLONE_NEWCGROUP) < 0)
|
||||||
bail("failed to unshare namespaces");
|
bail("failed to unshare remaining namespaces (except cgroupns)");
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* TODO: What about non-namespace clone flags that we're dropping here?
|
* TODO: What about non-namespace clone flags that we're dropping here?
|
||||||
|
@ -921,41 +987,45 @@ void nsexec(void)
|
||||||
* which would break many applications and libraries, so we must fork
|
* which would break many applications and libraries, so we must fork
|
||||||
* to actually enter the new PID namespace.
|
* to actually enter the new PID namespace.
|
||||||
*/
|
*/
|
||||||
child = clone_parent(&env, JUMP_INIT);
|
write_log(DEBUG, "spawn stage-2");
|
||||||
if (child < 0)
|
stage2_pid = clone_parent(&env, STAGE_INIT);
|
||||||
bail("unable to fork: init_func");
|
if (stage2_pid < 0)
|
||||||
|
bail("unable to spawn stage-2");
|
||||||
|
|
||||||
/* Send the child to our parent, which knows what it's doing. */
|
/* Send the child to our parent, which knows what it's doing. */
|
||||||
|
write_log(DEBUG, "request stage-0 to forward stage-2 pid (%d)", stage2_pid);
|
||||||
s = SYNC_RECVPID_PLS;
|
s = SYNC_RECVPID_PLS;
|
||||||
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
|
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
|
||||||
kill(child, SIGKILL);
|
sane_kill(stage2_pid, SIGKILL);
|
||||||
bail("failed to sync with parent: write(SYNC_RECVPID_PLS)");
|
bail("failed to sync with parent: write(SYNC_RECVPID_PLS)");
|
||||||
}
|
}
|
||||||
if (write(syncfd, &child, sizeof(child)) != sizeof(child)) {
|
if (write(syncfd, &stage2_pid, sizeof(stage2_pid)) != sizeof(stage2_pid)) {
|
||||||
kill(child, SIGKILL);
|
sane_kill(stage2_pid, SIGKILL);
|
||||||
bail("failed to sync with parent: write(childpid)");
|
bail("failed to sync with parent: write(stage2_pid)");
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ... wait for parent to get the pid ... */
|
/* ... wait for parent to get the pid ... */
|
||||||
|
|
||||||
if (read(syncfd, &s, sizeof(s)) != sizeof(s)) {
|
if (read(syncfd, &s, sizeof(s)) != sizeof(s)) {
|
||||||
kill(child, SIGKILL);
|
sane_kill(stage2_pid, SIGKILL);
|
||||||
bail("failed to sync with parent: read(SYNC_RECVPID_ACK)");
|
bail("failed to sync with parent: read(SYNC_RECVPID_ACK)");
|
||||||
}
|
}
|
||||||
if (s != SYNC_RECVPID_ACK) {
|
if (s != SYNC_RECVPID_ACK) {
|
||||||
kill(child, SIGKILL);
|
sane_kill(stage2_pid, SIGKILL);
|
||||||
bail("failed to sync with parent: SYNC_RECVPID_ACK: got %u", s);
|
bail("failed to sync with parent: SYNC_RECVPID_ACK: got %u", s);
|
||||||
}
|
}
|
||||||
|
|
||||||
s = SYNC_CHILD_READY;
|
write_log(DEBUG, "signal completion to stage-0");
|
||||||
|
s = SYNC_CHILD_FINISH;
|
||||||
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
|
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
|
||||||
kill(child, SIGKILL);
|
sane_kill(stage2_pid, SIGKILL);
|
||||||
bail("failed to sync with parent: write(SYNC_CHILD_READY)");
|
bail("failed to sync with parent: write(SYNC_CHILD_FINISH)");
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Our work is done. [Stage 2: JUMP_INIT] is doing the rest of the work. */
|
/* Our work is done. [Stage 2: STAGE_INIT] is doing the rest of the work. */
|
||||||
|
write_log(DEBUG, "<~ nsexec stage-1");
|
||||||
exit(0);
|
exit(0);
|
||||||
}
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Stage 2: We're the final child process, and the only process that will
|
* Stage 2: We're the final child process, and the only process that will
|
||||||
|
@ -963,7 +1033,7 @@ void nsexec(void)
|
||||||
* final cleanup steps and then return to the Go runtime to allow
|
* final cleanup steps and then return to the Go runtime to allow
|
||||||
* init_linux.go to run.
|
* init_linux.go to run.
|
||||||
*/
|
*/
|
||||||
case JUMP_INIT:{
|
case STAGE_INIT:{
|
||||||
/*
|
/*
|
||||||
* We're inside the child now, having jumped from the
|
* We're inside the child now, having jumped from the
|
||||||
* start_child() code after forking in the parent.
|
* start_child() code after forking in the parent.
|
||||||
|
@ -978,6 +1048,7 @@ void nsexec(void)
|
||||||
|
|
||||||
/* For debugging. */
|
/* For debugging. */
|
||||||
prctl(PR_SET_NAME, (unsigned long)"runc:[2:INIT]", 0, 0, 0);
|
prctl(PR_SET_NAME, (unsigned long)"runc:[2:INIT]", 0, 0, 0);
|
||||||
|
write_log(DEBUG, "~> nsexec stage-2");
|
||||||
|
|
||||||
if (read(syncfd, &s, sizeof(s)) != sizeof(s))
|
if (read(syncfd, &s, sizeof(s)) != sizeof(s))
|
||||||
bail("failed to sync with parent: read(SYNC_GRANDCHILD)");
|
bail("failed to sync with parent: read(SYNC_GRANDCHILD)");
|
||||||
|
@ -998,21 +1069,30 @@ void nsexec(void)
|
||||||
bail("setgroups failed");
|
bail("setgroups failed");
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ... wait until our topmost parent has finished cgroup setup in p.manager.Apply() ... */
|
/*
|
||||||
|
* Wait until our topmost parent has finished cgroup setup in
|
||||||
|
* p.manager.Apply().
|
||||||
|
*
|
||||||
|
* TODO(cyphar): Check if this code is actually needed because we
|
||||||
|
* should be in the cgroup even from stage-0, so
|
||||||
|
* waiting until now might not make sense.
|
||||||
|
*/
|
||||||
if (config.cloneflags & CLONE_NEWCGROUP) {
|
if (config.cloneflags & CLONE_NEWCGROUP) {
|
||||||
uint8_t value;
|
uint8_t value;
|
||||||
if (read(pipenum, &value, sizeof(value)) != sizeof(value))
|
if (read(pipenum, &value, sizeof(value)) != sizeof(value))
|
||||||
bail("read synchronisation value failed");
|
bail("read synchronisation value failed");
|
||||||
if (value == CREATECGROUPNS) {
|
if (value == CREATECGROUPNS) {
|
||||||
|
write_log(DEBUG, "unshare cgroup namespace");
|
||||||
if (unshare(CLONE_NEWCGROUP) < 0)
|
if (unshare(CLONE_NEWCGROUP) < 0)
|
||||||
bail("failed to unshare cgroup namespace");
|
bail("failed to unshare cgroup namespace");
|
||||||
} else
|
} else
|
||||||
bail("received unknown synchronisation value");
|
bail("received unknown synchronisation value");
|
||||||
}
|
}
|
||||||
|
|
||||||
s = SYNC_CHILD_READY;
|
write_log(DEBUG, "signal completion to stage-0");
|
||||||
|
s = SYNC_CHILD_FINISH;
|
||||||
if (write(syncfd, &s, sizeof(s)) != sizeof(s))
|
if (write(syncfd, &s, sizeof(s)) != sizeof(s))
|
||||||
bail("failed to sync with patent: write(SYNC_CHILD_READY)");
|
bail("failed to sync with patent: write(SYNC_CHILD_FINISH)");
|
||||||
|
|
||||||
/* Close sync pipes. */
|
/* Close sync pipes. */
|
||||||
close(sync_grandchild_pipe[0]);
|
close(sync_grandchild_pipe[0]);
|
||||||
|
@ -1021,10 +1101,13 @@ void nsexec(void)
|
||||||
nl_free(&config);
|
nl_free(&config);
|
||||||
|
|
||||||
/* Finish executing, let the Go runtime take over. */
|
/* Finish executing, let the Go runtime take over. */
|
||||||
|
write_log(DEBUG, "<= nsexec container setup");
|
||||||
|
write_log(DEBUG, "booting up go runtime ...");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
bail("unexpected jump value");
|
bail("unknown stage '%d' for jump value", current_stage);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Should never be reached. */
|
/* Should never be reached. */
|
||||||
|
|
1
vendor/github.com/opencontainers/runc/libcontainer/nsenter/test/escape.c
generated
vendored
Symbolic link
1
vendor/github.com/opencontainers/runc/libcontainer/nsenter/test/escape.c
generated
vendored
Symbolic link
|
@ -0,0 +1 @@
|
||||||
|
../escape.c
|
53
vendor/github.com/opencontainers/runc/libcontainer/nsenter/test/escape.go
generated
vendored
Normal file
53
vendor/github.com/opencontainers/runc/libcontainer/nsenter/test/escape.go
generated
vendored
Normal file
|
@ -0,0 +1,53 @@
|
||||||
|
package escapetest
|
||||||
|
|
||||||
|
// This file is part of escape_json_string unit test.
|
||||||
|
// It is in a separate package so cgo can be used together
|
||||||
|
// with go test.
|
||||||
|
|
||||||
|
// #include <stdlib.h>
|
||||||
|
// extern char *escape_json_string(char *str);
|
||||||
|
// #cgo CFLAGS: -DESCAPE_TEST=1
|
||||||
|
import "C"
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
"unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
|
func testEscapeJsonString(t *testing.T, input, want string) {
|
||||||
|
in := C.CString(input)
|
||||||
|
out := C.escape_json_string(in)
|
||||||
|
got := C.GoString(out)
|
||||||
|
C.free(unsafe.Pointer(out))
|
||||||
|
t.Logf("input: %q, output: %q", input, got)
|
||||||
|
if got != want {
|
||||||
|
t.Errorf("Failed on input: %q, want %q, got %q", input, want, got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func testEscapeJson(t *testing.T) {
|
||||||
|
testCases := []struct {
|
||||||
|
input, output string
|
||||||
|
}{
|
||||||
|
{"", ""},
|
||||||
|
{"abcdef", "abcdef"},
|
||||||
|
{`\\\\\\`, `\\\\\\\\\\\\`},
|
||||||
|
{`with"quote`, `with\"quote`},
|
||||||
|
{"\n\r\b\t\f\\", `\n\r\b\t\f\\`},
|
||||||
|
{"\007", "\\u0007"},
|
||||||
|
{"\017 \020 \037", "\\u000f \\u0010 \\u001f"},
|
||||||
|
{"\033", "\\u001b"},
|
||||||
|
{`<->`, `<->`},
|
||||||
|
{"\176\177\200", "~\\u007f\200"},
|
||||||
|
{"\000", ""},
|
||||||
|
{"a\x7fxc", "a\\u007fxc"},
|
||||||
|
{"a\033xc", "a\\u001bxc"},
|
||||||
|
{"a\nxc", "a\\nxc"},
|
||||||
|
{"a\\xc", "a\\\\xc"},
|
||||||
|
{"Barney B\303\244r", "Barney B\303\244r"},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range testCases {
|
||||||
|
testEscapeJsonString(t, tc.input, tc.output)
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,41 +0,0 @@
|
||||||
package user
|
|
||||||
|
|
||||||
import (
|
|
||||||
"errors"
|
|
||||||
)
|
|
||||||
|
|
||||||
var (
|
|
||||||
// The current operating system does not provide the required data for user lookups.
|
|
||||||
ErrUnsupported = errors.New("user lookup: operating system does not provide passwd-formatted data")
|
|
||||||
// No matching entries found in file.
|
|
||||||
ErrNoPasswdEntries = errors.New("no matching entries in passwd file")
|
|
||||||
ErrNoGroupEntries = errors.New("no matching entries in group file")
|
|
||||||
)
|
|
||||||
|
|
||||||
// LookupUser looks up a user by their username in /etc/passwd. If the user
|
|
||||||
// cannot be found (or there is no /etc/passwd file on the filesystem), then
|
|
||||||
// LookupUser returns an error.
|
|
||||||
func LookupUser(username string) (User, error) {
|
|
||||||
return lookupUser(username)
|
|
||||||
}
|
|
||||||
|
|
||||||
// LookupUid looks up a user by their user id in /etc/passwd. If the user cannot
|
|
||||||
// be found (or there is no /etc/passwd file on the filesystem), then LookupId
|
|
||||||
// returns an error.
|
|
||||||
func LookupUid(uid int) (User, error) {
|
|
||||||
return lookupUid(uid)
|
|
||||||
}
|
|
||||||
|
|
||||||
// LookupGroup looks up a group by its name in /etc/group. If the group cannot
|
|
||||||
// be found (or there is no /etc/group file on the filesystem), then LookupGroup
|
|
||||||
// returns an error.
|
|
||||||
func LookupGroup(groupname string) (Group, error) {
|
|
||||||
return lookupGroup(groupname)
|
|
||||||
}
|
|
||||||
|
|
||||||
// LookupGid looks up a group by its group id in /etc/group. If the group cannot
|
|
||||||
// be found (or there is no /etc/group file on the filesystem), then LookupGid
|
|
||||||
// returns an error.
|
|
||||||
func LookupGid(gid int) (Group, error) {
|
|
||||||
return lookupGid(gid)
|
|
||||||
}
|
|
|
@ -16,13 +16,19 @@ const (
|
||||||
unixGroupPath = "/etc/group"
|
unixGroupPath = "/etc/group"
|
||||||
)
|
)
|
||||||
|
|
||||||
func lookupUser(username string) (User, error) {
|
// LookupUser looks up a user by their username in /etc/passwd. If the user
|
||||||
|
// cannot be found (or there is no /etc/passwd file on the filesystem), then
|
||||||
|
// LookupUser returns an error.
|
||||||
|
func LookupUser(username string) (User, error) {
|
||||||
return lookupUserFunc(func(u User) bool {
|
return lookupUserFunc(func(u User) bool {
|
||||||
return u.Name == username
|
return u.Name == username
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func lookupUid(uid int) (User, error) {
|
// LookupUid looks up a user by their user id in /etc/passwd. If the user cannot
|
||||||
|
// be found (or there is no /etc/passwd file on the filesystem), then LookupId
|
||||||
|
// returns an error.
|
||||||
|
func LookupUid(uid int) (User, error) {
|
||||||
return lookupUserFunc(func(u User) bool {
|
return lookupUserFunc(func(u User) bool {
|
||||||
return u.Uid == uid
|
return u.Uid == uid
|
||||||
})
|
})
|
||||||
|
@ -51,13 +57,19 @@ func lookupUserFunc(filter func(u User) bool) (User, error) {
|
||||||
return users[0], nil
|
return users[0], nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func lookupGroup(groupname string) (Group, error) {
|
// LookupGroup looks up a group by its name in /etc/group. If the group cannot
|
||||||
|
// be found (or there is no /etc/group file on the filesystem), then LookupGroup
|
||||||
|
// returns an error.
|
||||||
|
func LookupGroup(groupname string) (Group, error) {
|
||||||
return lookupGroupFunc(func(g Group) bool {
|
return lookupGroupFunc(func(g Group) bool {
|
||||||
return g.Name == groupname
|
return g.Name == groupname
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func lookupGid(gid int) (Group, error) {
|
// LookupGid looks up a group by its group id in /etc/group. If the group cannot
|
||||||
|
// be found (or there is no /etc/group file on the filesystem), then LookupGid
|
||||||
|
// returns an error.
|
||||||
|
func LookupGid(gid int) (Group, error) {
|
||||||
return lookupGroupFunc(func(g Group) bool {
|
return lookupGroupFunc(func(g Group) bool {
|
||||||
return g.Gid == gid
|
return g.Gid == gid
|
||||||
})
|
})
|
||||||
|
|
|
@ -1,40 +0,0 @@
|
||||||
// +build windows
|
|
||||||
|
|
||||||
package user
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"os/user"
|
|
||||||
)
|
|
||||||
|
|
||||||
func lookupUser(username string) (User, error) {
|
|
||||||
u, err := user.Lookup(username)
|
|
||||||
if err != nil {
|
|
||||||
return User{}, err
|
|
||||||
}
|
|
||||||
return userFromOS(u)
|
|
||||||
}
|
|
||||||
|
|
||||||
func lookupUid(uid int) (User, error) {
|
|
||||||
u, err := user.LookupId(fmt.Sprintf("%d", uid))
|
|
||||||
if err != nil {
|
|
||||||
return User{}, err
|
|
||||||
}
|
|
||||||
return userFromOS(u)
|
|
||||||
}
|
|
||||||
|
|
||||||
func lookupGroup(groupname string) (Group, error) {
|
|
||||||
g, err := user.LookupGroup(groupname)
|
|
||||||
if err != nil {
|
|
||||||
return Group{}, err
|
|
||||||
}
|
|
||||||
return groupFromOS(g)
|
|
||||||
}
|
|
||||||
|
|
||||||
func lookupGid(gid int) (Group, error) {
|
|
||||||
g, err := user.LookupGroupId(fmt.Sprintf("%d", gid))
|
|
||||||
if err != nil {
|
|
||||||
return Group{}, err
|
|
||||||
}
|
|
||||||
return groupFromOS(g)
|
|
||||||
}
|
|
|
@ -2,10 +2,10 @@ package user
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"os/user"
|
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
@ -16,6 +16,13 @@ const (
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
// The current operating system does not provide the required data for user lookups.
|
||||||
|
ErrUnsupported = errors.New("user lookup: operating system does not provide passwd-formatted data")
|
||||||
|
|
||||||
|
// No matching entries found in file.
|
||||||
|
ErrNoPasswdEntries = errors.New("no matching entries in passwd file")
|
||||||
|
ErrNoGroupEntries = errors.New("no matching entries in group file")
|
||||||
|
|
||||||
ErrRange = fmt.Errorf("uids and gids must be in range %d-%d", minId, maxId)
|
ErrRange = fmt.Errorf("uids and gids must be in range %d-%d", minId, maxId)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -29,28 +36,6 @@ type User struct {
|
||||||
Shell string
|
Shell string
|
||||||
}
|
}
|
||||||
|
|
||||||
// userFromOS converts an os/user.(*User) to local User
|
|
||||||
//
|
|
||||||
// (This does not include Pass, Shell or Gecos)
|
|
||||||
func userFromOS(u *user.User) (User, error) {
|
|
||||||
newUser := User{
|
|
||||||
Name: u.Username,
|
|
||||||
Home: u.HomeDir,
|
|
||||||
}
|
|
||||||
id, err := strconv.Atoi(u.Uid)
|
|
||||||
if err != nil {
|
|
||||||
return newUser, err
|
|
||||||
}
|
|
||||||
newUser.Uid = id
|
|
||||||
|
|
||||||
id, err = strconv.Atoi(u.Gid)
|
|
||||||
if err != nil {
|
|
||||||
return newUser, err
|
|
||||||
}
|
|
||||||
newUser.Gid = id
|
|
||||||
return newUser, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
type Group struct {
|
type Group struct {
|
||||||
Name string
|
Name string
|
||||||
Pass string
|
Pass string
|
||||||
|
@ -58,23 +43,6 @@ type Group struct {
|
||||||
List []string
|
List []string
|
||||||
}
|
}
|
||||||
|
|
||||||
// groupFromOS converts an os/user.(*Group) to local Group
|
|
||||||
//
|
|
||||||
// (This does not include Pass or List)
|
|
||||||
func groupFromOS(g *user.Group) (Group, error) {
|
|
||||||
newGroup := Group{
|
|
||||||
Name: g.Name,
|
|
||||||
}
|
|
||||||
|
|
||||||
id, err := strconv.Atoi(g.Gid)
|
|
||||||
if err != nil {
|
|
||||||
return newGroup, err
|
|
||||||
}
|
|
||||||
newGroup.Gid = id
|
|
||||||
|
|
||||||
return newGroup, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// SubID represents an entry in /etc/sub{u,g}id
|
// SubID represents an entry in /etc/sub{u,g}id
|
||||||
type SubID struct {
|
type SubID struct {
|
||||||
Name string
|
Name string
|
||||||
|
@ -466,7 +434,7 @@ func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, err
|
||||||
// we asked for a group but didn't find it. let's check to see
|
// we asked for a group but didn't find it. let's check to see
|
||||||
// if we wanted a numeric group
|
// if we wanted a numeric group
|
||||||
if !found {
|
if !found {
|
||||||
gid, err := strconv.Atoi(ag)
|
gid, err := strconv.ParseInt(ag, 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("Unable to find group %s", ag)
|
return nil, fmt.Errorf("Unable to find group %s", ag)
|
||||||
}
|
}
|
||||||
|
@ -474,7 +442,7 @@ func GetAdditionalGroups(additionalGroups []string, group io.Reader) ([]int, err
|
||||||
if gid < minId || gid > maxId {
|
if gid < minId || gid > maxId {
|
||||||
return nil, ErrRange
|
return nil, ErrRange
|
||||||
}
|
}
|
||||||
gidMap[gid] = struct{}{}
|
gidMap[int(gid)] = struct{}{}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
gids := []int{}
|
gids := []int{}
|
||||||
|
|
42
vendor/github.com/opencontainers/runc/libcontainer/user/user_fuzzer.go
generated
vendored
Normal file
42
vendor/github.com/opencontainers/runc/libcontainer/user/user_fuzzer.go
generated
vendored
Normal file
|
@ -0,0 +1,42 @@
|
||||||
|
// +build gofuzz
|
||||||
|
|
||||||
|
package user
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// IsDivisbleBy reports whether n is an exact multiple of divisibleby.
//
// A zero divisor is never considered to divide anything, so the function
// returns false instead of panicking with an integer divide-by-zero.
// The (misspelled) name is kept as-is because it is exported.
func IsDivisbleBy(n int, divisibleby int) bool {
	if divisibleby == 0 {
		return false
	}
	return n%divisibleby == 0
}
|
||||||
|
|
||||||
|
func FuzzUser(data []byte) int {
|
||||||
|
if len(data) == 0 {
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
if !IsDivisbleBy(len(data), 5) {
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
|
||||||
|
var divided [][]byte
|
||||||
|
|
||||||
|
chunkSize := len(data) / 5
|
||||||
|
|
||||||
|
for i := 0; i < len(data); i += chunkSize {
|
||||||
|
end := i + chunkSize
|
||||||
|
|
||||||
|
divided = append(divided, data[i:end])
|
||||||
|
}
|
||||||
|
|
||||||
|
_, _ = ParsePasswdFilter(strings.NewReader(string(divided[0])), nil)
|
||||||
|
|
||||||
|
var passwd, group io.Reader
|
||||||
|
|
||||||
|
group = strings.NewReader(string(divided[1]))
|
||||||
|
_, _ = GetAdditionalGroups([]string{string(divided[2])}, group)
|
||||||
|
|
||||||
|
passwd = strings.NewReader(string(divided[3]))
|
||||||
|
_, _ = GetExecUser(string(divided[4]), nil, passwd, group)
|
||||||
|
return 1
|
||||||
|
}
|
Loading…
Reference in New Issue