[RFC PATCH 11/27] containers: Sample program for driving container objects

From: David Howells
Date: Fri Feb 15 2019 - 11:09:18 EST


Add a sample program to demonstrate driving a container object. It is
called something like:

./samples/vfs/test-container /dev/sda3

where /dev/sda3 holds an ext4 filesystem that has appropriate /etc, /bin,
/usr, /lib, /proc directories emplaced such that procfs can be mounted and
then /bin/bash can be executed within the container.

Signed-off-by: David Howells <dhowells@xxxxxxxxxx>
---

samples/vfs/Makefile | 5 +
samples/vfs/test-container.c | 279 ++++++++++++++++++++++++++++++++++++++++++
2 files changed, 283 insertions(+), 1 deletion(-)
create mode 100644 samples/vfs/test-container.c

diff --git a/samples/vfs/Makefile b/samples/vfs/Makefile
index b88655cb2f1d..25420919ee40 100644
--- a/samples/vfs/Makefile
+++ b/samples/vfs/Makefile
@@ -4,7 +4,8 @@ hostprogs-$(CONFIG_SAMPLE_VFS) := \
test-fs-query \
test-fsmount \
test-mntinfo \
- test-statx
+ test-statx \
+ test-container

# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -17,3 +18,5 @@ HOSTLDLIBS_test-mntinfo += -lm
HOSTCFLAGS_test-fs-query.o += -I$(objtree)/usr/include
HOSTCFLAGS_test-fsmount.o += -I$(objtree)/usr/include
HOSTCFLAGS_test-statx.o += -I$(objtree)/usr/include
+HOSTCFLAGS_test-container.o += -I$(objtree)/usr/include
+HOSTLDLIBS_test-container += -lkeyutils
diff --git a/samples/vfs/test-container.c b/samples/vfs/test-container.c
new file mode 100644
index 000000000000..44ff57afb5a4
--- /dev/null
+++ b/samples/vfs/test-container.c
@@ -0,0 +1,279 @@
+/* Container test.
+ *
+ * Copyright (C) 2019 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@xxxxxxxxxx)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/wait.h>
+#include <linux/mount.h>
+#include <linux/unistd.h>
+#include <dirent.h>
+#include <sys/stat.h>
+#include <keyutils.h>
+
+/* Hope -1 isn't a syscall */
+#ifndef __NR_fsopen
+#define __NR_fsopen -1
+#endif
+#ifndef __NR_fsmount
+#define __NR_fsmount -1
+#endif
+#ifndef __NR_fsconfig
+#define __NR_fsconfig -1
+#endif
+#ifndef __NR_move_mount
+#define __NR_move_mount -1
+#endif
+
+
+#define E(x) do { if ((x) == -1) { perror(#x); exit(1); } } while(0)
+
+static void check_messages(int fd)
+{
+ char buf[4096];
+ int err, n;
+
+ err = errno;
+
+ for (;;) {
+ n = read(fd, buf, sizeof(buf));
+ if (n < 0)
+ break;
+ n -= 2;
+
+ switch (buf[0]) {
+ case 'e':
+ fprintf(stderr, "Error: %*.*s\n", n, n, buf + 2);
+ break;
+ case 'w':
+ fprintf(stderr, "Warning: %*.*s\n", n, n, buf + 2);
+ break;
+ case 'i':
+ fprintf(stderr, "Info: %*.*s\n", n, n, buf + 2);
+ break;
+ }
+ }
+
+ errno = err;
+}
+
+static __attribute__((noreturn))
+void mount_error(int fd, const char *s)
+{
+ check_messages(fd);
+ fprintf(stderr, "%s: %m\n", s);
+ exit(1);
+}
+
+#define CONTAINER_NEW_FS_NS 0x00000001 /* Dup current fs namespace */
+#define CONTAINER_NEW_EMPTY_FS_NS 0x00000002 /* Provide new empty fs namespace */
+#define CONTAINER_NEW_CGROUP_NS 0x00000004 /* Dup current cgroup namespace [priv] */
+#define CONTAINER_NEW_UTS_NS 0x00000008 /* Dup current uts namespace */
+#define CONTAINER_NEW_IPC_NS 0x00000010 /* Dup current ipc namespace */
+#define CONTAINER_NEW_USER_NS 0x00000020 /* Dup current user namespace */
+#define CONTAINER_NEW_PID_NS 0x00000040 /* Dup current pid namespace */
+#define CONTAINER_NEW_NET_NS 0x00000080 /* Dup current net namespace */
+#define CONTAINER_KILL_ON_CLOSE 0x00000100 /* Kill all member processes when fd closed */
+#define CONTAINER_FD_CLOEXEC 0x00000200 /* Close the fd on exec */
+#define CONTAINER__FLAG_MASK 0x000003ff
+
+static inline int fsopen(const char *fs_name, unsigned int flags)
+{
+ return syscall(__NR_fsopen, fs_name, flags);
+}
+
+static inline int fsconfig(int fsfd, unsigned int cmd,
+ const char *key, const void *val, int aux)
+{
+ return syscall(__NR_fsconfig, fsfd, cmd, key, val, aux);
+}
+
+static inline int fsmount(int fsfd, unsigned int flags, unsigned int attr_flags)
+{
+ return syscall(__NR_fsmount, fsfd, flags, attr_flags);
+}
+
+static inline int move_mount(int from_dfd, const char *from_pathname,
+ int to_dfd, const char *to_pathname,
+ unsigned int flags)
+{
+ return syscall(__NR_move_mount,
+ from_dfd, from_pathname,
+ to_dfd, to_pathname, flags);
+}
+
+static inline int container_create(const char *name, unsigned int mask)
+{
+ return syscall(__NR_container_create, name, mask, 0, 0, 0);
+}
+
+static inline int fork_into_container(int containerfd)
+{
+ return syscall(__NR_fork_into_container, containerfd);
+}
+
+#define E_fsconfig(fd, cmd, key, val, aux) \
+ do { \
+ if (fsconfig(fd, cmd, key, val, aux) == -1) \
+ mount_error(fd, key ?: "create"); \
+ } while (0)
+
+/*
+ * The container init process.
+ */
+static __attribute__((noreturn))
+void container_init(void)
+{
+ if (0) {
+ /* Do a bit of debugging on the container. */
+ struct dirent **dlist;
+ struct stat st;
+ char buf[4096];
+ int n, i;
+
+ printf("hello!\n");
+ n = scandir("/", &dlist, NULL, alphasort);
+ if (n == -1) {
+ perror("scandir");
+ exit(1);
+ }
+
+ for (i = 0; i < n; i++) {
+ struct dirent *p = dlist[i];
+
+ if (p)
+ printf("- %u %s\n", p->d_type, p->d_name);
+ }
+
+ n = readlink("/bin", buf, sizeof(buf) - 1);
+ if (n == -1) {
+ perror("readlink");
+ exit(1);
+ }
+
+ buf[n] = 0;
+ printf("/bin -> %s\n", buf);
+
+ if (stat("/lib64/ld-linux-x86-64.so.2", &st) == -1) {
+ perror("stat");
+ exit(1);
+ }
+
+ printf("mode %o\n", st.st_mode);
+ }
+
+ if (keyctl_join_session_keyring(NULL) == -1) {
+ perror("keyctl/join");
+ exit(1);
+ }
+
+ setenv("PS1", "container>", 1);
+ execl("/bin/bash", "bash", NULL);
+ perror("execl");
+ exit(1);
+}
+
+/*
+ * The container manager process.
+ */
+int main(int argc, char *argv[])
+{
+ pid_t pid;
+ int fsfd, mfd, cfd, ws;
+
+ if (argc != 2) {
+ fprintf(stderr, "Format: test-container <root-dev>\n");
+ exit(2);
+ }
+
+ cfd = container_create("foo-test",
+ CONTAINER_NEW_EMPTY_FS_NS |
+ //CONTAINER_NEW_UTS_NS |
+ //CONTAINER_NEW_IPC_NS |
+ //CONTAINER_NEW_USER_NS |
+ CONTAINER_NEW_PID_NS |
+ CONTAINER_KILL_ON_CLOSE |
+ CONTAINER_FD_CLOEXEC);
+ if (cfd == -1) {
+ perror("container_create");
+ exit(1);
+ }
+
+ system("cat /proc/containers");
+
+ /* Open the filesystem that's going to form the container root. */
+ printf("Creating root...\n");
+ fsfd = fsopen("ext4", 0);
+ if (fsfd == -1) {
+ perror("fsopen/root");
+ exit(1);
+ }
+
+ E_fsconfig(fsfd, FSCONFIG_SET_CONTAINER, NULL, NULL, cfd);
+ E_fsconfig(fsfd, FSCONFIG_SET_STRING, "source", argv[1], 0);
+ E_fsconfig(fsfd, FSCONFIG_SET_FLAG, "user_xattr", NULL, 0);
+ E_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
+
+ /* Mount the container root */
+ printf("Mounting root...\n");
+ mfd = fsmount(fsfd, 0, 0);
+ if (mfd < 0)
+ mount_error(fsfd, "fsmount/root");
+
+ if (move_mount(mfd, "", cfd, "/",
+ MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_CONTAINER_ROOT) < 0) {
+ perror("move_mount/root");
+ exit(1);
+ }
+ E(close(fsfd));
+ E(close(mfd));
+
+ /* Mount procfs within the container */
+ printf("Creating procfs...\n");
+ fsfd = fsopen("proc", 0);
+ if (fsfd == -1) {
+ perror("fsopen/proc");
+ exit(1);
+ }
+
+ E_fsconfig(fsfd, FSCONFIG_SET_CONTAINER, NULL, NULL, cfd);
+ E_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
+
+ printf("Mounting procfs...\n");
+ mfd = fsmount(fsfd, 0, 0);
+ if (mfd < 0)
+ mount_error(fsfd, "fsmount/proc");
+ if (move_mount(mfd, "", cfd, "proc", MOVE_MOUNT_F_EMPTY_PATH) < 0) {
+ perror("move_mount/proc");
+ exit(1);
+ }
+ E(close(fsfd));
+ E(close(mfd));
+
+ /* Start the 'init' process. */
+ printf("Forking...\n");
+ switch ((pid = fork_into_container(cfd))) {
+ case -1:
+ perror("fork_into_container");
+ exit(1);
+ case 0:
+ close(cfd);
+ container_init();
+ default:
+ if (waitpid(pid, &ws, 0) < 0) {
+ perror("waitpid");
+ exit(1);
+ }
+ }
+ E(close(cfd));
+ exit(0);
+}