| type | CLONE_NEW* | -X | kernel |
|---|---|---|---|
| mnt | NS | -m | 2.4.19 |
| uts | UTS | -u | 2.6.19 |
| ipc | IPC | -i | 2.6.19 |
| pid | PID | -p | 2.6.24 |
| net | NET | -n | 2.6.29 |
| user | USER | -U | 3.8 |
| cgroup | CGROUP | -C | 4.6 |
| time | TIME | -T | 5.6 |
All eight namespaces + cgroups (resource limits) + a root filesystem image = a container.
```sh
# inspect via /proc
$ ls /proc/$$/ns/
cgroup ipc mnt net pid pid_for_children time time_for_children user uts
$ readlink /proc/$$/ns/pid
pid:[4026531836]
# equal inode = same namespace
```
```sh
# UTS only - change hostname
$ sudo unshare --uts bash

# PID ns needs --fork (and --mount-proc)
$ sudo unshare --pid --fork --mount-proc bash

# blank network stack (only lo, DOWN)
$ sudo unshare --net bash

# the works - basically a container
$ sudo unshare --pid --uts --mount --ipc \
    --net --cgroup --fork bash

# rootless: user ns first, no sudo
$ unshare --user --map-root-user bash
```
```sh
# every namespace of PID 12345
$ sudo nsenter --target 12345 --all bash

# just the network ns (great for tcpdump)
$ sudo nsenter --target 12345 --net bash

# a docker container without docker exec
$ PID=$(docker inspect -f \
    '{{.State.Pid}}' mybox)
$ sudo nsenter --target $PID --all bash
# works even if dockerd is wedged
```
```sh
$ ip netns add myns
$ ip netns list
$ ip netns exec myns bash
$ ip netns del myns

# veth pair: virtual cable
$ ip link add v0 type veth peer name v1
$ ip link set v1 netns myns
$ ip addr add 10.0.0.1/24 dev v0
$ ip link set v0 up
$ ip netns exec myns ip addr add \
    10.0.0.2/24 dev v1
$ ip netns exec myns ip link set v1 up
```
```sh
$ nsm list                    # all ns on host
$ nsm tree                    # by type
$ nsm inspect $(pidof nginx)
$ nsm diff 1 $$               # vs init
$ nsm ps --ns-type net        # grouped
$ nsm monitor                 # live events

# named ns (under /run/nsm/)
$ sudo nsm create mybox --type net
$ sudo nsm enter mybox
$ sudo nsm exec mybox -- ip addr
$ sudo nsm destroy mybox
```
```sh
# two PIDs for one process in a new pid ns
$ grep NSpid /proc/12345/status
NSpid: 12345 1
```
```sh
$ sudo lsns                           # all
$ sudo lsns -t net                    # filter
$ sudo lsns -t pid -o NS,PID,COMMAND

# find isolated processes (likely containers)
$ for d in /proc/[0-9]*/; do
    p=$(basename $d)
    [ "$(readlink $d/ns/pid 2>/dev/null)" \
      != "$(readlink /proc/1/ns/pid)" ] \
      && echo "$p in non-init pid ns"
  done
```
In production, also drop capabilities, apply a seccomp filter, enforce AppArmor/SELinux profiles, and use CNI for networking.
- Mount changes propagating to the host? Run `mount --make-rprivate /` first.
- Unprivileged user namespaces blocked by AppArmor? Set the sysctl `kernel.apparmor_restrict_unprivileged_userns=0`.

```sh
# enter a docker container's network
$ P=$(docker inspect -f '{{.State.Pid}}' X)
$ sudo nsenter --target $P --net bash

# NAT a netns to the world
$ sysctl -w net.ipv4.ip_forward=1
$ iptables -t nat -A POSTROUTING \
    -s 10.0.0.0/24 -j MASQUERADE
$ ip netns exec myns ip route add \
    default via 10.0.0.1

# cgroup v2 limits (no namespace involved)
$ sudo mkdir /sys/fs/cgroup/mybox
$ echo 67108864 | sudo tee \
    /sys/fs/cgroup/mybox/memory.max
$ echo $$ | sudo tee \
    /sys/fs/cgroup/mybox/cgroup.procs
```
```sh
# is this process isolated from init?
$ for ns in /proc/$PID/ns/*; do
    t=$(basename $ns)
    a=$(readlink $ns)
    b=$(readlink /proc/1/ns/$t)
    [ "$a" != "$b" ] && echo "ISOL: $t"
  done

# shorthand via nsm
$ nsm diff 1 $PID

# watch for short-lived ns (runtimes spawn them)
$ sudo nsm monitor
```