From 0a2760f864d316b809d53ee7c017721686f18b41 Mon Sep 17 00:00:00 2001 From: Euan Kemp Date: Tue, 23 Aug 2016 15:04:17 -0700 Subject: [PATCH] stage1-kvm: Avoid writing misleading subcgroup Ref #2664 The `init.go` code does not run nspawn with --register=true for kvm, but does write a subcgroup file of that form. This makes it a little saner, and also simplifies it down to only one dbus call rather than calling it several times in different places. --- .../devel/stage1-implementors-guide.md | 6 +++++ stage1/init/init.go | 23 ++++++++++++------- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/Documentation/devel/stage1-implementors-guide.md b/Documentation/devel/stage1-implementors-guide.md index 5a66cb4803..56e7c6463b 100644 --- a/Documentation/devel/stage1-implementors-guide.md +++ b/Documentation/devel/stage1-implementors-guide.md @@ -54,6 +54,12 @@ Stage1 implementors have two options for doing so; only one must be implemented: * `/var/lib/rkt/pods/run/$uuid/pid`: the PID of the process that will be given to the "enter" entrypoint. * `/var/lib/rkt/pods/run/$uuid/ppid`: the PID of the parent of the process that will be given to the "enter" entrypoint. That parent process must have exactly one child process. +The entrypoint of a stage1 may also optionally inform rkt of the "pod cgroup", the `name=systemd` cgroup the pod's applications are expected to reside under, via the `subcgroup` file. If this file is written, it must be written before the `pid` or `ppid` files are written. This information is useful for any external monitoring system that wishes to reliably link a given cgroup to its associated rkt pod. The file should be written in the pod directory at `/var/lib/rkt/pods/run/$uuid/subcgroup`. + +The file's contents should be a text string, for example of the form `machine-rkt\x2d$uuid.scope` (every `-` in the machine name is escaped as `\x2d`), which will match the control group in the cgroup hierarchy of the `ppid` or `pid` of the pod. 
+ +Any stage1 that supports and expects machined registration to occur will likely want to write such a file. + #### Arguments * `--debug` to activate debugging diff --git a/stage1/init/init.go b/stage1/init/init.go index e3f859e651..b83aabe8b6 100644 --- a/stage1/init/init.go +++ b/stage1/init/init.go @@ -215,8 +215,9 @@ func installAssets() error { return proj2aci.PrepareAssets(assets, "./stage1/rootfs/", nil) } -// getArgsEnv returns the nspawn or lkvm args and env according to the flavor used -func getArgsEnv(p *stage1commontypes.Pod, flavor string, debug bool, n *networking.Networking, insecureOptions stage1initcommon.Stage1InsecureOptions) ([]string, []string, error) { +// getArgsEnv returns the nspawn or lkvm args and env according to the flavor +// as the first two return values respectively. +func getArgsEnv(p *stage1commontypes.Pod, flavor string, canMachinedRegister bool, debug bool, n *networking.Networking, insecureOptions stage1initcommon.Stage1InsecureOptions) ([]string, []string, error) { var args []string env := os.Environ() @@ -298,7 +299,7 @@ func getArgsEnv(p *stage1commontypes.Pod, flavor string, debug bool, n *networki args = append(args, fmt.Sprintf("-L%s", context)) } - if machinedRegister() { + if canMachinedRegister { args = append(args, fmt.Sprintf("--register=true")) } else { args = append(args, fmt.Sprintf("--register=false")) @@ -320,7 +321,7 @@ func getArgsEnv(p *stage1commontypes.Pod, flavor string, debug bool, n *networki args = append(args, fmt.Sprintf("-L%s", context)) } - if machinedRegister() { + if canMachinedRegister { args = append(args, fmt.Sprintf("--register=true")) } else { args = append(args, fmt.Sprintf("--register=false")) @@ -581,7 +582,12 @@ func stage1() int { return 1 } - args, env, err := getArgsEnv(p, flavor, debug, n, insecureOptions) + canMachinedRegister := false + if flavor != "kvm" { + // kvm doesn't register with systemd right now, see #2664. 
+ canMachinedRegister = machinedRegister() + } + args, env, err := getArgsEnv(p, flavor, canMachinedRegister, debug, n, insecureOptions) if err != nil { log.Error(err) return 1 @@ -622,13 +628,14 @@ func stage1() int { } s1Root := common.Stage1RootfsPath(p.Root) machineID := stage1initcommon.GetMachineID(p) - subcgroup, err := getContainerSubCgroup(machineID) + subcgroup, err := getContainerSubCgroup(machineID, canMachinedRegister) if err == nil { if err := ioutil.WriteFile(filepath.Join(p.Root, "subcgroup"), []byte(fmt.Sprintf("%s", subcgroup)), 0644); err != nil { log.FatalE("cannot write subcgroup file", err) return 1 } + if err := mountContainerCgroups(s1Root, enabledCgroups, subcgroup, serviceNames); err != nil { log.PrintE("couldn't mount the container cgroups", err) return 1 @@ -720,7 +727,7 @@ func mountContainerCgroups(s1Root string, enabledCgroups map[int][]string, subcg return nil } -func getContainerSubCgroup(machineID string) (string, error) { +func getContainerSubCgroup(machineID string, canMachinedRegister bool) (string, error) { var subcgroup string fromUnit, err := util.RunningFromSystemService() if err != nil { @@ -743,7 +750,7 @@ func getContainerSubCgroup(machineID string) (string, error) { } else { escapedmID := strings.Replace(machineID, "-", "\\x2d", -1) machineDir := "machine-" + escapedmID + ".scope" - if machinedRegister() { + if canMachinedRegister { // we are not in the final cgroup yet: systemd-nspawn will move us // to the correct cgroup later during registration so we can't // look it up in /proc/self/cgroup