From a22557af9aae9abac09dc7d4a990e6722de94478 Mon Sep 17 00:00:00 2001 From: Yifan Gu Date: Mon, 19 Sep 2016 13:07:03 -0700 Subject: [PATCH 01/32] stop: Don't treat 'rkt stop already-stopped-pods' as errors. Since 'rkt stop' is idempotent, 'rkt stop' on an already stopped pod should not return error. --- rkt/stop.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/rkt/stop.go b/rkt/stop.go index 36806be869..88ab6da405 100644 --- a/rkt/stop.go +++ b/rkt/stop.go @@ -71,6 +71,11 @@ func runStop(cmd *cobra.Command, args []string) (exit int) { continue } + if p.AfterRun() { + stdout.Printf("pod %q is already stopped", p.UUID) + continue + } + if p.State() != pkgPod.Running { stderr.Error(fmt.Errorf("pod %q is not running", p.UUID)) errors++ From e15b7100615b7380081e6e180a5b723df2fefecc Mon Sep 17 00:00:00 2001 From: Yifan Gu Date: Mon, 19 Sep 2016 14:20:01 -0700 Subject: [PATCH 02/32] Allow `rkt app rm` on a stopped pod. When the pod is stopped, don't call app-add/app-rm entrypoint because there is not much we can/need to do when the whole pod is stopped. --- stage0/app.go | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/stage0/app.go b/stage0/app.go index 6f8498113e..1bf8e67b2d 100644 --- a/stage0/app.go +++ b/stage0/app.go @@ -313,19 +313,24 @@ func RmApp(dir string, uuid *types.UUID, usesOverlay bool, appName *types.ACName return errwrap.Wrap(errors.New("error determining 'enter' entrypoint"), err) } - args := []string{ - uuid.String(), - appName.String(), - filepath.Join(common.Stage1RootfsPath(dir), eep), - strconv.Itoa(podPID), - } + if podPID > 0 { + // Call app-stop and app-rm entrypoint only if the pod is still running. + // Otherwise, there's not much we can do about it except unmounting/removing + // the file system. 
+ args := []string{ + uuid.String(), + appName.String(), + filepath.Join(common.Stage1RootfsPath(dir), eep), + strconv.Itoa(podPID), + } - if err := callEntrypoint(dir, appStopEntrypoint, args); err != nil { - return err - } + if err := callEntrypoint(dir, appStopEntrypoint, args); err != nil { + return err + } - if err := callEntrypoint(dir, appRmEntrypoint, args); err != nil { - return err + if err := callEntrypoint(dir, appRmEntrypoint, args); err != nil { + return err + } } appInfoDir := common.AppInfoPath(dir, *appName) From 4e4e662572d2b82947ea0c7fbdac9cff3e6f67d7 Mon Sep 17 00:00:00 2001 From: Yifan Gu Date: Mon, 19 Sep 2016 19:44:17 -0700 Subject: [PATCH 03/32] rkt status: Add '--format=json' flag to print json format for pods. Also add annotations, startedAt in the json definition of the pod. --- lib/app.go | 8 ++++---- lib/pod.go | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ lib/types.go | 4 ++++ rkt/list.go | 26 +++++++++++--------------- rkt/status.go | 25 +++++++++++++++++++++++++ 5 files changed, 94 insertions(+), 19 deletions(-) create mode 100644 lib/pod.go diff --git a/lib/app.go b/lib/app.go index 54abdb3d30..97eb99a7fb 100644 --- a/lib/app.go +++ b/lib/app.go @@ -122,7 +122,7 @@ func appState(app *App, pod *pkgPod.Pod) error { fmt.Fprintf(os.Stderr, "Cannot get GC marked time: %v", err) } if !t.IsZero() { - finishedAt := t.UnixNano() + finishedAt := t.Unix() app.FinishedAt = &finishedAt } } @@ -139,7 +139,7 @@ func appState(app *App, pod *pkgPod.Pod) error { } app.State = AppStateCreated - createdAt := fi.ModTime().UnixNano() + createdAt := fi.ModTime().Unix() app.CreatedAt = &createdAt // Check if the app is started. @@ -152,7 +152,7 @@ func appState(app *App, pod *pkgPod.Pod) error { } app.State = AppStateRunning - startedAt := fi.ModTime().UnixNano() + startedAt := fi.ModTime().Unix() app.StartedAt = &startedAt // Check if the app is exited. 
@@ -166,7 +166,7 @@ func appState(app *App, pod *pkgPod.Pod) error { } app.State = AppStateExited - finishedAt := fi.ModTime().UnixNano() + finishedAt := fi.ModTime().Unix() app.FinishedAt = &finishedAt // Read exit code. diff --git a/lib/pod.go b/lib/pod.go new file mode 100644 index 0000000000..f868bf14cb --- /dev/null +++ b/lib/pod.go @@ -0,0 +1,50 @@ +// Copyright 2016 The rkt Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package rkt + +import pkgPod "github.com/coreos/rkt/pkg/pod" + +// NewPodFromInternalPod converts *pkgPod.Pod to *Pod +func NewPodFromInternalPod(p *pkgPod.Pod) (*Pod, error) { + _, manifest, err := p.PodManifest() + if err != nil { + return nil, err + } + + pod := &Pod{ + UUID: p.UUID.String(), + State: p.State(), + Networks: p.Nets, + Annotations: make(map[string]string), + } + for _, app := range manifest.Apps { + pod.AppNames = append(pod.AppNames, app.Name.String()) + } + for _, anno := range manifest.Annotations { + pod.Annotations[anno.Name.String()] = anno.Value + } + + startTime, err := p.StartTime() + if err != nil { + return nil, err + } + + if !startTime.IsZero() { + startedAt := startTime.Unix() + pod.StartedAt = &startedAt + } + + return pod, nil +} diff --git a/lib/types.go b/lib/types.go index f29dcce999..85ac1338d4 100644 --- a/lib/types.go +++ b/lib/types.go @@ -72,5 +72,9 @@ type ( Networks []netinfo.NetInfo `json:"networks,omitempty"` // AppNames are the names of the apps. 
AppNames []string `json:"app_names,omitempty"` + // Annotations are annotations of the pod. + Annotations map[string]string `json:"annotations,omitempty"` + // The start time of the pod. + StartedAt *int64 `json:"started_at,omitempty"` } ) diff --git a/rkt/list.go b/rkt/list.go index 5effd2faa4..776f9e134f 100644 --- a/rkt/list.go +++ b/rkt/list.go @@ -26,7 +26,7 @@ import ( "github.com/appc/spec/schema" "github.com/appc/spec/schema/lastditch" "github.com/appc/spec/schema/types" - "github.com/coreos/rkt/lib" + lib "github.com/coreos/rkt/lib" "github.com/coreos/rkt/networking/netinfo" pkgPod "github.com/coreos/rkt/pkg/pod" "github.com/dustin/go-humanize" @@ -66,9 +66,18 @@ func runList(cmd *cobra.Command, args []string) int { } } - var pods []*rkt.Pod + var pods []*lib.Pod if err := pkgPod.WalkPods(getDataDir(), pkgPod.IncludeMostDirs, func(p *pkgPod.Pod) { + if flagFormat != "" { + pod, err := lib.NewPodFromInternalPod(p) + if err != nil { + errors = append(errors, err) + } + pods = append(pods, pod) + return + } + var pm schema.PodManifest var err error @@ -82,19 +91,6 @@ func runList(cmd *cobra.Command, args []string) int { return } pm = *manifest - - if flagFormat != "" { - pod := &rkt.Pod{ - UUID: p.UUID.String(), - State: podState, - Networks: p.Nets, - } - for _, app := range pm.Apps { - pod.AppNames = append(pod.AppNames, app.Name.String()) - } - pods = append(pods, pod) - return - } } type printedApp struct { diff --git a/rkt/status.go b/rkt/status.go index 40a4f3a415..a0104dd33f 100644 --- a/rkt/status.go +++ b/rkt/status.go @@ -17,9 +17,11 @@ package main import ( + "encoding/json" "fmt" "time" + lib "github.com/coreos/rkt/lib" pkgPod "github.com/coreos/rkt/pkg/pod" "github.com/spf13/cobra" ) @@ -44,6 +46,7 @@ const ( func init() { cmdRkt.AddCommand(cmdStatus) cmdStatus.Flags().BoolVar(&flagWait, "wait", false, "toggle waiting for the pod to exit") + cmdStatus.Flags().StringVar(&flagFormat, "format", "", "choose the output format, allowed format includes 
'json', 'json-pretty'. If empty, then the result is printed as key value pairs") } func runStatus(cmd *cobra.Command, args []string) (exit int) { @@ -94,6 +97,28 @@ func getExitStatuses(p *pkgPod.Pod) (map[string]int, error) { // printStatus prints the pod's pid and per-app status codes func printStatus(p *pkgPod.Pod) error { + if flagFormat != "" { + pod, err := lib.NewPodFromInternalPod(p) + if err != nil { + return fmt.Errorf("error converting pod: %v", err) + } + switch flagFormat { + case "json": + result, err := json.Marshal(pod) + if err != nil { + return fmt.Errorf("error marshaling the pod: %v", err) + } + stdout.Print(string(result)) + case "json-pretty": + result, err := json.MarshalIndent(pod, "", "\t") + if err != nil { + return fmt.Errorf("error marshaling the pod: %v", err) + } + stdout.Print(string(result)) + } + return nil + } + state := p.State() stdout.Printf("state=%s", state) From f2c70f0f875685ac7a4071d5c92658832b336e7a Mon Sep 17 00:00:00 2001 From: Krzesimir Nowak Date: Fri, 16 Sep 2016 12:40:23 +0200 Subject: [PATCH 04/32] cri: Prepare isolators --- stage0/app.go | 50 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/stage0/app.go b/stage0/app.go index 1bf8e67b2d..e838af9ab6 100644 --- a/stage0/app.go +++ b/stage0/app.go @@ -57,12 +57,18 @@ type StopConfig struct { } // TODO(iaguis): add override options for Exec, Environment (à la patch-manifest) -func AddApp(cfg RunConfig, dir string, img *types.Hash) error { - im, err := cfg.Store.GetImageManifest(img.String()) +func AddApp(pcfg PrepareConfig, cfg RunConfig, dir string, img *types.Hash) error { + // there should be only one app in the config + app := pcfg.Apps.Last() + if app == nil { + return errors.New("no image specified") + } + + am, err := cfg.Store.GetImageManifest(img.String()) if err != nil { return err } - appName, err := imageNameToAppName(im.Name) + appName, err := imageNameToAppName(am.Name) if err != nil { return 
err } @@ -90,8 +96,8 @@ func AddApp(cfg RunConfig, dir string, img *types.Hash) error { if pm.Apps.Get(*appName) != nil { return fmt.Errorf("error: multiple apps with name %s", *appName) } - if im.App == nil { - return fmt.Errorf("error: image %s has no app section)", img) + if am.App == nil && app.Exec == "" { + return fmt.Errorf("error: image %s has no app section and --exec argument is not provided", img) } appInfoDir := common.AppInfoPath(dir, *appName) @@ -170,13 +176,39 @@ func AddApp(cfg RunConfig, dir string, img *types.Hash) error { ra := schema.RuntimeApp{ Name: *appName, - App: im.App, + App: am.App, Image: schema.RuntimeImage{ - Name: &im.Name, + Name: &am.Name, ID: *img, - Labels: im.Labels, + Labels: am.Labels, }, - // TODO(iaguis): default isolators + } + + if execOverride := app.Exec; execOverride != "" { + // Create a minimal App section if not present + if am.App == nil { + ra.App = &types.App{ + User: strconv.Itoa(os.Getuid()), + Group: strconv.Itoa(os.Getgid()), + } + } + ra.App.Exec = []string{execOverride} + } + + if execAppends := app.Args; execAppends != nil { + ra.App.Exec = append(ra.App.Exec, execAppends...) 
+ } + + if err := prepareIsolators(app, ra.App); err != nil { + return err + } + + if user := app.User; user != "" { + ra.App.User = user + } + + if group := app.Group; group != "" { + ra.App.Group = group } env := ra.App.Environment From fe396ff75b4ed98c1b87b9879fa980f977893d8e Mon Sep 17 00:00:00 2001 From: Alban Crequy Date: Mon, 19 Sep 2016 16:59:53 +0200 Subject: [PATCH 05/32] stage1: mount cgroup knobs RW for new app --- common/cgroup/cgroup.go | 54 +++++++++++++++++++++++++++++++++++ stage1/app-start/app-start.go | 34 +++++++++++++++++----- 2 files changed, 81 insertions(+), 7 deletions(-) diff --git a/common/cgroup/cgroup.go b/common/cgroup/cgroup.go index a9fa04ef06..62dfac0055 100644 --- a/common/cgroup/cgroup.go +++ b/common/cgroup/cgroup.go @@ -23,6 +23,7 @@ import ( "io" "io/ioutil" "os" + "os/exec" "path/filepath" "strconv" "strings" @@ -401,6 +402,59 @@ func RemountCgroupsRO(root string, enabledCgroups map[int][]string, subcgroup st return mountFsRO(sysPath) } +// RemountCgroupKnobsRW remounts the needed knobs in the subcgroup for one +// specified app read-write so the systemd inside stage1 can apply isolators +// to them. 
+func RemountCgroupKnobsRW(enabledCgroups map[int][]string, subcgroup string, serviceName string, enterCmd []string) error { + controllers := GetControllerDirs(enabledCgroups) + + // Mount RW knobs we need to make the enabled isolators work + for _, c := range controllers { + cPath := filepath.Join("/sys/fs/cgroup", c) + subcgroupPath := filepath.Join(cPath, subcgroup, "system.slice") + + // Create cgroup directories and mount the files we need over + // themselves so they stay read-write + appCgroup := filepath.Join(subcgroupPath, serviceName) + if err := os.MkdirAll(appCgroup, 0755); err != nil { + return err + } + for _, f := range getControllerRWFiles(c) { + cgroupFilePath := filepath.Join(appCgroup, f) + // the file may not be there if kernel doesn't support the + // feature, skip it in that case + if _, err := os.Stat(cgroupFilePath); os.IsNotExist(err) { + continue + } + + // Go applications cannot be reassociated with a new mount + // namespace because they are multithreaded. Instead of + // syscall.Mount, uses the enter entrypoint. 
+ argsMountBind := append(enterCmd, "/bin/mount", "--bind", cgroupFilePath, cgroupFilePath) + cmdMountBind := exec.Cmd{ + Path: argsMountBind[0], + Args: argsMountBind, + } + + if err := cmdMountBind.Run(); err != nil { + return err + } + + argsRemountRW := append(enterCmd, "/bin/mount", "-o", "remount,bind,rw", cgroupFilePath) + cmdRemountRW := exec.Cmd{ + Path: argsRemountRW[0], + Args: argsRemountRW, + } + + if err := cmdRemountRW.Run(); err != nil { + return err + } + } + } + + return nil +} + func mountFsRO(mountPoint string) error { var flags uintptr = syscall.MS_BIND | syscall.MS_REMOUNT | diff --git a/stage1/app-start/app-start.go b/stage1/app-start/app-start.go index 53827118ea..439951ee24 100644 --- a/stage1/app-start/app-start.go +++ b/stage1/app-start/app-start.go @@ -22,6 +22,9 @@ import ( "io/ioutil" "os" "os/exec" + "path/filepath" + + "github.com/coreos/rkt/common/cgroup" rktlog "github.com/coreos/rkt/pkg/log" stage1types "github.com/coreos/rkt/stage1/common/types" @@ -72,7 +75,8 @@ func main() { os.Exit(1) } - enterEP := flag.Arg(2) + enterCmd := []string{flag.Arg(2)} + enterCmd = append(enterCmd, fmt.Sprintf("--pid=%s", flag.Arg(3)), "--") root := "." 
p, err := stage1types.LoadPod(root, uuid) @@ -97,6 +101,26 @@ func main() { ra.App.WorkingDirectory = "/" } + /* prepare cgroups */ + enabledCgroups, err := cgroup.GetEnabledCgroups() + if err != nil { + log.FatalE("error getting cgroups", err) + os.Exit(1) + } + b, err := ioutil.ReadFile(filepath.Join(p.Root, "subcgroup")) + if err == nil { + subcgroup := string(b) + serviceName := stage1initcommon.ServiceUnitName(ra.Name) + + if err := cgroup.RemountCgroupKnobsRW(enabledCgroups, subcgroup, serviceName, enterCmd); err != nil { + log.FatalE("error restricting container cgroups", err) + os.Exit(1) + } + } else { + log.PrintE("continuing with per-app isolators disabled", err) + } + + /* write service file */ binPath, err := stage1initcommon.FindBinPath(p, ra) if err != nil { log.PrintE("failed to find bin path", err) @@ -119,9 +143,7 @@ func main() { os.Exit(1) } - args := []string{enterEP} - - args = append(args, fmt.Sprintf("--pid=%s", flag.Arg(3))) + args := enterCmd args = append(args, "/usr/bin/systemctl") args = append(args, "daemon-reload") @@ -135,9 +157,7 @@ func main() { os.Exit(1) } - args = []string{enterEP} - - args = append(args, fmt.Sprintf("--pid=%s", flag.Arg(3))) + args = enterCmd args = append(args, "/usr/bin/systemctl") args = append(args, "start") args = append(args, appName.String()) From d48c521994283f5536652c0f6ea069ffb0c747bd Mon Sep 17 00:00:00 2001 From: Alban Crequy Date: Tue, 20 Sep 2016 15:32:27 +0200 Subject: [PATCH 06/32] stage1: add mount/umount in all flavors --- stage1/init/init.go | 10 ++++++++++ .../manifest-amd64-usr.d/systemd.manifest | 1 + .../manifest-arm64-usr.d/systemd.manifest | 1 + stage1/usr_from_src/mount.mk | 14 ++++++++++++++ stage1/usr_from_src/usr_from_src.mk | 1 + 5 files changed, 27 insertions(+) create mode 100644 stage1/usr_from_src/mount.mk diff --git a/stage1/init/init.go b/stage1/init/init.go index e25af377ce..de8012dc22 100644 --- a/stage1/init/init.go +++ b/stage1/init/init.go @@ -193,6 +193,14 @@ func 
installAssets() error { if err != nil { return err } + mountBin, err := common.LookupPath("mount", os.Getenv("PATH")) + if err != nil { + return err + } + umountBin, err := common.LookupPath("umount", os.Getenv("PATH")) + if err != nil { + return err + } // More paths could be added in that list if some Linux distributions install it in a different path // Note that we look in /usr/lib/... first because of the merge: // http://www.freedesktop.org/wiki/Software/systemd/TheCaseForTheUsrMerge/ @@ -216,6 +224,8 @@ func installAssets() error { proj2aci.GetAssetString("/usr/bin/systemd-sysusers", systemdSysusersBin), proj2aci.GetAssetString("/usr/lib/systemd/systemd-journald", systemdJournaldBin), proj2aci.GetAssetString("/usr/bin/bash", bashBin), + proj2aci.GetAssetString("/bin/mount", mountBin), + proj2aci.GetAssetString("/bin/umount", umountBin), proj2aci.GetAssetString(fmt.Sprintf("%s/systemd-journald.service", systemdUnitsPath), fmt.Sprintf("%s/systemd-journald.service", systemdUnitsPath)), proj2aci.GetAssetString(fmt.Sprintf("%s/systemd-journald.socket", systemdUnitsPath), fmt.Sprintf("%s/systemd-journald.socket", systemdUnitsPath)), proj2aci.GetAssetString(fmt.Sprintf("%s/systemd-journald-dev-log.socket", systemdUnitsPath), fmt.Sprintf("%s/systemd-journald-dev-log.socket", systemdUnitsPath)), diff --git a/stage1/usr_from_coreos/manifest-amd64-usr.d/systemd.manifest b/stage1/usr_from_coreos/manifest-amd64-usr.d/systemd.manifest index be867bab29..bbed2987a1 100644 --- a/stage1/usr_from_coreos/manifest-amd64-usr.d/systemd.manifest +++ b/stage1/usr_from_coreos/manifest-amd64-usr.d/systemd.manifest @@ -1,6 +1,7 @@ bin/coredumpctl bin/journalctl bin/mount +bin/umount bin/systemctl bin/systemd-analyze bin/systemd-ask-password diff --git a/stage1/usr_from_coreos/manifest-arm64-usr.d/systemd.manifest b/stage1/usr_from_coreos/manifest-arm64-usr.d/systemd.manifest index 47d87b73b9..ad9ca299b7 100644 --- a/stage1/usr_from_coreos/manifest-arm64-usr.d/systemd.manifest +++ 
b/stage1/usr_from_coreos/manifest-arm64-usr.d/systemd.manifest @@ -2,6 +2,7 @@ bin/bash bin/coredumpctl bin/journalctl bin/mount +bin/umount bin/systemctl bin/systemd-analyze bin/systemd-ask-password diff --git a/stage1/usr_from_src/mount.mk b/stage1/usr_from_src/mount.mk new file mode 100644 index 0000000000..6f263f8f38 --- /dev/null +++ b/stage1/usr_from_src/mount.mk @@ -0,0 +1,14 @@ +$(call setup-stamp-file,UFSM_STAMP) +UFSM_MOUNT_ON_ACI := $(S1_RF_ACIROOTFSDIR)/usr/bin/mount +UFSM_UMOUNT_ON_ACI := $(S1_RF_ACIROOTFSDIR)/usr/bin/umount + +S1_RF_SECONDARY_STAMPS += $(UFSM_STAMP) +S1_RF_INSTALL_FILES += /bin/mount:$(UFSM_MOUNT_ON_ACI):- +S1_RF_INSTALL_FILES += /bin/umount:$(UFSM_UMOUNT_ON_ACI):- +S1_RF_INSTALL_DIRS += $(S1_RF_ACIROOTFSDIR)/usr/bin:- +S1_RF_INSTALL_SYMLINKS += usr/bin:$(S1_RF_ACIROOTFSDIR)/bin + +$(call generate-stamp-rule,$(UFSM_STAMP),$(UFSM_MOUNT_ON_ACI),$(S1_RF_ACIROOTFSDIR)/bin) +# TODO(krzesimir): add a stamp for umount + +$(call undefine-namespaces,UFSM) diff --git a/stage1/usr_from_src/usr_from_src.mk b/stage1/usr_from_src/usr_from_src.mk index 9aa259c3f5..8fb3ab3cb3 100644 --- a/stage1/usr_from_src/usr_from_src.mk +++ b/stage1/usr_from_src/usr_from_src.mk @@ -80,6 +80,7 @@ CLEAN_DIRS += \ CLEAN_SYMLINKS += $(S1_RF_ACIROOTFSDIR)/flavor $(call inc-one,bash.mk) +$(call inc-one,mount.mk) $(call inc-one,libnss.mk) # this makes sure everything is done - ACI rootfs is populated, From 3d1c6e161c05d7724a067cbc718525d767044c3c Mon Sep 17 00:00:00 2001 From: Sergiusz Urbaniak Date: Wed, 21 Sep 2016 13:47:00 +0200 Subject: [PATCH 07/32] cgroup: resolve merge conflict --- common/cgroup/cgroup.go | 4 ++-- stage1/app-start/app-start.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/common/cgroup/cgroup.go b/common/cgroup/cgroup.go index 62dfac0055..3cb864674b 100644 --- a/common/cgroup/cgroup.go +++ b/common/cgroup/cgroup.go @@ -406,7 +406,7 @@ func RemountCgroupsRO(root string, enabledCgroups map[int][]string, subcgroup st // 
specified app read-write so the systemd inside stage1 can apply isolators // to them. func RemountCgroupKnobsRW(enabledCgroups map[int][]string, subcgroup string, serviceName string, enterCmd []string) error { - controllers := GetControllerDirs(enabledCgroups) + controllers := GetV1ControllerDirs(enabledCgroups) // Mount RW knobs we need to make the enabled isolators work for _, c := range controllers { @@ -419,7 +419,7 @@ func RemountCgroupKnobsRW(enabledCgroups map[int][]string, subcgroup string, ser if err := os.MkdirAll(appCgroup, 0755); err != nil { return err } - for _, f := range getControllerRWFiles(c) { + for _, f := range getV1ControllerRWFiles(c) { cgroupFilePath := filepath.Join(appCgroup, f) // the file may not be there if kernel doesn't support the // feature, skip it in that case diff --git a/stage1/app-start/app-start.go b/stage1/app-start/app-start.go index 439951ee24..ee5d5ecc32 100644 --- a/stage1/app-start/app-start.go +++ b/stage1/app-start/app-start.go @@ -102,7 +102,7 @@ func main() { } /* prepare cgroups */ - enabledCgroups, err := cgroup.GetEnabledCgroups() + enabledCgroups, err := cgroup.GetEnabledV1Cgroups() if err != nil { log.FatalE("error getting cgroups", err) os.Exit(1) From c37e6b81f6772f31e80f8cd591b7a566d500324e Mon Sep 17 00:00:00 2001 From: Casey Callendrello Date: Wed, 21 Sep 2016 15:32:45 +0200 Subject: [PATCH 08/32] CRI: Allow the pod sandbox to accept port forwards Fixes: #3156 --- common/networking/doc.go | 17 +++++ common/networking/ports.go | 119 ++++++++++++++++++++++++++++++++++ networking/kvm.go | 10 ++- networking/networking.go | 11 +++- networking/portfwd.go | 129 ++++++++++++++++++++++++------------- 5 files changed, 236 insertions(+), 50 deletions(-) create mode 100644 common/networking/doc.go create mode 100644 common/networking/ports.go diff --git a/common/networking/doc.go b/common/networking/doc.go new file mode 100644 index 0000000000..86c98648a4 --- /dev/null +++ b/common/networking/doc.go @@ -0,0 +1,17 @@ 
+// Copyright 2016 The rkt Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// networking is the package that implements small functionality shared +// between stage0 and stage1. +package networking diff --git a/common/networking/ports.go b/common/networking/ports.go new file mode 100644 index 0000000000..ce703d691c --- /dev/null +++ b/common/networking/ports.go @@ -0,0 +1,119 @@ +// Copyright 2016 The rkt Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package networking + +import ( + "fmt" + "net" + + "github.com/appc/spec/schema" + "github.com/appc/spec/schema/types" +) + +// ForwardedPort describes a port that will be +// forwarded (mapped) from the host to the pod +type ForwardedPort struct { + PodPort types.Port + HostPort types.ExposedPort +} + +// findAppPort looks through the manifest to find a port with a given name. 
+// If multiple apps expose the same port name, it will fail +func findAppPort(manifest *schema.PodManifest, portName types.ACName) (*types.Port, error) { + var foundPort *types.Port + + for _, app := range manifest.Apps { + for _, port := range app.App.Ports { + if portName == port.Name { + if foundPort != nil { // error: ambiguous + return nil, fmt.Errorf("port name %q defined multiple apps", portName) + } + p := port // duplicate b/c port gets overwritten + foundPort = &p + } + } + } + return foundPort, nil +} + +// ForwardedPorts matches up ExposedPorts (host ports) with Ports on the app side. +// By default, it tries to match up by name - apps expose ports, and the podspec +// maps them. The podspec can also map from host to pod, without a corresponding app +// (which is needed for CRI) +// This will error if: +// - a name is ambiguous +// - the same port:proto combination is forwarded +func ForwardedPorts(manifest *schema.PodManifest) ([]ForwardedPort, error) { + var fps []ForwardedPort + var err error + + // For every ExposedPort, find its corresponding PodPort + for _, ep := range manifest.Ports { + podPort := ep.PodPort + + // If there is no direct mapping, search for the port by name + if podPort == nil { + podPort, err = findAppPort(manifest, ep.Name) + if err != nil { + return nil, err + } + if podPort == nil { + return nil, fmt.Errorf("port name %q could not be found in any apps", ep.Name) + } + } + fp := ForwardedPort{ + HostPort: ep, + PodPort: *podPort, + } + fp.HostPort.PodPort = &fp.PodPort + if fp.HostPort.HostIP == nil { + fp.HostPort.HostIP = net.IPv4(0, 0, 0, 0) + } + + // Check all already-existing ports for conflicts + for idx := range fps { + if fp.conflicts(&fps[idx]) { + return nil, fmt.Errorf("port %s-%s:%d already mapped to pod port %d", + fp.PodPort.Protocol, fp.HostPort.HostIP.String(), fp.HostPort.HostPort, fps[idx].PodPort.Port) + } + } + + fps = append(fps, fp) + } + return fps, nil +} + +// conflicts checks if two ports conflict 
with each other +func (fp *ForwardedPort) conflicts(fp1 *ForwardedPort) bool { + if fp.PodPort.Protocol != fp1.PodPort.Protocol { + return false + } + + if fp.HostPort.HostPort != fp1.HostPort.HostPort { + return false + } + + // If either port has the 0.0.0.0 address, they conflict + zeroAddr := net.IPv4(0, 0, 0, 0) + if fp.HostPort.HostIP.Equal(zeroAddr) || fp1.HostPort.HostIP.Equal(zeroAddr) { + return true + } + + if fp.HostPort.HostIP.Equal(fp1.HostPort.HostIP) { + return true + } + + return false +} diff --git a/networking/kvm.go b/networking/kvm.go index aa3c6fb3ae..1e702a6450 100644 --- a/networking/kvm.go +++ b/networking/kvm.go @@ -38,6 +38,7 @@ import ( "github.com/vishvananda/netlink" "github.com/coreos/rkt/common" + commonnet "github.com/coreos/rkt/common/networking" "github.com/coreos/rkt/networking/tuntap" ) @@ -433,7 +434,7 @@ func kvmTransformFlannelNetwork(net *activeNet) error { // kvmSetup prepare new Networking to be used in kvm environment based on tuntap pair interfaces // to allow communication with virtual machine created by lkvm tool -func kvmSetup(podRoot string, podID types.UUID, fps []ForwardedPort, netList common.NetList, localConfig string, noDNS bool) (*Networking, error) { +func kvmSetup(podRoot string, podID types.UUID, fps []commonnet.ForwardedPort, netList common.NetList, localConfig string, noDNS bool) (*Networking, error) { network := Networking{ podEnv: podEnv{ podRoot: podRoot, @@ -628,7 +629,12 @@ func kvmSetup(podRoot string, podID types.UUID, fps []ForwardedPort, netList com if err != nil { return nil, err } + if err := network.setupForwarding(); err != nil { + network.teardownForwarding() + return nil, err + } if err := network.forwardPorts(fps, podIP); err != nil { + network.teardownForwarding() return nil, err } @@ -698,7 +704,7 @@ func (n *Networking) teardownKvmNets() { // similar to Networking.Teardown but without host namespaces func (n *Networking) kvmTeardown() { - if err := n.unforwardPorts(); err != nil { + if 
err := n.teardownForwarding(); err != nil { stderr.PrintE("error removing forwarded ports (kvm)", err) } n.teardownKvmNets() diff --git a/networking/networking.go b/networking/networking.go index 49e0af3a9e..2da07b08cc 100644 --- a/networking/networking.go +++ b/networking/networking.go @@ -31,6 +31,7 @@ import ( "github.com/vishvananda/netlink" "github.com/coreos/rkt/common" + commonnet "github.com/coreos/rkt/common/networking" "github.com/coreos/rkt/networking/netinfo" "github.com/coreos/rkt/pkg/log" @@ -62,7 +63,7 @@ var stderr *log.Logger // Setup creates a new networking namespace and executes network plugins to // set up networking. It returns in the new pod namespace -func Setup(podRoot string, podID types.UUID, fps []ForwardedPort, netList common.NetList, localConfig, flavor string, noDNS, debug bool) (*Networking, error) { +func Setup(podRoot string, podID types.UUID, fps []commonnet.ForwardedPort, netList common.NetList, localConfig, flavor string, noDNS, debug bool) (*Networking, error) { stderr = log.New(os.Stderr, "networking", debug) @@ -105,8 +106,12 @@ func Setup(podRoot string, podID types.UUID, fps []ForwardedPort, netList common if err != nil { return nil, err } + if err := n.setupForwarding(); err != nil { + n.teardownForwarding() + return nil, err + } if err := n.forwardPorts(fps, podIP); err != nil { - n.unforwardPorts() + n.teardownForwarding() return nil, err } } @@ -269,7 +274,7 @@ func (n *Networking) Teardown(flavor string, debug bool) { return } - if err := n.unforwardPorts(); err != nil { + if err := n.teardownForwarding(); err != nil { stderr.PrintE("error removing forwarded ports", err) } diff --git a/networking/portfwd.go b/networking/portfwd.go index 3eb1bf4110..74021d8370 100644 --- a/networking/portfwd.go +++ b/networking/portfwd.go @@ -20,14 +20,13 @@ import ( "strconv" "github.com/coreos/go-iptables/iptables" + + commonnet "github.com/coreos/rkt/common/networking" ) -// ForwardedPort describes a port that will be -// forwarded 
(mapped) from the host to the pod -type ForwardedPort struct { - Protocol string - HostPort uint - PodPort uint +type iptablesRule struct { + Chain string + Rule []string } // GetForwardableNet iterates through all loaded networks and returns either @@ -67,11 +66,8 @@ func (n *Networking) GetForwardableNetHostIP() (net.IP, error) { return net.runtime.HostIP, nil } -func (e *podEnv) forwardPorts(fps []ForwardedPort, podIP net.IP) error { - if len(fps) == 0 { - return nil - } - +// setupForwarding creates the iptables chains +func (e *podEnv) setupForwarding() error { ipt, err := iptables.New() if err != nil { return err @@ -97,9 +93,9 @@ func (e *podEnv) forwardPorts(fps []ForwardedPort, podIP net.IP) error { chain string customChainRule []string }{ - {"POSTROUTING", chainRuleSNAT}, // traffic originating from this host + {"POSTROUTING", chainRuleSNAT}, // traffic originating from this host from loopback {"PREROUTING", chainRuleDNAT}, // outside traffic hitting this host - {"OUTPUT", chainRuleDNAT}, // traffic originating from this host + {"OUTPUT", chainRuleDNAT}, // traffic originating from this host on non-loopback } { exists, err := ipt.Exists("nat", entry.chain, entry.customChainRule...) 
if err != nil { @@ -112,38 +108,23 @@ func (e *podEnv) forwardPorts(fps []ForwardedPort, podIP net.IP) error { } } } + return nil +} - for _, p := range fps { - - socketPod := fmt.Sprintf("%v:%v", podIP, p.PodPort) - dstPortHost := strconv.Itoa(int(p.HostPort)) - dstPortPod := strconv.Itoa(int(p.PodPort)) - - for _, r := range []struct { - chain string - rule []string - }{ - { // Rewrite the destination - chainDNAT, - []string{ - "-p", p.Protocol, - "--dport", dstPortHost, - "-j", "DNAT", - "--to-destination", socketPod, - }, - }, - { // Rewrite the source for connections to localhost on the host - chainSNAT, - []string{ - "-p", p.Protocol, - "-s", "127.0.0.1", - "-d", podIP.String(), - "--dport", dstPortPod, - "-j", "MASQUERADE", - }, - }, - } { - if err := ipt.AppendUnique("nat", r.chain, r.rule...); err != nil { +func (e *podEnv) forwardPorts(fps []commonnet.ForwardedPort, podIP net.IP) error { + if len(fps) == 0 { + return nil + } + ipt, err := iptables.New() + if err != nil { + return err + } + chainDNAT := e.portFwdChain("DNAT") + chainSNAT := e.portFwdChain("SNAT") + + for _, fp := range fps { + for _, r := range portRules(fp, podIP, chainDNAT, chainSNAT) { + if err := ipt.AppendUnique("nat", r.Chain, r.Rule...); err != nil { return err } } @@ -151,7 +132,63 @@ func (e *podEnv) forwardPorts(fps []ForwardedPort, podIP net.IP) error { return nil } -func (e *podEnv) unforwardPorts() error { +func (e *podEnv) unforwardPorts(fps []commonnet.ForwardedPort, podIP net.IP) error { + if len(fps) == 0 { + return nil + } + + ipt, err := iptables.New() + if err != nil { + return err + } + chainDNAT := e.portFwdChain("DNAT") + chainSNAT := e.portFwdChain("SNAT") + + for _, fp := range fps { + for _, r := range portRules(fp, podIP, chainDNAT, chainSNAT) { + if err := ipt.Delete("nat", r.Chain, r.Rule...); err != nil { + return err + } + } + } + return nil +} + +func portRules(fp commonnet.ForwardedPort, podIP net.IP, chainDNAT, chainSNAT string) []iptablesRule { + 
socketPod := fmt.Sprintf("%v:%v", podIP, fp.PodPort.Port) + dstPortHost := strconv.Itoa(int(fp.HostPort.HostPort)) + dstPortPod := strconv.Itoa(int(fp.PodPort.Port)) + dstIPHost := fp.HostPort.HostIP.String() + + if fp.HostPort.HostIP == nil || dstIPHost == "0.0.0.0" { + dstIPHost = "0.0.0.0/0" + } + + return []iptablesRule{ + { // nat the destination + chainDNAT, + []string{ + "-d", dstIPHost, + "-p", fp.PodPort.Protocol, + "--dport", dstPortHost, + "-j", "DNAT", + "--to-destination", socketPod, + }, + }, + { // Rewrite the source for connections to localhost on the host + chainSNAT, + []string{ + "-p", fp.PodPort.Protocol, + "-s", "127.0.0.1", + "-d", podIP.String(), + "--dport", dstPortPod, + "-j", "MASQUERADE", + }, + }, + } +} + +func (e *podEnv) teardownForwarding() error { ipt, err := iptables.New() if err != nil { return err @@ -187,6 +224,8 @@ func (e *podEnv) unforwardPorts() error { return nil } +// portFwdChain generates the *name* of the chain for pod port forwarding. +// This name must be stable. func (e *podEnv) portFwdChain(name string) string { return fmt.Sprintf("RKT-PFWD-%s-%s", name, e.podID.String()[0:8]) } From 70d5ed8151496dbedf125200a697e3c238a11ea0 Mon Sep 17 00:00:00 2001 From: Casey Callendrello Date: Wed, 21 Sep 2016 15:33:05 +0200 Subject: [PATCH 09/32] run: allow port forwards *from* a specific IP Fixes: #2252 --- rkt/run.go | 21 +++++++++++++----- stage0/run.go | 11 ++++++++++ stage1/init/init.go | 42 ++---------------------------------- tests/rkt_net_nspawn_test.go | 2 ++ tests/rkt_net_test.go | 26 +++++++++++----------- 5 files changed, 45 insertions(+), 57 deletions(-) diff --git a/rkt/run.go b/rkt/run.go index d5750dcc93..a615a18f7d 100644 --- a/rkt/run.go +++ b/rkt/run.go @@ -111,7 +111,7 @@ func init() { */ addStage1ImageFlags(cmdRun.Flags()) - cmdRun.Flags().Var(&flagPorts, "port", "ports to expose on the host (requires contained network). 
Syntax: --port=NAME:HOSTPORT") + cmdRun.Flags().Var(&flagPorts, "port", "ports to expose on the host (requires contained network). Syntax: --port=NAME:[HOSTIP:]HOSTPORT") cmdRun.Flags().Var(&flagNet, "net", "configure the pod's networking. Optionally, pass a list of user-configured networks to load and set arguments to pass to each network, respectively. Syntax: --net[=n[:args], ...]") cmdRun.Flags().Lookup("net").NoOptDefVal = "default" cmdRun.Flags().BoolVar(&flagInheritEnv, "inherit-env", false, "inherit all environment variables not set by apps") @@ -387,9 +387,9 @@ func runRun(cmd *cobra.Command, args []string) (exit int) { type portList []types.ExposedPort func (pl *portList) Set(s string) error { - parts := strings.SplitN(s, ":", 2) - if len(parts) != 2 { - return fmt.Errorf("%q is not in name:port format", s) + parts := strings.SplitN(s, ":", 3) + if len(parts) < 2 { + return fmt.Errorf("%q is not in name:[ip:]port format", s) } name, err := types.NewACName(parts[0]) @@ -397,7 +397,17 @@ func (pl *portList) Set(s string) error { return errwrap.Wrap(fmt.Errorf("%q is not a valid port name", parts[0]), err) } - port, err := strconv.ParseUint(parts[1], 10, 16) + portStr := parts[1] + var ip net.IP + if len(parts) == 3 { + portStr = parts[2] + ip = net.ParseIP(parts[1]) + if ip == nil { + return fmt.Errorf("%q is not a valid IP", parts[1]) + } + } + + port, err := strconv.ParseUint(portStr, 10, 16) if err != nil { return fmt.Errorf("%q is not a valid port number", parts[1]) } @@ -405,6 +415,7 @@ func (pl *portList) Set(s string) error { p := types.ExposedPort{ Name: *name, HostPort: uint(port), + HostIP: ip, } *pl = append(*pl, p) diff --git a/stage0/run.go b/stage0/run.go index 767ddcea0d..99fc01a557 100644 --- a/stage0/run.go +++ b/stage0/run.go @@ -42,6 +42,7 @@ import ( cnitypes "github.com/containernetworking/cni/pkg/types" "github.com/coreos/rkt/common" "github.com/coreos/rkt/common/apps" + commonnet "github.com/coreos/rkt/common/networking" 
"github.com/coreos/rkt/common/overlay" "github.com/coreos/rkt/pkg/aci" "github.com/coreos/rkt/pkg/fileutil" @@ -275,7 +276,12 @@ func generatePodManifest(cfg PrepareConfig, dir string) ([]byte, error) { // TODO(jonboulle): check that app mountpoint expectations are // satisfied here, rather than waiting for stage1 pm.Volumes = cfg.Apps.Volumes + + // Check to see if ports have any errors pm.Ports = cfg.Ports + if _, err := commonnet.ForwardedPorts(&pm); err != nil { + return nil, err + } // TODO(sur): add to stage1-implementors-guide and to the spec pm.Annotations = append(pm.Annotations, types.Annotation{ @@ -399,6 +405,11 @@ func validatePodManifest(cfg PrepareConfig, dir string) ([]byte, error) { return nil, fmt.Errorf("no app section in the pod manifest or the image manifest") } } + + // Validate forwarded ports + if _, err := commonnet.ForwardedPorts(&pm); err != nil { + return nil, err + } return pmb, nil } diff --git a/stage1/init/init.go b/stage1/init/init.go index de8012dc22..fca8d9ed1b 100644 --- a/stage1/init/init.go +++ b/stage1/init/init.go @@ -46,6 +46,7 @@ import ( "github.com/coreos/rkt/common" "github.com/coreos/rkt/common/cgroup" + commonnet "github.com/coreos/rkt/common/networking" "github.com/coreos/rkt/networking" pkgflag "github.com/coreos/rkt/pkg/flag" rktlog "github.com/coreos/rkt/pkg/log" @@ -495,45 +496,6 @@ func getArgsEnv(p *stage1commontypes.Pod, flavor string, canMachinedRegister boo return args, env, nil } -func forwardedPorts(pod *stage1commontypes.Pod) ([]networking.ForwardedPort, error) { - var fps []networking.ForwardedPort - -NextPort: - for _, ep := range pod.Manifest.Ports { - n := "" - fp := networking.ForwardedPort{} - - for _, a := range pod.Manifest.Apps { - for _, p := range a.App.Ports { - if p.Name == ep.Name { - if n == "" { - // skip socket-activated ports, they don't need port forwarding - if p.SocketActivated { - continue NextPort - } - fp.Protocol = p.Protocol - fp.HostPort = ep.HostPort - fp.PodPort = p.Port - n = 
a.Name.String() - } else { - return nil, fmt.Errorf("ambiguous exposed port in PodManifest: %q and %q both define port %q", n, a.Name, p.Name) - } - } - } - } - - if n == "" { - return nil, fmt.Errorf("port name %q is not defined by any apps", ep.Name) - } - - fps = append(fps, fp) - } - - // TODO(eyakubovich): validate that there're no conflicts - - return fps, nil -} - func stage1() int { uuid, err := types.NewUUID(flag.Arg(0)) if err != nil { @@ -571,7 +533,7 @@ func stage1() int { var n *networking.Networking if netList.Contained() { - fps, err := forwardedPorts(p) + fps, err := commonnet.ForwardedPorts(p.Manifest) if err != nil { log.Error(err) return 1 diff --git a/tests/rkt_net_nspawn_test.go b/tests/rkt_net_nspawn_test.go index 1d28d19bdf..bacac3078f 100644 --- a/tests/rkt_net_nspawn_test.go +++ b/tests/rkt_net_nspawn_test.go @@ -30,6 +30,8 @@ func TestNetPortFwdConnectivity(t *testing.T) { NewNetPortFwdConnectivityTest( defaultSamePortFwdCase, defaultDiffPortFwdCase, + defaultSpecificIPFwdCase, + defaultSpecificIPFwdFailCase, defaultLoSamePortFwdCase, defaultLoDiffPortFwdCase, bridgeSamePortFwdCase, diff --git a/tests/rkt_net_test.go b/tests/rkt_net_test.go index b56d2096ae..fe737dee60 100644 --- a/tests/rkt_net_test.go +++ b/tests/rkt_net_test.go @@ -361,6 +361,7 @@ func NewTestNetDefaultRestrictedConnectivity() testutils.Test { type PortFwdCase struct { HttpGetIP string HttpServePort int + ListenAddress string RktArg string ShouldSucceed bool } @@ -368,10 +369,12 @@ type PortFwdCase struct { var ( bannedPorts = make(map[int]struct{}, 0) - defaultSamePortFwdCase = PortFwdCase{"172.16.28.1", 0, "--net=default", true} - defaultDiffPortFwdCase = PortFwdCase{"172.16.28.1", 1024, "--net=default", true} - defaultLoSamePortFwdCase = PortFwdCase{"127.0.0.1", 0, "--net=default", true} - defaultLoDiffPortFwdCase = PortFwdCase{"127.0.0.1", 1014, "--net=default", true} + defaultSamePortFwdCase = PortFwdCase{"172.16.28.1", 0, "", "--net=default", true} + 
defaultDiffPortFwdCase = PortFwdCase{"172.16.28.1", 1024, "", "--net=default", true} + defaultSpecificIPFwdCase = PortFwdCase{"172.16.28.1", 1024, "172.16.28.1:", "--net=default", true} + defaultSpecificIPFwdFailCase = PortFwdCase{"127.0.0.1", 1024, "172.16.28.1:", "--net=default", false} + defaultLoSamePortFwdCase = PortFwdCase{"127.0.0.1", 0, "", "--net=default", true} + defaultLoDiffPortFwdCase = PortFwdCase{"127.0.0.1", 1014, "", "--net=default", true} portFwdBridge = networkTemplateT{ Name: "bridge1", @@ -387,10 +390,10 @@ var ( }, }, } - bridgeSamePortFwdCase = PortFwdCase{"11.11.5.1", 0, "--net=" + portFwdBridge.Name, true} - bridgeDiffPortFwdCase = PortFwdCase{"11.11.5.1", 1024, "--net=" + portFwdBridge.Name, true} - bridgeLoSamePortFwdCase = PortFwdCase{"127.0.0.1", 0, "--net=" + portFwdBridge.Name, true} - bridgeLoDiffPortFwdCase = PortFwdCase{"127.0.0.1", 1024, "--net=" + portFwdBridge.Name, true} + bridgeSamePortFwdCase = PortFwdCase{"11.11.5.1", 0, "", "--net=" + portFwdBridge.Name, true} + bridgeDiffPortFwdCase = PortFwdCase{"11.11.5.1", 1024, "", "--net=" + portFwdBridge.Name, true} + bridgeLoSamePortFwdCase = PortFwdCase{"127.0.0.1", 0, "", "--net=" + portFwdBridge.Name, true} + bridgeLoDiffPortFwdCase = PortFwdCase{"127.0.0.1", 1024, "", "--net=" + portFwdBridge.Name, true} ) func (ct PortFwdCase) Execute(t *testing.T, ctx *testutils.RktRunCtx) { @@ -418,8 +421,8 @@ func (ct PortFwdCase) Execute(t *testing.T, ctx *testutils.RktRunCtx) { defer os.Remove(testImage) cmd := fmt.Sprintf( - "%s --debug --insecure-options=image run --port=http:%d %s --mds-register=false %s", - ctx.Cmd(), httpPort, ct.RktArg, testImage) + "%s --debug --insecure-options=image run --port=http:%s%d %s --mds-register=false %s", + ctx.Cmd(), ct.ListenAddress, httpPort, ct.RktArg, testImage) child := spawnOrFail(t, cmd) httpGetAddr := fmt.Sprintf("http://%v:%v", ct.HttpGetIP, httpPort) @@ -448,8 +451,7 @@ func (ct PortFwdCase) Execute(t *testing.T, ctx *testutils.RktRunCtx) { 
case err == nil && !ct.ShouldSucceed: ga.Fatalf("HTTP-Get to %q should have failed! But received %q", httpGetAddr, body) case err != nil && !ct.ShouldSucceed: - child.Close() - fallthrough + t.Logf("HTTP-Get failed, as expected: %v", err) default: t.Logf("HTTP-Get received: %s", body) } From db210571994f94c51241babb6c38de6e69ac5a66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacek=20J=2E=20=C5=81akis?= Date: Thu, 22 Sep 2016 10:11:56 +0200 Subject: [PATCH 10/32] kvm/init: remove condition for kvm mutable pods --- stage1/init/init.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/stage1/init/init.go b/stage1/init/init.go index fca8d9ed1b..f040f587da 100644 --- a/stage1/init/init.go +++ b/stage1/init/init.go @@ -274,10 +274,6 @@ func getArgsEnv(p *stage1commontypes.Pod, flavor string, canMachinedRegister boo return nil, nil, fmt.Errorf("flag --private-users cannot be used with an lkvm stage1") } - if mutable { - return nil, nil, fmt.Errorf("flag --mutable is not implemented in lkvm stage1") - } - // kernel and hypervisor binaries are located relative to the working directory // of init (/var/lib/rkt/..../uuid) // TODO: move to path.go From 463fdbfb804a797d7b7a07cad53c8a90124ae199 Mon Sep 17 00:00:00 2001 From: Alban Crequy Date: Wed, 21 Sep 2016 13:58:41 +0200 Subject: [PATCH 11/32] cri: don't remount cgroup knobs RW with cgroup2 --- stage1/app-start/app-start.go | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/stage1/app-start/app-start.go b/stage1/app-start/app-start.go index ee5d5ecc32..be51e09eaf 100644 --- a/stage1/app-start/app-start.go +++ b/stage1/app-start/app-start.go @@ -102,22 +102,29 @@ func main() { } /* prepare cgroups */ - enabledCgroups, err := cgroup.GetEnabledV1Cgroups() + isUnified, err := cgroup.IsCgroupUnified("/") if err != nil { - log.FatalE("error getting cgroups", err) + log.FatalE("failed to determine the cgroup version", err) os.Exit(1) } - b, err := ioutil.ReadFile(filepath.Join(p.Root, 
"subcgroup")) - if err == nil { - subcgroup := string(b) - serviceName := stage1initcommon.ServiceUnitName(ra.Name) - - if err := cgroup.RemountCgroupKnobsRW(enabledCgroups, subcgroup, serviceName, enterCmd); err != nil { - log.FatalE("error restricting container cgroups", err) + if !isUnified { + enabledCgroups, err := cgroup.GetEnabledV1Cgroups() + if err != nil { + log.FatalE("error getting cgroups", err) os.Exit(1) } - } else { - log.PrintE("continuing with per-app isolators disabled", err) + b, err := ioutil.ReadFile(filepath.Join(p.Root, "subcgroup")) + if err == nil { + subcgroup := string(b) + serviceName := stage1initcommon.ServiceUnitName(ra.Name) + + if err := cgroup.RemountCgroupKnobsRW(enabledCgroups, subcgroup, serviceName, enterCmd); err != nil { + log.FatalE("error restricting container cgroups", err) + os.Exit(1) + } + } else { + log.PrintE("continuing with per-app isolators disabled", err) + } } /* write service file */ From 6982a7342edbc2215115781983046e3251bbfd6c Mon Sep 17 00:00:00 2001 From: Yifan Gu Date: Mon, 19 Sep 2016 16:50:46 -0700 Subject: [PATCH 12/32] CRI: Add '--annotation' and '--label' flag for 'rkt app sandbox'. This enables injecting user-defined pod level labels and annotations. 
--- lib/pod.go | 23 +++++++++++++++++------ lib/types.go | 6 ++++-- rkt/run.go | 24 ++++++++++++------------ stage0/run.go | 23 ++++++++++++++--------- 4 files changed, 47 insertions(+), 29 deletions(-) diff --git a/lib/pod.go b/lib/pod.go index f868bf14cb..04d604dc0a 100644 --- a/lib/pod.go +++ b/lib/pod.go @@ -24,16 +24,27 @@ func NewPodFromInternalPod(p *pkgPod.Pod) (*Pod, error) { } pod := &Pod{ - UUID: p.UUID.String(), - State: p.State(), - Networks: p.Nets, - Annotations: make(map[string]string), + UUID: p.UUID.String(), + State: p.State(), + Networks: p.Nets, } + for _, app := range manifest.Apps { pod.AppNames = append(pod.AppNames, app.Name.String()) } - for _, anno := range manifest.Annotations { - pod.Annotations[anno.Name.String()] = anno.Value + + if len(manifest.CRIAnnotations) > 0 { + pod.CRIAnnotations = make(map[string]string) + for name, value := range manifest.CRIAnnotations { + pod.CRIAnnotations[name] = value + } + } + + if len(manifest.CRILabels) > 0 { + pod.CRILabels = make(map[string]string) + for name, value := range manifest.CRILabels { + pod.CRILabels[name] = value + } } startTime, err := p.StartTime() diff --git a/lib/types.go b/lib/types.go index 85ac1338d4..b66a3cd365 100644 --- a/lib/types.go +++ b/lib/types.go @@ -72,9 +72,11 @@ type ( Networks []netinfo.NetInfo `json:"networks,omitempty"` // AppNames are the names of the apps. AppNames []string `json:"app_names,omitempty"` - // Annotations are annotations of the pod. - Annotations map[string]string `json:"annotations,omitempty"` // The start time of the pod. StartedAt *int64 `json:"started_at,omitempty"` + // CRIAnnotations are the pod annotations used for CRI. + CRIAnnotations map[string]string `json:"cri_annotations,omitempty"` + // CRILabels are the pod labels used for CRI. 
+ CRILabels map[string]string `json:"cri_labels,omitempty"` } ) diff --git a/rkt/run.go b/rkt/run.go index a615a18f7d..ae60b4d545 100644 --- a/rkt/run.go +++ b/rkt/run.go @@ -62,7 +62,7 @@ image arguments with a lone "---" to resume argument parsing.`, flagNet common.NetList flagPrivateUsers bool flagInheritEnv bool - flagExplicitEnv envMap + flagExplicitEnv kvMap flagEnvFromFile envFileMap flagInteractive bool flagDNS flagStringList @@ -117,7 +117,7 @@ func init() { cmdRun.Flags().BoolVar(&flagInheritEnv, "inherit-env", false, "inherit all environment variables not set by apps") cmdRun.Flags().BoolVar(&flagNoOverlay, "no-overlay", false, "disable overlay filesystem") cmdRun.Flags().BoolVar(&flagPrivateUsers, "private-users", false, "run within user namespaces.") - cmdRun.Flags().Var(&flagExplicitEnv, "set-env", "environment variable to set for apps in the form name=value") + cmdRun.Flags().Var(&flagExplicitEnv, "set-env", "environment variable to set for apps in the form key=value") cmdRun.Flags().Var(&flagEnvFromFile, "set-env-file", "path to an environment variables file") cmdRun.Flags().BoolVar(&flagInteractive, "interactive", false, "run pod interactively. If true, only one image may be supplied.") cmdRun.Flags().Var(&flagDNS, "dns", "name servers to write in /etc/resolv.conf. Pass 'host' to use host's resolv.conf. 
Pass 'none' to ignore CNI DNS config") @@ -450,35 +450,35 @@ func (dns *flagStringList) Type() string { return "flagStringList" } -// envMap implements the flag.Value interface to contain a set of name=value mappings -type envMap struct { +// kvMap implements the flag.Value interface to contain a set of key=value mappings +type kvMap struct { mapping map[string]string } -func (e *envMap) Set(s string) error { +func (e *kvMap) Set(s string) error { if e.mapping == nil { e.mapping = make(map[string]string) } pair := strings.SplitN(s, "=", 2) if len(pair) != 2 { - return fmt.Errorf("environment variable must be specified as name=value") + return fmt.Errorf("must be specified as key=value") } if _, exists := e.mapping[pair[0]]; exists { - return fmt.Errorf("environment variable %q already set", pair[0]) + return fmt.Errorf("key %q already set", pair[0]) } e.mapping[pair[0]] = pair[1] return nil } -func (e *envMap) IsEmpty() bool { +func (e *kvMap) IsEmpty() bool { return len(e.mapping) == 0 } -func (e *envMap) String() string { +func (e *kvMap) String() string { return strings.Join(e.Strings(), "\n") } -func (e *envMap) Strings() []string { +func (e *kvMap) Strings() []string { var env []string for n, v := range e.mapping { env = append(env, n+"="+v) @@ -486,8 +486,8 @@ func (e *envMap) Strings() []string { return env } -func (e *envMap) Type() string { - return "envMap" +func (e *kvMap) Type() string { + return "kvMap" } // envFileMap diff --git a/stage0/run.go b/stage0/run.go index 99fc01a557..c8d7b3e6a6 100644 --- a/stage0/run.go +++ b/stage0/run.go @@ -61,15 +61,17 @@ var debugEnabled bool // PrepareConfig defines the configuration parameters required by Prepare type PrepareConfig struct { *CommonConfig - Apps *apps.Apps // apps to prepare - InheritEnv bool // inherit parent environment into apps - ExplicitEnv []string // always set these environment variables for all the apps - EnvFromFile []string // environment variables loaded from files, set for all the apps - 
Ports []types.ExposedPort // list of ports that rkt will expose on the host - UseOverlay bool // prepare pod with overlay fs - SkipTreeStoreCheck bool // skip checking the treestore before rendering - PodManifest string // use the pod manifest specified by the user, this will ignore flags such as '--volume', '--port', etc. - PrivateUsers *user.UidRange // User namespaces + Apps *apps.Apps // apps to prepare + InheritEnv bool // inherit parent environment into apps + ExplicitEnv []string // always set these environment variables for all the apps + EnvFromFile []string // environment variables loaded from files, set for all the apps + Ports []types.ExposedPort // list of ports that rkt will expose on the host + UseOverlay bool // prepare pod with overlay fs + SkipTreeStoreCheck bool // skip checking the treestore before rendering + PodManifest string // use the pod manifest specified by the user, this will ignore flags such as '--volume', '--port', etc. + PrivateUsers *user.UidRange // user namespaces + CRIAnnotations types.CRIAnnotations // CRI annotations for the pod. + CRILabels types.CRILabels // CRI labels for the pod. } // RunConfig defines the configuration parameters needed by Run @@ -289,6 +291,9 @@ func generatePodManifest(cfg PrepareConfig, dir string) ([]byte, error) { Value: strconv.FormatBool(cfg.Mutable), }) + pm.CRIAnnotations = cfg.CRIAnnotations + pm.CRILabels = cfg.CRILabels + pmb, err := json.Marshal(pm) if err != nil { return nil, errwrap.Wrap(errors.New("error marshalling pod manifest"), err) From 348facde19c8405d4dc3a0b8563fa15b7ca3c242 Mon Sep 17 00:00:00 2001 From: Yifan Gu Date: Thu, 15 Sep 2016 18:07:16 -0700 Subject: [PATCH 13/32] CRI: Add '--name', '--annotation', '--label', '--environment' for 'rkt app add'. 
--- common/apps/apps.go | 28 ++++---- lib/app.go | 12 ++-- lib/types.go | 6 +- rkt/cli_apps.go | 160 ++++++++++++++++++++++++++++++++++++++++---- rkt/run.go | 4 ++ stage0/app.go | 38 +++++++++-- 6 files changed, 209 insertions(+), 39 deletions(-) diff --git a/common/apps/apps.go b/common/apps/apps.go index 925d29c51a..4b01216ee6 100644 --- a/common/apps/apps.go +++ b/common/apps/apps.go @@ -45,18 +45,22 @@ const ( ) type App struct { - Image string // the image reference as supplied by the user on the cli - ImType AppImageType // the type of the image reference (to be guessed, url, path or hash) - Args []string // any arguments the user supplied for this app - Asc string // signature file override for image verification (if fetching occurs) - Exec string // exec override for image - Mounts []schema.Mount // mounts for this app (superseding any mounts in rktApps.mounts of same MountPoint) - MemoryLimit *types.ResourceMemory // memory isolator override - CPULimit *types.ResourceCPU // cpu isolator override - User, Group string // user, group overrides - CapsRetain *types.LinuxCapabilitiesRetainSet // os/linux/capabilities-retain-set overrides - CapsRemove *types.LinuxCapabilitiesRevokeSet // os/linux/capabilities-remove-set overrides - SeccompFilter string // seccomp CLI overrides + Name string // the name of the app. If not set, the the image's name will be used. 
+ Image string // the image reference as supplied by the user on the cli + ImType AppImageType // the type of the image reference (to be guessed, url, path or hash) + Args []string // any arguments the user supplied for this app + Asc string // signature file override for image verification (if fetching occurs) + Exec string // exec override for image + Mounts []schema.Mount // mounts for this app (superseding any mounts in rktApps.mounts of same MountPoint) + MemoryLimit *types.ResourceMemory // memory isolator override + CPULimit *types.ResourceCPU // cpu isolator override + User, Group string // user, group overrides + CapsRetain *types.LinuxCapabilitiesRetainSet // os/linux/capabilities-retain-set overrides + CapsRemove *types.LinuxCapabilitiesRevokeSet // os/linux/capabilities-remove-set overrides + SeccompFilter string // seccomp CLI overrides + CRIAnnotations map[string]string // the CRI annotations of the app. + CRILabels map[string]string // the CRI labels of the app. + Environments map[string]string // the environments of the app. // TODO(jonboulle): These images are partially-populated hashes, this should be clarified. ImageID types.Hash // resolved image identifier diff --git a/lib/app.go b/lib/app.go index 97eb99a7fb..41991a9823 100644 --- a/lib/app.go +++ b/lib/app.go @@ -59,9 +59,10 @@ func AppsForPod(uuid, dataDir string, appName string) ([]*App, error) { // newApp constructs the App object with the runtime app and pod manifest. func newApp(ra *schema.RuntimeApp, podManifest *schema.PodManifest, pod *pkgPod.Pod) (*App, error) { app := &App{ - Name: ra.Name.String(), - ImageID: ra.Image.ID.String(), - Annotations: make(map[string]string), + Name: ra.Name.String(), + ImageID: ra.Image.ID.String(), + CRIAnnotations: ra.App.CRIAnnotations, + CRILabels: ra.App.CRILabels, } // Generate mounts @@ -95,11 +96,6 @@ func newApp(ra *schema.RuntimeApp, podManifest *schema.PodManifest, pod *pkgPod. }) } - // Generate annotations. 
- for _, anno := range ra.Annotations { - app.Annotations[anno.Name.String()] = anno.Value - } - // Generate state. if err := appState(app, pod); err != nil { return nil, fmt.Errorf("error getting app's state: %v", err) diff --git a/lib/types.go b/lib/types.go index b66a3cd365..55879b2d68 100644 --- a/lib/types.go +++ b/lib/types.go @@ -58,8 +58,10 @@ type ( ImageID string `json:"image_id"` // Mount points of the container. Mounts []*Mount `json:"mounts,omitempty"` - // Annotations of the container. - Annotations map[string]string `json:"annotations,omitempty"` + // CRIAnnotations of the container. + CRIAnnotations map[string]string `json:"cri_annotations,omitempty"` + // CRILabels of the container. + CRILabels map[string]string `json:"cri_labels,omitempty"` } // Pod defines the pod object. diff --git a/rkt/cli_apps.go b/rkt/cli_apps.go index 1533f9c79a..f0925c41d4 100644 --- a/rkt/cli_apps.go +++ b/rkt/cli_apps.go @@ -151,6 +151,18 @@ func (ae *appExec) Set(s string) error { return nil } +func (ae *appExec) String() string { + app := (*apps.Apps)(ae).Last() + if app == nil { + return "" + } + return app.Exec +} + +func (ae *appExec) Type() string { + return "appExec" +} + // appMount is for --mount flags in the form of: --mount volume=VOLNAME,target=PATH type appMount apps.Apps @@ -193,19 +205,6 @@ func (al *appMount) Set(s string) error { return nil } -func (ae *appExec) String() string { - app := (*apps.Apps)(ae).Last() - if app == nil { - return "" - } - return app.Exec -} - -func (ae *appExec) Type() string { - return "appExec" -} - -// TODO(vc): --set-env should also be per-app and should implement the flags.Value interface. func (al *appMount) String() string { var ms []string for _, m := range ((*apps.Apps)(al)).Mounts { @@ -440,3 +439,138 @@ func (au *appSeccompFilter) String() string { func (au *appSeccompFilter) Type() string { return "appSeccompFilter" } + +// appName is for --name flags in the form of: --name=APPNAME. 
+type appName apps.Apps + +func (au *appName) Set(s string) error { + app := (*apps.Apps)(au).Last() + if app == nil { + return fmt.Errorf("--name must follow an image") + } + app.Name = s + return nil +} + +func (au *appName) String() string { + app := (*apps.Apps)(au).Last() + if app == nil { + return "" + } + return app.Name +} + +func (au *appName) Type() string { + return "appName" +} + +// appAnnotation is for --annotation flags in the form of: --annotation=NAME=VALUE. +type appAnnotation apps.Apps + +func (au *appAnnotation) Set(s string) error { + app := (*apps.Apps)(au).Last() + if app == nil { + return fmt.Errorf("--annotation must follow an image") + } + + fields := strings.SplitN(s, "=", 2) + if len(fields) != 2 { + return fmt.Errorf("invalid format of --annotation flag %q", s) + } + + if app.CRIAnnotations == nil { + app.CRIAnnotations = make(map[string]string) + } + app.CRIAnnotations[fields[0]] = fields[1] + return nil +} + +func (au *appAnnotation) String() string { + app := (*apps.Apps)(au).Last() + if app == nil { + return "" + } + var annotations []string + for name, value := range app.CRIAnnotations { + annotations = append(annotations, fmt.Sprintf("%s=%s", name, value)) + } + return strings.Join(annotations, ",") +} + +func (au *appAnnotation) Type() string { + return "appAnnotation" +} + +// appLabel is for --label flags in the form of: --label=NAME=VALUE. 
+type appLabel apps.Apps + +func (au *appLabel) Set(s string) error { + app := (*apps.Apps)(au).Last() + if app == nil { + return fmt.Errorf("--label must follow an image") + } + + fields := strings.SplitN(s, "=", 2) + if len(fields) != 2 { + return fmt.Errorf("invalid format of --Label flag %q", s) + } + + if app.CRILabels == nil { + app.CRILabels = make(map[string]string) + } + app.CRILabels[fields[0]] = fields[1] + return nil +} + +func (au *appLabel) String() string { + app := (*apps.Apps)(au).Last() + if app == nil { + return "" + } + var labels []string + for name, value := range app.CRILabels { + labels = append(labels, fmt.Sprintf("%s=%s", name, value)) + } + return strings.Join(labels, ",") +} + +func (au *appLabel) Type() string { + return "appLabel" +} + +// appEnv is for --environment flags in the form of --environment=NAME=VALUE. +type appEnv apps.Apps + +func (au *appEnv) Set(s string) error { + app := (*apps.Apps)(au).Last() + if app == nil { + return fmt.Errorf("--environment must follow an image") + } + + fields := strings.SplitN(s, "=", 2) + if len(fields) != 2 { + return fmt.Errorf("invalid format of --environment flag %q", s) + } + + if app.Environments == nil { + app.Environments = make(map[string]string) + } + app.Environments[fields[0]] = fields[1] + return nil +} + +func (au *appEnv) String() string { + app := (*apps.Apps)(au).Last() + if app == nil { + return "" + } + var environments []string + for name, value := range app.Environments { + environments = append(environments, fmt.Sprintf("%s=%s", name, value)) + } + return strings.Join(environments, ",") +} + +func (au *appEnv) Type() string { + return "appEnv" +} diff --git a/rkt/run.go b/rkt/run.go index ae60b4d545..23d7c8c702 100644 --- a/rkt/run.go +++ b/rkt/run.go @@ -100,6 +100,10 @@ func addAppFlags(cmd *cobra.Command) { cmd.Flags().Var((*appMount)(&rktApps), "mount", "mount point binding a volume to a path within an app") cmd.Flags().Var((*appUser)(&rktApps), "user", "user override 
for the preceding image (example: '--user=user')") cmd.Flags().Var((*appGroup)(&rktApps), "group", "group override for the preceding image (example: '--group=group')") + cmd.Flags().Var((*appName)(&rktApps), "name", "set the name of the app (example: '--name=foo'). If not set, then the app name default to the image's name") + cmd.Flags().Var((*appAnnotation)(&rktApps), "annotation", "set the app's annotations (example: '--annotation=foo=bar')") + cmd.Flags().Var((*appLabel)(&rktApps), "label", "set the app's labels (example: '--label=foo=bar')") + cmd.Flags().Var((*appEnv)(&rktApps), "environment", "set the app's environment variables (example: '--environment=foo=bar')") } func init() { diff --git a/stage0/app.go b/stage0/app.go index e838af9ab6..77374b1ddb 100644 --- a/stage0/app.go +++ b/stage0/app.go @@ -56,7 +56,11 @@ type StopConfig struct { PodPID int } -// TODO(iaguis): add override options for Exec, Environment (à la patch-manifest) +type AddConfig struct { + Name *types.ACName + Annotations types.Annotations +} + func AddApp(pcfg PrepareConfig, cfg RunConfig, dir string, img *types.Hash) error { // there should be only one app in the config app := pcfg.Apps.Last() @@ -68,9 +72,18 @@ func AddApp(pcfg PrepareConfig, cfg RunConfig, dir string, img *types.Hash) erro if err != nil { return err } - appName, err := imageNameToAppName(am.Name) - if err != nil { - return err + + var appName *types.ACName + if app.Name != "" { + appName, err = types.NewACName(app.Name) + if err != nil { + return err + } + } else { + appName, err = imageNameToAppName(am.Name) + if err != nil { + return err + } } p, err := stage1types.LoadPod(dir, cfg.UUID) @@ -211,6 +224,23 @@ func AddApp(pcfg PrepareConfig, cfg RunConfig, dir string, img *types.Hash) erro ra.App.Group = group } + if app.CRIAnnotations != nil { + ra.App.CRIAnnotations = app.CRIAnnotations + } + + if app.CRILabels != nil { + ra.App.CRILabels = app.CRILabels + } + + if app.Environments != nil { + envs := make([]string, 
0, len(app.Environments)) + for name, value := range app.Environments { + envs = append(envs, fmt.Sprintf("%s=%s", name, value)) + } + // Let the app level environment override the environment variables. + mergeEnvs(&ra.App.Environment, envs, true) + } + env := ra.App.Environment env.Set("AC_APP_NAME", appName.String()) From 04a407701813006fed34ae11cd968396d91f73fc Mon Sep 17 00:00:00 2001 From: Sergiusz Urbaniak Date: Fri, 23 Sep 2016 13:11:09 +0200 Subject: [PATCH 14/32] cri/sandbox: autodect mutable stage1 capabilities This documents and autodetects stage1 mutable cabability detection. It is needed for stage1 images which do not support mutable operations, and retains backwards compatibility with old stage1 images. --- .../devel/stage1-implementors-guide.md | 45 +++++++++++++- stage0/entrypoint.go | 58 ------------------- stage0/{interface.go => manifest.go} | 51 ++++++++++++++++ stage0/run.go | 10 +++- 4 files changed, 104 insertions(+), 60 deletions(-) delete mode 100644 stage0/entrypoint.go rename stage0/{interface.go => manifest.go} (51%) diff --git a/Documentation/devel/stage1-implementors-guide.md b/Documentation/devel/stage1-implementors-guide.md index 935740521a..59431c3e1b 100644 --- a/Documentation/devel/stage1-implementors-guide.md +++ b/Documentation/devel/stage1-implementors-guide.md @@ -69,6 +69,7 @@ Any stage1 that supports and expects machined registration to occur will likely * `--interactive` to run a pod interactively, that is, pass standard input to the application (only for pods with one application) * `--local-config=$PATH` to override the local configuration directory * `--private-users=$SHIFT` to define a UID/GID shift when using user namespaces. SHIFT is a two-value colon-separated parameter, the first value is the host UID to assign to the container and the second one is the number of host UIDs to assign. +* `--mutable` activates a mutable environment in stage1. 
If the stage1 image manifest has no `app` entrypoint annotations declared, this flag will be unset to retain backwards compatibility. #### Arguments added in interface version 2 @@ -138,7 +139,49 @@ In the bundled rkt stage 1, the entrypoint is sending SIGTERM signal to systemd- * `--force` to force the stopping of the pod. E.g. in the bundled rkt stage 1, stop sends SIGKILL * UUID of the pod -## Versioning +### rkt app start + +`coreos.com/rkt/stage1/app/start` + +#### Arguments + +`start $OPTIONS UUID APPNAME ENTERENTRYPOINT PID` + +* `--debug` to activate debugging +* `--disable-capabilities-restriction` gives all capabilities to apps (overrides `retain-set` and `remove-set`) +* `--disable-paths` disables inaccessible and read-only paths (such as `/proc/sysrq-trigger`) +* `--disable-seccomp` disables seccomp (overrides `retain-set` and `remove-set`) +* `--private-users=$SHIFT` to define a UID/GID shift when using user namespaces. SHIFT is a two-value colon-separated parameter, the first value is the host UID to assign to the container and the second one is the number of host UIDs to assign. + +### rkt app stop + +`coreos.com/rkt/stage1/app/stop` + +#### Arguments + +`stop $OPTIONS UUID APPNAME ENTERENTRYPOINT PID` + +* `--debug` to activate debugging + +### rkt app rm + +`coreos.com/rkt/stage1/app/rm` + +#### Arguments + +`rm $OPTIONS UUID APPNAME ENTERENTRYPOINT PID` + +* `--debug` to activate debugging + +## Metadata + +### Mutable pods + +Stage1 images can support mutable pod environments, where, once a pod has been started, applications can be added/started/stopped/removed while the actual pod is running. This information is persisted at runtime in the pod manifest using the `coreos.com/rkt/stage1/mutable` annotation. + +If the annotation is not present, `false` is assumed. + +### Versioning The stage1 command line interface is versioned using an annotation with the name `coreos.com/rkt/stage1/interface-version`. 
If the annotation is not present, rkt assumes the version is 1. diff --git a/stage0/entrypoint.go b/stage0/entrypoint.go deleted file mode 100644 index b4457a767d..0000000000 --- a/stage0/entrypoint.go +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright 2015 The rkt Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//+build linux - -package stage0 - -import ( - "encoding/json" - "errors" - "fmt" - "io/ioutil" - - "github.com/appc/spec/schema" - "github.com/coreos/rkt/common" - "github.com/hashicorp/errwrap" -) - -const ( - enterEntrypoint = "coreos.com/rkt/stage1/enter" - runEntrypoint = "coreos.com/rkt/stage1/run" - gcEntrypoint = "coreos.com/rkt/stage1/gc" - stopEntrypoint = "coreos.com/rkt/stage1/stop" - - appRmEntrypoint = "coreos.com/rkt/stage1/app/rm" - appStartEntrypoint = "coreos.com/rkt/stage1/app/start" - appStopEntrypoint = "coreos.com/rkt/stage1/app/stop" -) - -// getStage1Entrypoint retrieves the named entrypoint from the stage1 manifest for a given pod -func getStage1Entrypoint(cdir string, entrypoint string) (string, error) { - b, err := ioutil.ReadFile(common.Stage1ManifestPath(cdir)) - if err != nil { - return "", errwrap.Wrap(errors.New("error reading pod manifest"), err) - } - - s1m := schema.ImageManifest{} - if err := json.Unmarshal(b, &s1m); err != nil { - return "", errwrap.Wrap(errors.New("error unmarshaling stage1 manifest"), err) - } - - if ep, ok := s1m.Annotations.Get(entrypoint); ok { - return ep, nil - } - - return "", 
fmt.Errorf("entrypoint %q not found", entrypoint) -} diff --git a/stage0/interface.go b/stage0/manifest.go similarity index 51% rename from stage0/interface.go rename to stage0/manifest.go index b1868a63f1..35b92fc4f4 100644 --- a/stage0/interface.go +++ b/stage0/manifest.go @@ -19,6 +19,7 @@ package stage0 import ( "encoding/json" "errors" + "fmt" "io/ioutil" "strconv" @@ -27,10 +28,60 @@ import ( "github.com/hashicorp/errwrap" ) +const ( + enterEntrypoint = "coreos.com/rkt/stage1/enter" + runEntrypoint = "coreos.com/rkt/stage1/run" + gcEntrypoint = "coreos.com/rkt/stage1/gc" + stopEntrypoint = "coreos.com/rkt/stage1/stop" + + appRmEntrypoint = "coreos.com/rkt/stage1/app/rm" + appStartEntrypoint = "coreos.com/rkt/stage1/app/start" + appStopEntrypoint = "coreos.com/rkt/stage1/app/stop" +) + const ( interfaceVersion = "coreos.com/rkt/stage1/interface-version" ) +// supportsMutableEnvironment returns whether the given stage1 image supports mutable pod operations. +// It introspects the stage1 manifest and checks the presence of app* entrypoints. 
+func supportsMutableEnvironment(cdir string) (bool, error) { + b, err := ioutil.ReadFile(common.Stage1ManifestPath(cdir)) + if err != nil { + return false, errwrap.Wrap(errors.New("error reading pod manifest"), err) + } + + s1m := schema.ImageManifest{} + if err := json.Unmarshal(b, &s1m); err != nil { + return false, errwrap.Wrap(errors.New("error unmarshaling stage1 manifest"), err) + } + + _, appRmOk := s1m.Annotations.Get(appRmEntrypoint) + _, appStartOk := s1m.Annotations.Get(appStartEntrypoint) + _, appStopOk := s1m.Annotations.Get(appStopEntrypoint) + + return appRmOk && appStartOk && appStopOk, nil +} + +// getStage1Entrypoint retrieves the named entrypoint from the stage1 manifest for a given pod +func getStage1Entrypoint(cdir string, entrypoint string) (string, error) { + b, err := ioutil.ReadFile(common.Stage1ManifestPath(cdir)) + if err != nil { + return "", errwrap.Wrap(errors.New("error reading pod manifest"), err) + } + + s1m := schema.ImageManifest{} + if err := json.Unmarshal(b, &s1m); err != nil { + return "", errwrap.Wrap(errors.New("error unmarshaling stage1 manifest"), err) + } + + if ep, ok := s1m.Annotations.Get(entrypoint); ok { + return ep, nil + } + + return "", fmt.Errorf("entrypoint %q not found", entrypoint) +} + // getStage1InterfaceVersion retrieves the interface version from the stage1 // manifest for a given pod func getStage1InterfaceVersion(cdir string) (int, error) { diff --git a/stage0/run.go b/stage0/run.go index c8d7b3e6a6..b3488c9fc7 100644 --- a/stage0/run.go +++ b/stage0/run.go @@ -656,8 +656,16 @@ func Run(cfg RunConfig, dir string, dataDir string) { } } - // TODO(sur): spec out a boolean coreos.com/rkt/stage1/mutable, and introspect here if cfg.Mutable { + mutable, err := supportsMutableEnvironment(dir) + + switch { + case err != nil: + log.FatalE("error determining stage1 mutable support", err) + case !mutable: + log.Fatalln("stage1 does not support mutable pods") + } + args = append(args, "--mutable") } From 
1468df88578d4f4b6fff3a0d9242570b865c10f0 Mon Sep 17 00:00:00 2001 From: Yifan Gu Date: Fri, 23 Sep 2016 16:20:38 -0700 Subject: [PATCH 15/32] CRI: Add '--working-dir', '--supplementary-gids', '--readonly-rootfs' flags. Add these flags for 'rkt app add', 'rkt run', 'rkt prepare'. --- common/apps/apps.go | 35 ++++++++++--------- rkt/cli_apps.go | 85 +++++++++++++++++++++++++++++++++++++++++++++ rkt/run.go | 3 ++ stage0/app.go | 25 ++++++++----- stage0/run.go | 27 +++++++++----- 5 files changed, 142 insertions(+), 33 deletions(-) diff --git a/common/apps/apps.go b/common/apps/apps.go index 4b01216ee6..57aafd1c51 100644 --- a/common/apps/apps.go +++ b/common/apps/apps.go @@ -45,22 +45,25 @@ const ( ) type App struct { - Name string // the name of the app. If not set, the the image's name will be used. - Image string // the image reference as supplied by the user on the cli - ImType AppImageType // the type of the image reference (to be guessed, url, path or hash) - Args []string // any arguments the user supplied for this app - Asc string // signature file override for image verification (if fetching occurs) - Exec string // exec override for image - Mounts []schema.Mount // mounts for this app (superseding any mounts in rktApps.mounts of same MountPoint) - MemoryLimit *types.ResourceMemory // memory isolator override - CPULimit *types.ResourceCPU // cpu isolator override - User, Group string // user, group overrides - CapsRetain *types.LinuxCapabilitiesRetainSet // os/linux/capabilities-retain-set overrides - CapsRemove *types.LinuxCapabilitiesRevokeSet // os/linux/capabilities-remove-set overrides - SeccompFilter string // seccomp CLI overrides - CRIAnnotations map[string]string // the CRI annotations of the app. - CRILabels map[string]string // the CRI labels of the app. - Environments map[string]string // the environments of the app. + Name string // the name of the app. If not set, the the image's name will be used. 
+ Image string // the image reference as supplied by the user on the cli + ImType AppImageType // the type of the image reference (to be guessed, url, path or hash) + Args []string // any arguments the user supplied for this app + Asc string // signature file override for image verification (if fetching occurs) + Exec string // exec override for image + WorkingDir string // working directory override for image + ReadOnlyRootFS bool // read-only rootfs override. + Mounts []schema.Mount // mounts for this app (superseding any mounts in rktApps.mounts of same MountPoint) + MemoryLimit *types.ResourceMemory // memory isolator override + CPULimit *types.ResourceCPU // cpu isolator override + User, Group string // user, group overrides + SupplementaryGIDs []int // supplementary gids override + CapsRetain *types.LinuxCapabilitiesRetainSet // os/linux/capabilities-retain-set overrides + CapsRemove *types.LinuxCapabilitiesRevokeSet // os/linux/capabilities-remove-set overrides + SeccompFilter string // seccomp CLI overrides + CRIAnnotations map[string]string // the CRI annotations of the app. + CRILabels map[string]string // the CRI labels of the app. + Environments map[string]string // the environments of the app. // TODO(jonboulle): These images are partially-populated hashes, this should be clarified. 
ImageID types.Hash // resolved image identifier diff --git a/rkt/cli_apps.go b/rkt/cli_apps.go index f0925c41d4..2bde27429e 100644 --- a/rkt/cli_apps.go +++ b/rkt/cli_apps.go @@ -19,6 +19,7 @@ package main import ( "fmt" "net/url" + "strconv" "strings" "github.com/coreos/rkt/common/apps" @@ -574,3 +575,87 @@ func (au *appEnv) String() string { func (au *appEnv) Type() string { return "appEnv" } + +type appWorkingDir apps.Apps + +func (au *appWorkingDir) Set(s string) error { + app := (*apps.Apps)(au).Last() + if app == nil { + return fmt.Errorf("--working-dir must follow an image") + } + app.WorkingDir = s + return nil +} + +func (au *appWorkingDir) String() string { + app := (*apps.Apps)(au).Last() + if app == nil { + return "" + } + return app.WorkingDir +} + +func (au *appWorkingDir) Type() string { + return "appWorkingDir" +} + +type appReadOnlyRootFS apps.Apps + +func (au *appReadOnlyRootFS) Set(s string) error { + app := (*apps.Apps)(au).Last() + if app == nil { + return fmt.Errorf("--readonly-rootfs must follow an image") + } + value, err := strconv.ParseBool(s) + if err != nil { + return fmt.Errorf("--readonly-rootfs must be set with a boolean") + } + app.ReadOnlyRootFS = value + return nil +} + +func (au *appReadOnlyRootFS) String() string { + app := (*apps.Apps)(au).Last() + if app == nil { + return "" + } + return fmt.Sprintf("%v", app.ReadOnlyRootFS) +} + +func (au *appReadOnlyRootFS) Type() string { + return "appReadOnlyRootFS" +} + +type appSupplementaryGIDs apps.Apps + +func (au *appSupplementaryGIDs) Set(s string) error { + app := (*apps.Apps)(au).Last() + if app == nil { + return fmt.Errorf("--supplementary-gids must follow an image") + } + values := strings.Split(s, ",") + for _, v := range values { + gid, err := strconv.Atoi(v) + if err != nil { + return fmt.Errorf("--supplementary-gids must be integers") + } + app.SupplementaryGIDs = append(app.SupplementaryGIDs, gid) + } + return nil +} + +func (au *appSupplementaryGIDs) String() string { + app 
:= (*apps.Apps)(au).Last() + if app == nil { + return "" + } + var gids []string + for _, gid := range app.SupplementaryGIDs { + gids = append(gids, strconv.Itoa(gid)) + } + return strings.Join(gids, ",") +} + +func (au *appSupplementaryGIDs) Type() string { + return "appSupplementaryGIDs" +} diff --git a/rkt/run.go b/rkt/run.go index 23d7c8c702..fb19b275f9 100644 --- a/rkt/run.go +++ b/rkt/run.go @@ -97,9 +97,12 @@ func addIsolatorFlags(cmd *cobra.Command, compat bool) { func addAppFlags(cmd *cobra.Command) { cmd.Flags().Var((*appExec)(&rktApps), "exec", "override the exec command for the preceding image") + cmd.Flags().Var((*appWorkingDir)(&rktApps), "working-dir", "override the working directory of the preceding image") + cmd.Flags().Var((*appReadOnlyRootFS)(&rktApps), "readonly-rootfs", "if set, the app's rootfs will be mounted read-only") cmd.Flags().Var((*appMount)(&rktApps), "mount", "mount point binding a volume to a path within an app") cmd.Flags().Var((*appUser)(&rktApps), "user", "user override for the preceding image (example: '--user=user')") cmd.Flags().Var((*appGroup)(&rktApps), "group", "group override for the preceding image (example: '--group=group')") + cmd.Flags().Var((*appSupplementaryGIDs)(&rktApps), "supplementary-gids", "supplementary group IDs override for the preceding image (examples: '--supplementary-gids=1024,2048'") cmd.Flags().Var((*appName)(&rktApps), "name", "set the name of the app (example: '--name=foo'). 
If not set, then the app name default to the image's name") cmd.Flags().Var((*appAnnotation)(&rktApps), "annotation", "set the app's annotations (example: '--annotation=foo=bar')") cmd.Flags().Var((*appLabel)(&rktApps), "label", "set the app's labels (example: '--label=foo=bar')") diff --git a/stage0/app.go b/stage0/app.go index 77374b1ddb..cefe84a83d 100644 --- a/stage0/app.go +++ b/stage0/app.go @@ -195,9 +195,10 @@ func AddApp(pcfg PrepareConfig, cfg RunConfig, dir string, img *types.Hash) erro ID: *img, Labels: am.Labels, }, + ReadOnlyRootFS: app.ReadOnlyRootFS, } - if execOverride := app.Exec; execOverride != "" { + if app.Exec != "" { // Create a minimal App section if not present if am.App == nil { ra.App = &types.App{ @@ -205,23 +206,31 @@ func AddApp(pcfg PrepareConfig, cfg RunConfig, dir string, img *types.Hash) erro Group: strconv.Itoa(os.Getgid()), } } - ra.App.Exec = []string{execOverride} + ra.App.Exec = []string{app.Exec} } - if execAppends := app.Args; execAppends != nil { - ra.App.Exec = append(ra.App.Exec, execAppends...) + if app.Args != nil { + ra.App.Exec = append(ra.App.Exec, app.Args...) 
+ } + + if app.WorkingDir != "" { + ra.App.WorkingDirectory = app.WorkingDir } if err := prepareIsolators(app, ra.App); err != nil { return err } - if user := app.User; user != "" { - ra.App.User = user + if app.User != "" { + ra.App.User = app.User + } + + if app.Group != "" { + ra.App.Group = app.Group } - if group := app.Group; group != "" { - ra.App.Group = group + if app.SupplementaryGIDs != nil { + ra.App.SupplementaryGIDs = app.SupplementaryGIDs } if app.CRIAnnotations != nil { diff --git a/stage0/run.go b/stage0/run.go index b3488c9fc7..729c54a3d3 100644 --- a/stage0/run.go +++ b/stage0/run.go @@ -231,10 +231,11 @@ func generatePodManifest(cfg PrepareConfig, dir string) ([]byte, error) { ID: img, Labels: am.Labels, }, - Mounts: MergeMounts(cfg.Apps.Mounts, app.Mounts), + Mounts: MergeMounts(cfg.Apps.Mounts, app.Mounts), + ReadOnlyRootFS: app.ReadOnlyRootFS, } - if execOverride := app.Exec; execOverride != "" { + if app.Exec != "" { // Create a minimal App section if not present if am.App == nil { ra.App = &types.App{ @@ -242,23 +243,31 @@ func generatePodManifest(cfg PrepareConfig, dir string) ([]byte, error) { Group: strconv.Itoa(os.Getgid()), } } - ra.App.Exec = []string{execOverride} + ra.App.Exec = []string{app.Exec} } - if execAppends := app.Args; execAppends != nil { - ra.App.Exec = append(ra.App.Exec, execAppends...) + if app.Args != nil { + ra.App.Exec = append(ra.App.Exec, app.Args...) 
+ } + + if app.WorkingDir != "" { + ra.App.WorkingDirectory = app.WorkingDir } if err := prepareIsolators(app, ra.App); err != nil { return err } - if user := app.User; user != "" { - ra.App.User = user + if app.User != "" { + ra.App.User = app.User + } + + if app.Group != "" { + ra.App.Group = app.Group } - if group := app.Group; group != "" { - ra.App.Group = group + if app.SupplementaryGIDs != nil { + ra.App.SupplementaryGIDs = app.SupplementaryGIDs } // loading the environment from the lowest priority to highest From 1ae5d195786d43d8aa49f3d0b71e5067c6b77c60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iago=20L=C3=B3pez=20Galeiras?= Date: Wed, 21 Sep 2016 15:03:16 +0200 Subject: [PATCH 16/32] common,tests: refactor GetExitStatus It will be used in more places. --- common/common.go | 15 +++++++++++++++ tests/rkt_tests.go | 21 +++++---------------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/common/common.go b/common/common.go index 90a2c8b762..dbf553ddf1 100644 --- a/common/common.go +++ b/common/common.go @@ -432,3 +432,18 @@ func RemoveEmptyLines(str string) []string { return lines } + +// GetExitStatus converts an error to an exit status. 
If it wasn't an exit +// status != 0 it returns the same error that it was called with +func GetExitStatus(err error) (int, error) { + if err == nil { + return 0, nil + } + if exiterr, ok := err.(*exec.ExitError); ok { + // the program has exited with an exit code != 0 + if status, ok := exiterr.Sys().(syscall.WaitStatus); ok { + return status.ExitStatus(), nil + } + } + return -1, err +} diff --git a/tests/rkt_tests.go b/tests/rkt_tests.go index ca2074a166..363e839d07 100644 --- a/tests/rkt_tests.go +++ b/tests/rkt_tests.go @@ -39,6 +39,7 @@ import ( "github.com/appc/spec/schema/types" "github.com/coreos/gexpect" "github.com/coreos/rkt/api/v1alpha" + "github.com/coreos/rkt/common" "github.com/coreos/rkt/tests/testutils" taas "github.com/coreos/rkt/tests/testutils/aci-server" "google.golang.org/grpc" @@ -122,22 +123,9 @@ func spawnOrFail(t *testing.T, cmd string) *gexpect.ExpectSubprocess { return child } -func getExitStatus(err error) int { - if err == nil { - return 0 - } - if exiterr, ok := err.(*exec.ExitError); ok { - // the program has exited with an exit code != 0 - if status, ok := exiterr.Sys().(syscall.WaitStatus); ok { - return status.ExitStatus() - } - } - return -1 -} - func waitOrFail(t *testing.T, child *gexpect.ExpectSubprocess, expectedStatus int) { err := child.Wait() - status := getExitStatus(err) + status, _ := common.GetExitStatus(err) if status != expectedStatus { t.Fatalf("rkt terminated with unexpected status %d, expected %d\nOutput:\n%s", status, expectedStatus, child.Collect()) } @@ -348,7 +336,8 @@ func runRkt(t *testing.T, rktCmd string, uid, gid int) (string, int) { buf.WriteString(line + "\n") // reappend newline } - return buf.String(), getExitStatus(child.Wait()) + status, _ := common.GetExitStatus(child.Wait()) + return buf.String(), status } func startRktAsGidAndCheckOutput(t *testing.T, rktCmd, expectedLine string, gid int) *gexpect.ExpectSubprocess { @@ -878,7 +867,7 @@ func unmountPod(t *testing.T, ctx *testutils.RktRunCtx, uuid 
string, rmNetns boo func checkExitStatus(child *gexpect.ExpectSubprocess) error { err := child.Wait() - status := getExitStatus(err) + status, _ := common.GetExitStatus(err) if status != 0 { return fmt.Errorf("rkt terminated with unexpected status %d, expected %d\nOutput:\n%s", status, 0, child.Collect()) } From 178c33819c5e67c3b46d2d581ad5576679403a28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iago=20L=C3=B3pez=20Galeiras?= Date: Wed, 21 Sep 2016 15:03:38 +0200 Subject: [PATCH 17/32] stage0,stage1: handle exit status from stop entrypoint If we try to stop an app that doesn't have a service file (either because it was not started or because the starting failed) we should give an appropriate error message. Also, since `app rm` calls the stop entrypoint, we should identify this error and continue removing the app. --- stage0/app.go | 17 +++++++++++++++-- stage1/app-stop/app-stop.go | 8 ++++++-- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/stage0/app.go b/stage0/app.go index cefe84a83d..62de66fefc 100644 --- a/stage0/app.go +++ b/stage0/app.go @@ -337,7 +337,7 @@ func callEntrypoint(dir, entrypoint string, args []string) error { } if err := c.Run(); err != nil { - return fmt.Errorf("error executing stage1's app rm: %v", err) + return err } if err := os.Chdir(previousDir); err != nil { @@ -396,7 +396,14 @@ func RmApp(dir string, uuid *types.UUID, usesOverlay bool, appName *types.ACName } if err := callEntrypoint(dir, appStopEntrypoint, args); err != nil { - return err + status, err := common.GetExitStatus(err) + // ignore nonexistent units failing to stop. 
Exit status 5 + // comes from systemctl and means the unit doesn't exist + if err != nil { + return err + } else if status != 5 { + return fmt.Errorf("exit status %d", status) + } } if err := callEntrypoint(dir, appRmEntrypoint, args); err != nil { @@ -559,6 +566,12 @@ func StopApp(cfg StopConfig) error { } if err := callEntrypoint(cfg.Dir, appStopEntrypoint, args); err != nil { + status, err := common.GetExitStatus(err) + // exit status 5 comes from systemctl and means the unit doesn't exist + if status == 5 { + return fmt.Errorf("app %q is not running", app.Name) + } + return err } diff --git a/stage1/app-stop/app-stop.go b/stage1/app-stop/app-stop.go index f90d5457d7..8ae2c09718 100644 --- a/stage1/app-stop/app-stop.go +++ b/stage1/app-stop/app-stop.go @@ -23,6 +23,7 @@ import ( "os" "os/exec" + "github.com/coreos/rkt/common" rktlog "github.com/coreos/rkt/pkg/log" stage1initcommon "github.com/coreos/rkt/stage1/init/common" @@ -71,8 +72,11 @@ func main() { } if err := cmd.Run(); err != nil { - log.PrintE(fmt.Sprintf("error stopping app %q", appName.String()), err) - os.Exit(1) + status, err := common.GetExitStatus(err) + if err != nil { + os.Exit(1) + } + os.Exit(status) } os.Exit(0) From c517764b338d51ee1baada5946d3578e35eccff1 Mon Sep 17 00:00:00 2001 From: Casey Callendrello Date: Thu, 22 Sep 2016 18:31:15 +0200 Subject: [PATCH 18/32] CRI: add oom_score_adj isolator --- common/apps/apps.go | 1 + rkt/cli_apps.go | 37 ++++++++++++++++ rkt/run.go | 1 + stage0/run.go | 5 +++ stage1/init/common/units.go | 2 + tests/rkt_oom_score_adj_test.go | 76 +++++++++++++++++++++++++++++++++ 6 files changed, 122 insertions(+) create mode 100644 tests/rkt_oom_score_adj_test.go diff --git a/common/apps/apps.go b/common/apps/apps.go index 57aafd1c51..fd5911008b 100644 --- a/common/apps/apps.go +++ b/common/apps/apps.go @@ -61,6 +61,7 @@ type App struct { CapsRetain *types.LinuxCapabilitiesRetainSet // os/linux/capabilities-retain-set overrides CapsRemove 
*types.LinuxCapabilitiesRevokeSet // os/linux/capabilities-remove-set overrides SeccompFilter string // seccomp CLI overrides + OOMScoreAdj *types.LinuxOOMScoreAdj // oom-score-adj isolator override CRIAnnotations map[string]string // the CRI annotations of the app. CRILabels map[string]string // the CRI labels of the app. Environments map[string]string // the environments of the app. diff --git a/rkt/cli_apps.go b/rkt/cli_apps.go index 2bde27429e..96fb645a55 100644 --- a/rkt/cli_apps.go +++ b/rkt/cli_apps.go @@ -441,6 +441,43 @@ func (au *appSeccompFilter) Type() string { return "appSeccompFilter" } +// appOOMScoreAdj is to adjust /proc/$pid/oom_score_adj +type appOOMScoreAdj apps.Apps + +func (aml *appOOMScoreAdj) Set(s string) error { + app := (*apps.Apps)(aml).Last() + if app == nil { + return fmt.Errorf("--oom-score-adj must follow an image") + } + limit, err := strconv.Atoi(s) + if err != nil { + return err + } + score, err := types.NewLinuxOOMScoreAdj(limit) + if err != nil { + return err + } + + app.OOMScoreAdj = score + return nil +} + +func (aml *appOOMScoreAdj) String() string { + app := (*apps.Apps)(aml).Last() + if app == nil { + return "" + } + adj := app.OOMScoreAdj + if adj == nil { + return "" + } + return strconv.Itoa(int(*adj)) +} + +func (aml *appOOMScoreAdj) Type() string { + return "appOOMScoreAdj" +} + // appName is for --name flags in the form of: --name=APPNAME. 
type appName apps.Apps diff --git a/rkt/run.go b/rkt/run.go index fb19b275f9..dfcfd91e9f 100644 --- a/rkt/run.go +++ b/rkt/run.go @@ -85,6 +85,7 @@ func addIsolatorFlags(cmd *cobra.Command, compat bool) { cmd.Flags().Var((*appCapsRetain)(&rktApps), "caps-retain", "capability to retain (example: '--caps-retain=CAP_SYS_ADMIN')") cmd.Flags().Var((*appCapsRemove)(&rktApps), "caps-remove", "capability to remove (example: '--caps-remove=CAP_MKNOD')") cmd.Flags().Var((*appSeccompFilter)(&rktApps), "seccomp", "seccomp filter override (example: '--seccomp mode=retain,errno=EPERM,chmod,chown')") + cmd.Flags().Var((*appOOMScoreAdj)(&rktApps), "oom-score-adj", "oom-score-adj isolator override") // For backwards compatibility if compat { diff --git a/stage0/run.go b/stage0/run.go index 729c54a3d3..2ccae20baf 100644 --- a/stage0/run.go +++ b/stage0/run.go @@ -310,6 +310,7 @@ func generatePodManifest(cfg PrepareConfig, dir string) ([]byte, error) { return pmb, nil } +// prepareIsolators merges the CLI app parameters with the manifest's app func prepareIsolators(setup *apps.App, app *types.App) error { if memoryOverride := setup.MemoryLimit; memoryOverride != nil { isolator := memoryOverride.AsIsolator() @@ -321,6 +322,10 @@ func prepareIsolators(setup *apps.App, app *types.App) error { app.Isolators = append(app.Isolators, isolator) } + if oomAdjOverride := setup.OOMScoreAdj; oomAdjOverride != nil { + app.Isolators.ReplaceIsolatorsByName(oomAdjOverride.AsIsolator(), []types.ACIdentifier{types.LinuxOOMScoreAdjName}) + } + if setup.CapsRetain != nil && setup.CapsRemove != nil { return fmt.Errorf("error: cannot use both --caps-retain and --caps-remove on the same image") } diff --git a/stage1/init/common/units.go b/stage1/init/common/units.go index 52884e80aa..768fe6588d 100644 --- a/stage1/init/common/units.go +++ b/stage1/init/common/units.go @@ -488,6 +488,8 @@ func (uw *UnitWriter) AppUnit( uw.err = err return } + case *types.LinuxOOMScoreAdj: + opts = append(opts, 
unit.NewUnitOption("Service", "OOMScoreAdjust", strconv.Itoa(int(*v)))) } } diff --git a/tests/rkt_oom_score_adj_test.go b/tests/rkt_oom_score_adj_test.go new file mode 100644 index 0000000000..ae6adc8012 --- /dev/null +++ b/tests/rkt_oom_score_adj_test.go @@ -0,0 +1,76 @@ +// Copyright 2016 The rkt Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build host coreos src + +package main + +import ( + "fmt" + "os" + "testing" + + "github.com/coreos/rkt/tests/testutils" +) + +func TestOOMScoreAdjust(t *testing.T) { + for _, tt := range []struct { + patch []string + appArgs string + expected string + }{ + { + patch: []string{"--isolators=os/linux/oom-score-adj,99"}, + expected: "<<<99", + }, + { + patch: []string{"--isolators=os/linux/oom-score-adj,-50"}, + expected: "<<<-50", + }, + { + patch: nil, + expected: "<<<0", + }, + { + patch: nil, + appArgs: "--oom-score-adj 42", + expected: "<<<42", + }, + } { + func() { + ctx := testutils.NewRktRunCtx() + defer ctx.Cleanup() + + ps := []string{} + if len(tt.patch) > 0 { + ps = append(ps, tt.patch...) + } + + image := patchTestACI("rkt-oom-adj.aci", ps...) 
+ defer os.Remove(image) + + imageParams := "--exec=/inspect -- -read-file -file-name /proc/self/oom_score_adj" + + rktCmd := fmt.Sprintf( + "%s --debug --insecure-options=image run %s %s %s", + ctx.Cmd(), + image, + tt.appArgs, + imageParams, + ) + + runRktAndCheckOutput(t, rktCmd, tt.expected, false) + }() + } +} From 49699816e5ee0e3947154023622cd2d623ff5777 Mon Sep 17 00:00:00 2001 From: Yifan Gu Date: Thu, 22 Sep 2016 12:27:58 -0700 Subject: [PATCH 19/32] run/prepare: Support mutating the app for 'rkt run/prepare' as well. Also removed unnecessary flag conflict checks for --pod-manifest vs. other flags (--name, --environment, etc) because they are already taken care of since they all need to follow an image. --- common/apps/apps.go | 4 ++-- lib/app.go | 8 ++++---- lib/pod.go | 8 ++++---- lib/types.go | 16 ++++++++-------- rkt/cli_apps.go | 28 ++++++++++++++-------------- rkt/prepare.go | 11 +++++------ rkt/run.go | 15 +++++++-------- stage0/app.go | 8 ++++---- stage0/run.go | 40 +++++++++++++++++++++++++++++++++------- 9 files changed, 81 insertions(+), 57 deletions(-) diff --git a/common/apps/apps.go b/common/apps/apps.go index fd5911008b..c9e8421ee8 100644 --- a/common/apps/apps.go +++ b/common/apps/apps.go @@ -62,8 +62,8 @@ type App struct { CapsRemove *types.LinuxCapabilitiesRevokeSet // os/linux/capabilities-remove-set overrides SeccompFilter string // seccomp CLI overrides OOMScoreAdj *types.LinuxOOMScoreAdj // oom-score-adj isolator override - CRIAnnotations map[string]string // the CRI annotations of the app. - CRILabels map[string]string // the CRI labels of the app. + UserAnnotations map[string]string // the user annotations of the app. + UserLabels map[string]string // the user labels of the app. Environments map[string]string // the environments of the app. // TODO(jonboulle): These images are partially-populated hashes, this should be clarified.
diff --git a/lib/app.go b/lib/app.go index 41991a9823..eed6d74ddf 100644 --- a/lib/app.go +++ b/lib/app.go @@ -59,10 +59,10 @@ func AppsForPod(uuid, dataDir string, appName string) ([]*App, error) { // newApp constructs the App object with the runtime app and pod manifest. func newApp(ra *schema.RuntimeApp, podManifest *schema.PodManifest, pod *pkgPod.Pod) (*App, error) { app := &App{ - Name: ra.Name.String(), - ImageID: ra.Image.ID.String(), - CRIAnnotations: ra.App.CRIAnnotations, - CRILabels: ra.App.CRILabels, + Name: ra.Name.String(), + ImageID: ra.Image.ID.String(), + UserAnnotations: ra.App.CRIAnnotations, + UserLabels: ra.App.CRILabels, } // Generate mounts diff --git a/lib/pod.go b/lib/pod.go index 04d604dc0a..e4ad6f5e0a 100644 --- a/lib/pod.go +++ b/lib/pod.go @@ -34,16 +34,16 @@ func NewPodFromInternalPod(p *pkgPod.Pod) (*Pod, error) { } if len(manifest.CRIAnnotations) > 0 { - pod.CRIAnnotations = make(map[string]string) + pod.UserAnnotations = make(map[string]string) for name, value := range manifest.CRIAnnotations { - pod.CRIAnnotations[name] = value + pod.UserAnnotations[name] = value } } if len(manifest.CRILabels) > 0 { - pod.CRILabels = make(map[string]string) + pod.UserLabels = make(map[string]string) for name, value := range manifest.CRILabels { - pod.CRILabels[name] = value + pod.UserLabels[name] = value } } diff --git a/lib/types.go b/lib/types.go index 55879b2d68..a42308ad00 100644 --- a/lib/types.go +++ b/lib/types.go @@ -58,10 +58,10 @@ type ( ImageID string `json:"image_id"` // Mount points of the container. Mounts []*Mount `json:"mounts,omitempty"` - // CRIAnnotations of the container. - CRIAnnotations map[string]string `json:"cri_annotations,omitempty"` - // CRILabels of the container. - CRILabels map[string]string `json:"cri_labels,omitempty"` + // User annotations of the container. + UserAnnotations map[string]string `json:"user_annotations,omitempty"` + // User labels of the container. 
+ UserLabels map[string]string `json:"user_labels,omitempty"` } // Pod defines the pod object. @@ -76,9 +76,9 @@ type ( AppNames []string `json:"app_names,omitempty"` // The start time of the pod. StartedAt *int64 `json:"started_at,omitempty"` - // CRIAnnotations are the pod annotations used for CRI. - CRIAnnotations map[string]string `json:"cri_annotations,omitempty"` - // CRILabels are the pod labels used for CRI. - CRILabels map[string]string `json:"cri_labels,omitempty"` + // UserAnnotations are the pod user annotations. + UserAnnotations map[string]string `json:"user_annotations,omitempty"` + // UserLabels are the pod user labels. + UserLabels map[string]string `json:"user_labels,omitempty"` } ) diff --git a/rkt/cli_apps.go b/rkt/cli_apps.go index 96fb645a55..4a20235737 100644 --- a/rkt/cli_apps.go +++ b/rkt/cli_apps.go @@ -502,24 +502,24 @@ func (au *appName) Type() string { return "appName" } -// appAnnotation is for --annotation flags in the form of: --annotation=NAME=VALUE. +// appAnnotation is for --user-annotation flags in the form of: --user-annotation=NAME=VALUE. 
type appAnnotation apps.Apps func (au *appAnnotation) Set(s string) error { app := (*apps.Apps)(au).Last() if app == nil { - return fmt.Errorf("--annotation must follow an image") + return fmt.Errorf("--user-annotation must follow an image") } fields := strings.SplitN(s, "=", 2) if len(fields) != 2 { - return fmt.Errorf("invalid format of --annotation flag %q", s) + return fmt.Errorf("invalid format of --user-annotation flag %q", s) } - if app.CRIAnnotations == nil { - app.CRIAnnotations = make(map[string]string) + if app.UserAnnotations == nil { + app.UserAnnotations = make(map[string]string) } - app.CRIAnnotations[fields[0]] = fields[1] + app.UserAnnotations[fields[0]] = fields[1] return nil } @@ -529,7 +529,7 @@ func (au *appAnnotation) String() string { return "" } var annotations []string - for name, value := range app.CRIAnnotations { + for name, value := range app.UserAnnotations { annotations = append(annotations, fmt.Sprintf("%s=%s", name, value)) } return strings.Join(annotations, ",") @@ -539,24 +539,24 @@ func (au *appAnnotation) Type() string { return "appAnnotation" } -// appLabel is for --label flags in the form of: --label=NAME=VALUE. +// appLabel is for --user-label flags in the form of: --user-label=NAME=VALUE. 
type appLabel apps.Apps func (au *appLabel) Set(s string) error { app := (*apps.Apps)(au).Last() if app == nil { - return fmt.Errorf("--label must follow an image") + return fmt.Errorf("--user-label must follow an image") } fields := strings.SplitN(s, "=", 2) if len(fields) != 2 { - return fmt.Errorf("invalid format of --Label flag %q", s) + return fmt.Errorf("invalid format of --user-label flag %q", s) } - if app.CRILabels == nil { - app.CRILabels = make(map[string]string) + if app.UserLabels == nil { + app.UserLabels = make(map[string]string) } - app.CRILabels[fields[0]] = fields[1] + app.UserLabels[fields[0]] = fields[1] return nil } @@ -566,7 +566,7 @@ func (au *appLabel) String() string { return "" } var labels []string - for name, value := range app.CRILabels { + for name, value := range app.UserLabels { labels = append(labels, fmt.Sprintf("%s=%s", name, value)) } return strings.Join(labels, ",") diff --git a/rkt/prepare.go b/rkt/prepare.go index 44a57a1400..14a5015662 100644 --- a/rkt/prepare.go +++ b/rkt/prepare.go @@ -61,7 +61,7 @@ func init() { cmdPrepare.Flags().BoolVar(&flagInheritEnv, "inherit-env", false, "inherit all environment variables not set by apps") cmdPrepare.Flags().BoolVar(&flagNoOverlay, "no-overlay", false, "disable overlay filesystem") cmdPrepare.Flags().BoolVar(&flagPrivateUsers, "private-users", false, "run within user namespaces.") - cmdPrepare.Flags().Var(&flagExplicitEnv, "set-env", "environment variable to set for apps in the form name=value") + cmdPrepare.Flags().Var(&flagExplicitEnv, "set-env", "environment variable to set for all the apps in the form key=value, this will be overriden by --environment") cmdPrepare.Flags().Var(&flagEnvFromFile, "set-env-file", "the path to an environment variables file") cmdPrepare.Flags().BoolVar(&flagStoreOnly, "store-only", false, "use only available images in the store (do not discover or download from remote URLs)") cmdPrepare.Flags().BoolVar(&flagNoStore, "no-store", false, "fetch images 
ignoring the local store") @@ -108,11 +108,10 @@ func runPrepare(cmd *cobra.Command, args []string) (exit int) { return 1 } - if len(flagPodManifest) > 0 && (len(flagPorts) > 0 || flagStoreOnly || flagNoStore || - flagInheritEnv || !flagExplicitEnv.IsEmpty() || !flagEnvFromFile.IsEmpty() || - (*appsVolume)(&rktApps).String() != "" || (*appMount)(&rktApps).String() != "" || (*appExec)(&rktApps).String() != "" || - (*appUser)(&rktApps).String() != "" || (*appGroup)(&rktApps).String() != "" || - (*appCapsRetain)(&rktApps).String() != "" || (*appCapsRemove)(&rktApps).String() != "") { + if len(flagPodManifest) > 0 && (rktApps.Count() > 0 || + (*appsVolume)(&rktApps).String() != "" || (*appMount)(&rktApps).String() != "" || + len(flagPorts) > 0 || flagStoreOnly || flagNoStore || + flagInheritEnv || !flagExplicitEnv.IsEmpty() || !flagEnvFromFile.IsEmpty()) { stderr.Print("conflicting flags set with --pod-manifest (see --help)") return 1 } diff --git a/rkt/run.go b/rkt/run.go index dfcfd91e9f..acbc190a50 100644 --- a/rkt/run.go +++ b/rkt/run.go @@ -105,8 +105,8 @@ func addAppFlags(cmd *cobra.Command) { cmd.Flags().Var((*appGroup)(&rktApps), "group", "group override for the preceding image (example: '--group=group')") cmd.Flags().Var((*appSupplementaryGIDs)(&rktApps), "supplementary-gids", "supplementary group IDs override for the preceding image (examples: '--supplementary-gids=1024,2048'") cmd.Flags().Var((*appName)(&rktApps), "name", "set the name of the app (example: '--name=foo'). 
If not set, then the app name default to the image's name") - cmd.Flags().Var((*appAnnotation)(&rktApps), "annotation", "set the app's annotations (example: '--annotation=foo=bar')") - cmd.Flags().Var((*appLabel)(&rktApps), "label", "set the app's labels (example: '--label=foo=bar')") + cmd.Flags().Var((*appAnnotation)(&rktApps), "user-annotation", "set the app's annotations (example: '--user-annotation=foo=bar')") + cmd.Flags().Var((*appLabel)(&rktApps), "user-label", "set the app's labels (example: '--user-label=foo=bar')") cmd.Flags().Var((*appEnv)(&rktApps), "environment", "set the app's environment variables (example: '--environment=foo=bar')") } @@ -125,7 +125,7 @@ func init() { cmdRun.Flags().BoolVar(&flagInheritEnv, "inherit-env", false, "inherit all environment variables not set by apps") cmdRun.Flags().BoolVar(&flagNoOverlay, "no-overlay", false, "disable overlay filesystem") cmdRun.Flags().BoolVar(&flagPrivateUsers, "private-users", false, "run within user namespaces.") - cmdRun.Flags().Var(&flagExplicitEnv, "set-env", "environment variable to set for apps in the form key=value") + cmdRun.Flags().Var(&flagExplicitEnv, "set-env", "environment variable to set for all the apps in the form key=value, this will be overriden by --environment") cmdRun.Flags().Var(&flagEnvFromFile, "set-env-file", "path to an environment variables file") cmdRun.Flags().BoolVar(&flagInteractive, "interactive", false, "run pod interactively. If true, only one image may be supplied.") cmdRun.Flags().Var(&flagDNS, "dns", "name servers to write in /etc/resolv.conf. Pass 'host' to use host's resolv.conf. 
Pass 'none' to ignore CNI DNS config") @@ -193,11 +193,10 @@ func runRun(cmd *cobra.Command, args []string) (exit int) { return 1 } - if len(flagPodManifest) > 0 && (len(flagPorts) > 0 || rktApps.Count() > 0 || flagStoreOnly || flagNoStore || - flagInheritEnv || !flagExplicitEnv.IsEmpty() || !flagEnvFromFile.IsEmpty() || - (*appsVolume)(&rktApps).String() != "" || (*appMount)(&rktApps).String() != "" || (*appExec)(&rktApps).String() != "" || - (*appUser)(&rktApps).String() != "" || (*appGroup)(&rktApps).String() != "" || - (*appCapsRetain)(&rktApps).String() != "" || (*appCapsRemove)(&rktApps).String() != "") { + if len(flagPodManifest) > 0 && (rktApps.Count() > 0 || + (*appsVolume)(&rktApps).String() != "" || (*appMount)(&rktApps).String() != "" || + len(flagPorts) > 0 || flagStoreOnly || flagNoStore || + flagInheritEnv || !flagExplicitEnv.IsEmpty() || !flagEnvFromFile.IsEmpty()) { stderr.Print("conflicting flags set with --pod-manifest (see --help)") return 1 } diff --git a/stage0/app.go b/stage0/app.go index 62de66fefc..2d72c42c59 100644 --- a/stage0/app.go +++ b/stage0/app.go @@ -233,12 +233,12 @@ func AddApp(pcfg PrepareConfig, cfg RunConfig, dir string, img *types.Hash) erro ra.App.SupplementaryGIDs = app.SupplementaryGIDs } - if app.CRIAnnotations != nil { - ra.App.CRIAnnotations = app.CRIAnnotations + if app.UserAnnotations != nil { + ra.App.CRIAnnotations = app.UserAnnotations } - if app.CRILabels != nil { - ra.App.CRILabels = app.CRILabels + if app.UserLabels != nil { + ra.App.CRILabels = app.UserLabels } if app.Environments != nil { diff --git a/stage0/run.go b/stage0/run.go index 2ccae20baf..9cb991c561 100644 --- a/stage0/run.go +++ b/stage0/run.go @@ -70,8 +70,8 @@ type PrepareConfig struct { SkipTreeStoreCheck bool // skip checking the treestore before rendering PodManifest string // use the pod manifest specified by the user, this will ignore flags such as '--volume', '--port', etc. 
PrivateUsers *user.UidRange // user namespaces - CRIAnnotations types.CRIAnnotations // CRI annotations for the pod. - CRILabels types.CRILabels // CRI labels for the pod. + UserAnnotations types.CRIAnnotations // user annotations for the pod. + UserLabels types.CRILabels // user labels for the pod. } // RunConfig defines the configuration parameters needed by Run @@ -208,9 +208,18 @@ func generatePodManifest(cfg PrepareConfig, dir string) ([]byte, error) { if err != nil { return errwrap.Wrap(errors.New("error getting the manifest"), err) } - appName, err := imageNameToAppName(am.Name) - if err != nil { - return errwrap.Wrap(errors.New("error converting image name to app name"), err) + + var appName *types.ACName + if app.Name != "" { + appName, err = types.NewACName(app.Name) + if err != nil { + return errwrap.Wrap(errors.New("invalid app name format"), err) + } + } else { + appName, err = imageNameToAppName(am.Name) + if err != nil { + return errwrap.Wrap(errors.New("error converting image name to app name"), err) + } } if err := prepareAppImage(cfg, *appName, img, dir, cfg.UseOverlay); err != nil { @@ -270,6 +279,14 @@ func generatePodManifest(cfg PrepareConfig, dir string) ([]byte, error) { ra.App.SupplementaryGIDs = app.SupplementaryGIDs } + if app.UserAnnotations != nil { + ra.App.CRIAnnotations = app.UserAnnotations + } + + if app.UserLabels != nil { + ra.App.CRILabels = app.UserLabels + } + // loading the environment from the lowest priority to highest if cfg.InheritEnv { // Inherit environment does not override app image environment @@ -278,6 +295,15 @@ func generatePodManifest(cfg PrepareConfig, dir string) ([]byte, error) { mergeEnvs(&ra.App.Environment, cfg.EnvFromFile, true) mergeEnvs(&ra.App.Environment, cfg.ExplicitEnv, true) + + if app.Environments != nil { + envs := make([]string, 0, len(app.Environments)) + for name, value := range app.Environments { + envs = append(envs, fmt.Sprintf("%s=%s", name, value)) + } + mergeEnvs(&ra.App.Environment, 
envs, true) + } + pm.Apps = append(pm.Apps, ra) return nil }); err != nil { @@ -300,8 +326,8 @@ func generatePodManifest(cfg PrepareConfig, dir string) ([]byte, error) { Value: strconv.FormatBool(cfg.Mutable), }) - pm.CRIAnnotations = cfg.CRIAnnotations - pm.CRILabels = cfg.CRILabels + pm.CRIAnnotations = cfg.UserAnnotations + pm.CRILabels = cfg.UserLabels pmb, err := json.Marshal(pm) if err != nil { From 4d735db628ab805ada9d42fb4c0765f6e71075b8 Mon Sep 17 00:00:00 2001 From: Yifan Gu Date: Thu, 22 Sep 2016 17:15:34 -0700 Subject: [PATCH 20/32] Documentation: Update docs for rkt run/prepare. --- Documentation/subcommands/prepare.md | 4 +++ Documentation/subcommands/run.md | 46 +++++++++++++++++++++++++--- 2 files changed, 45 insertions(+), 5 deletions(-) diff --git a/Documentation/subcommands/prepare.md b/Documentation/subcommands/prepare.md index 97b05bd330..81471274b1 100644 --- a/Documentation/subcommands/prepare.md +++ b/Documentation/subcommands/prepare.md @@ -52,12 +52,16 @@ c9fad0e6-8236-4fc2-ad17-55d0a4c7d742 | Flag | Default | Options | Description | | --- | --- | --- | --- | +| `--user-annotation` | none | annotation add to the app's UserAnnotations field | Set the app's annotations (example: '--user-annotation=foo=bar'). | | `--caps-remove` | none | capability to remove (example: '--caps-remove=CAP\_SYS\_CHROOT,CAP\_MKNOD') | Capabilities to remove from the process's capabilities bounding set, all others from the default set will be included | | `--caps-retain` | none | capability to retain (example: '--caps-remove=CAP\_SYS\_ADMIN,CAP\_NET\_ADMIN') | Capabilities to retain in the process's capabilities bounding set, all others will be removed | +| `--environment` | none | environment variables add to the app's environment variables | Set the app's environment variables (example: '--environment=foo=bar'). | | `--exec` | none | Path to executable | Override the exec command for the preceding image. 
| | `--group` | root | gid, groupname or file path | Group override for the preceding image (example: '--group=group') | | `--inherit-env` | `false` | `true` or `false` | Inherit all environment variables not set by apps. | +| `--user-label` | none | label add to the apps' UserLabels field | Set the app's labels (example: '--user-label=foo=bar'). | | `--mount` | none | Mount syntax (ex. `--mount volume=NAME,target=PATH`) | Mount point binding a volume to a path within an app. See [Mounting Volumes without Mount Points](#mounting-volumes-without-mount-points). | +| `--name` | none | Name of the app | Set the name of the app (example: '--name=foo'). If not set, then the app name default to the image's name | | `--no-overlay` | `false` | `true` or `false` | Disable the overlay filesystem. | | `--no-store` | `false` | `true` or `false` | Fetch images, ignoring the local store. See [image fetching behavior](../image-fetching-behavior.md) | | `--pod-manifest` | none | A path | The path to the pod manifest. If it's non-empty, then only `--net`, `--no-overlay` and `--interactive` will have effect. | diff --git a/Documentation/subcommands/run.md b/Documentation/subcommands/run.md index 6e26f869f8..b10d55524f 100644 --- a/Documentation/subcommands/run.md +++ b/Documentation/subcommands/run.md @@ -32,6 +32,16 @@ Multiple applications can be run in a pod by passing multiple images to the run # rkt run example.com/app1 example.com/app2 ``` +## Overriding the app's name + +By default, the image's name will be used as the app's name. +It can be overridden by rkt using the `--name` flag. +This comes in handy when we want to run multiple apps using the same image: + +``` +# rkt --insecure-options=image run docker://busybox --name=busybox1 docker://busybox --name=busybox2 +``` + ## Overriding Executable to launch Application images include an `exec` field that specifies the executable to launch. 
@@ -74,19 +84,34 @@ This can be combined with overridden executables: # rkt run example.com/worker --exec /bin/ov -- --loglevel verbose --- example.com/syncer --exec /bin/syncer2 -- --interval 30s ``` +## Adding user annotations and user labels + +Additional annotations and labels can be added to the app by using `--user-annotation` and `--user-label` flag. +The annotations and labels will appear in the app's `UserAnnotations` and `UserLabels` field. + +``` +# rkt run example.com/example --user-annotation=foo=bar --user-label=hello=world +``` + ## Influencing Environment Variables -To inherit all environment variables from the parent use the `--inherit-env` flag. +To inherit all environment variables from the parent, use the `--inherit-env` flag. + +To explicitly set environment variables for all apps, use the `--set-env` flag. -To explicitly set individual environment variables use the `--set-env` flag. +To explicitly set environment variables for all apps from a file, use the `--set-env-file` flag. +Variables are expected to be in the format `VAR_NAME=VALUE` separated by the new line character `\n`. +Lines starting with `#` or `;` and empty ones will be ignored. + +To explicitly set environment variables for each app individually, use the `--environment` flag. -To explicitly set environment variables from a file use the `--set-env-file` flag. Variables are expected to be in the format `VAR_NAME=VALUE` separated by the new line character `\n`. Lines starting with `#` or `;` and empty ones will be ignored. 
The precedence is as follows with the last item replacing previous environment entries: - Parent environment - App image environment -- Explicitly set environment variables from file (`--set-env-file`) -- Explicitly set environment variables on command line (`--set-env`) +- Explicitly set environment variables for all apps from file (`--set-env-file`) +- Explicitly set environment variables for all apps on command line (`--set-env`) +- Explicitly set environment variables for each app on command line (`--environment`) ``` # export EXAMPLE_ENV=hello @@ -95,6 +120,13 @@ The precedence is as follows with the last item replacing previous environment e EXAMPLE_ENV=hello FOO=bar EXAMPLE_OVERRIDE=over + +# export EXAMPLE_ENV=hello +# export EXAMPLE_OVERRIDE=under +# rkt run --inherit-env --set-env=FOO=bar --set-env=EXAMPLE_OVERRIDE=over example.com/env-printer --environment=EXAMPLE_OVERRIDE=ride +EXAMPLE_ENV=hello +FOO=bar +EXAMPLE_OVERRIDE=ride ``` ## Disable Signature Verification @@ -355,20 +387,24 @@ This feature will be disabled automatically if the underlying filesystem does no | Flag | Default | Options | Description | | --- | --- | --- | --- | +| `--user-annotation` | none | annotation add to the app's UserAnnotations field | Set the app's annotations (example: '--user-annotation=foo=bar'). | | `--caps-remove` | none | capability to remove (e.g. `--caps-remove=CAP_SYS_CHROOT,CAP_MKNOD`) | Capabilities to remove from the process's capabilities bounding set; all others from the default set will be included. | | `--caps-retain` | none | capability to retain (e.g. `--caps-retain=CAP_SYS_ADMIN,CAP_NET_ADMIN`) | Capabilities to retain in the process's capabilities bounding set; all others will be removed. | | `--cpu` | none | CPU units (e.g. `--cpu=500m`) | CPU limit for the preceding image in [Kubernetes resource model](https://github.com/kubernetes/kubernetes/blob/release-1.2/docs/design/resources.md) format. 
| | `--dns` | none | IP Address | Name server to write in `/etc/resolv.conf`. It can be specified several times. | | `--dns-opt` | none | DNS option | DNS option from resolv.conf(5) to write in `/etc/resolv.conf`. It can be specified several times. | | `--dns-search` | none | Domain name | DNS search domain to write in `/etc/resolv.conf`. It can be specified several times. | +| `--environment` | none | environment variables add to the app's environment variables | Set the app's environment variables (example: '--environment=foo=bar'). | | `--exec` | none | Path to executable | Override the exec command for the preceding image. | | `--group` | root | gid, groupname or file path (e.g. `--group=core`) | Group override for the preceding image. | | `--hostname` | `rkt-$PODUUID` | A host name | Set pod's host name. | | `--inherit-env` | `false` | `true` or `false` | Inherit all environment variables not set by apps. | | `--interactive` | `false` | `true` or `false` | Run pod interactively. If true, only one image may be supplied. | +| `--user-label` | none | label add to the apps' UserLabels field | Set the app's labels (example: '--user-label=foo=bar'). | | `--mds-register` | `false` | `true` or `false` | Register pod with metadata service. It needs network connectivity to the host (`--net` as `default`, `default-restricted`, or `host`). | | `--memory` | none | Memory units (e.g. `--memory=50M`) | Memory limit for the preceding image in [Kubernetes resource model](https://github.com/kubernetes/kubernetes/blob/release-1.2/docs/design/resources.md) format. | | `--mount` | none | Mount syntax (e.g. `--mount volume=NAME,target=PATH`) | Mount point binding a volume to a path within an app. See [Mounting Volumes without Mount Points](#mounting-volumes-without-mount-points). | +| `--name` | none | Name of the app | Set the name of the app (example: '--name=foo'). 
If not set, then the app name default to the image's name | | `--net` | `default` | A comma-separated list of networks. (e.g. `--net[=n[:args], ...]`) | Configure the pod's networking. Optionally, pass a list of user-configured networks to load and set arguments to pass to each network, respectively. | | `--no-overlay` | `false` | `true` or `false` | Disable the overlay filesystem. | | `--no-store` | `false` | `true` or `false` | Fetch images, ignoring the local store. See [image fetching behavior](../image-fetching-behavior.md). | From 62dd49d2e758e4fbbe3ba613ff8cfbe644fa4884 Mon Sep 17 00:00:00 2001 From: Sergiusz Urbaniak Date: Mon, 26 Sep 2016 11:26:48 +0200 Subject: [PATCH 21/32] app-start: set up unit files/cgroups during app-add Currently we set up the app's service during app-start which is not the right place. This implements the proper logic to set up systemd service files, as well as any remounts of cgroups during app-add. --- .../devel/stage1-implementors-guide.md | 14 +- stage0/app.go | 129 ++++++++----- stage0/manifest.go | 1 + stage1/aci/aci-manifest.in | 4 + stage1/app-add/app-add.go | 169 ++++++++++++++++++ stage1/app-add/app-add.mk | 1 + stage1/app-start/app-start.go | 116 +----------- stage1/secondary-stuff.mk | 1 + 8 files changed, 272 insertions(+), 163 deletions(-) create mode 100644 stage1/app-add/app-add.go create mode 100644 stage1/app-add/app-add.mk diff --git a/Documentation/devel/stage1-implementors-guide.md b/Documentation/devel/stage1-implementors-guide.md index 59431c3e1b..3c483cfd65 100644 --- a/Documentation/devel/stage1-implementors-guide.md +++ b/Documentation/devel/stage1-implementors-guide.md @@ -139,9 +139,9 @@ In the bundled rkt stage 1, the entrypoint is sending SIGTERM signal to systemd- * `--force` to force the stopping of the pod. E.g. 
in the bundled rkt stage 1, stop sends SIGKILL * UUID of the pod -### rkt app start +### rkt app add -`coreos.com/rkt/stage1/app/start` +`coreos.com/rkt/stage1/app/add` #### Arguments @@ -153,6 +153,16 @@ In the bundled rkt stage 1, the entrypoint is sending SIGTERM signal to systemd- * `--disable-seccomp` disables seccomp (overrides `retain-set` and `remove-set`) * `--private-users=$SHIFT` to define a UID/GID shift when using user namespaces. SHIFT is a two-value colon-separated parameter, the first value is the host UID to assign to the container and the second one is the number of host UIDs to assign. +### rkt app start + +`coreos.com/rkt/stage1/app/start` + +#### Arguments + +`start $OPTIONS UUID APPNAME ENTERENTRYPOINT PID` + +* `--debug` to activate debugging + ### rkt app stop `coreos.com/rkt/stage1/app/stop` diff --git a/stage0/app.go b/stage0/app.go index 2d72c42c59..b3b9d26ed6 100644 --- a/stage0/app.go +++ b/stage0/app.go @@ -28,6 +28,7 @@ import ( "syscall" "github.com/coreos/rkt/common" + "github.com/coreos/rkt/common/apps" "github.com/coreos/rkt/pkg/aci" "github.com/coreos/rkt/pkg/user" // FIXME this should not be in stage1 anymore @@ -40,13 +41,10 @@ import ( type StartConfig struct { *CommonConfig - Dir string - UsesOverlay bool - AppName *types.ACName - PodPID int - InsecureCapabilities bool // Do not restrict capabilities - InsecurePaths bool // Do not restrict access to files in sysfs or procfs - InsecureSeccomp bool // Do not add seccomp restrictions + Dir string + UsesOverlay bool + AppName *types.ACName + PodPID int } type StopConfig struct { @@ -57,18 +55,26 @@ type StopConfig struct { } type AddConfig struct { - Name *types.ACName - Annotations types.Annotations + *CommonConfig + Image types.Hash + Apps *apps.Apps + RktGid int + UsesOverlay bool + PodPath string + PodPID int + InsecureCapabilities bool + InsecurePaths bool + InsecureSeccomp bool } -func AddApp(pcfg PrepareConfig, cfg RunConfig, dir string, img *types.Hash) error { +func 
AddApp(cfg AddConfig) error { // there should be only one app in the config - app := pcfg.Apps.Last() + app := cfg.Apps.Last() if app == nil { return errors.New("no image specified") } - am, err := cfg.Store.GetImageManifest(img.String()) + am, err := cfg.Store.GetImageManifest(cfg.Image.String()) if err != nil { return err } @@ -86,7 +92,7 @@ func AddApp(pcfg PrepareConfig, cfg RunConfig, dir string, img *types.Hash) erro } } - p, err := stage1types.LoadPod(dir, cfg.UUID) + p, err := stage1types.LoadPod(cfg.PodPath, cfg.UUID) if err != nil { return errwrap.Wrap(errors.New("error loading pod manifest"), err) } @@ -109,11 +115,12 @@ func AddApp(pcfg PrepareConfig, cfg RunConfig, dir string, img *types.Hash) erro if pm.Apps.Get(*appName) != nil { return fmt.Errorf("error: multiple apps with name %s", *appName) } + if am.App == nil && app.Exec == "" { - return fmt.Errorf("error: image %s has no app section and --exec argument is not provided", img) + return fmt.Errorf("error: image %s has no app section and --exec argument is not provided", cfg.Image) } - appInfoDir := common.AppInfoPath(dir, *appName) + appInfoDir := common.AppInfoPath(cfg.PodPath, *appName) if err := os.MkdirAll(appInfoDir, common.DefaultRegularDirPerm); err != nil { return errwrap.Wrap(errors.New("error creating apps info directory"), err) } @@ -121,8 +128,8 @@ func AddApp(pcfg PrepareConfig, cfg RunConfig, dir string, img *types.Hash) erro uidRange := user.NewBlankUidRange() // TODO(iaguis): DRY: refactor this var treeStoreID string - if cfg.UseOverlay { - treeStoreID, _, err := cfg.TreeStore.Render(img.String(), false) + if cfg.UsesOverlay { + treeStoreID, _, err := cfg.TreeStore.Render(cfg.Image.String(), false) if err != nil { return errwrap.Wrap(errors.New("error rendering tree image"), err) } @@ -131,25 +138,25 @@ func AddApp(pcfg PrepareConfig, cfg RunConfig, dir string, img *types.Hash) erro if err != nil { log.PrintE("warning: tree cache is in a bad state. 
Rebuilding...", err) var err error - treeStoreID, hash, err = cfg.TreeStore.Render(img.String(), true) + treeStoreID, hash, err = cfg.TreeStore.Render(cfg.Image.String(), true) if err != nil { return errwrap.Wrap(errors.New("error rendering tree image"), err) } } cfg.RootHash = hash - if err := ioutil.WriteFile(common.AppTreeStoreIDPath(dir, *appName), []byte(treeStoreID), common.DefaultRegularFilePerm); err != nil { + if err := ioutil.WriteFile(common.AppTreeStoreIDPath(cfg.PodPath, *appName), []byte(treeStoreID), common.DefaultRegularFilePerm); err != nil { return errwrap.Wrap(errors.New("error writing app treeStoreID"), err) } } else { - ad := common.AppPath(dir, *appName) + ad := common.AppPath(cfg.PodPath, *appName) err := os.MkdirAll(ad, common.DefaultRegularDirPerm) if err != nil { return errwrap.Wrap(errors.New("error creating image directory"), err) } - privateUsers, err := preparedWithPrivateUsers(dir) + privateUsers, err := preparedWithPrivateUsers(cfg.PodPath) if err != nil { log.FatalE("error reading user namespace information", err) } @@ -167,21 +174,27 @@ func AddApp(pcfg PrepareConfig, cfg RunConfig, dir string, img *types.Hash) erro return errwrap.Wrap(fmt.Errorf("error shifting app %q's stage2 dir", *appName), err) } - if err := aci.RenderACIWithImageID(*img, ad, cfg.Store, uidRange); err != nil { + if err := aci.RenderACIWithImageID(cfg.Image, ad, cfg.Store, uidRange); err != nil { return errwrap.Wrap(errors.New("error rendering ACI"), err) } } - if err := writeManifest(*cfg.CommonConfig, *img, appInfoDir); err != nil { + if err := writeManifest(*cfg.CommonConfig, cfg.Image, appInfoDir); err != nil { return errwrap.Wrap(errors.New("error writing manifest"), err) } - if err := setupAppImage(cfg, *appName, *img, dir, cfg.UseOverlay); err != nil { + rcfg := RunConfig{ + CommonConfig: cfg.CommonConfig, + UseOverlay: cfg.UsesOverlay, + RktGid: cfg.RktGid, + } + + if err := setupAppImage(rcfg, *appName, cfg.Image, cfg.PodPath, cfg.UsesOverlay); err != 
nil { return fmt.Errorf("error setting up app image: %v", err) } - if cfg.UseOverlay { - imgDir := filepath.Join(dir, "overlay", treeStoreID) + if cfg.UsesOverlay { + imgDir := filepath.Join(cfg.PodPath, "overlay", treeStoreID) if err := os.Chown(imgDir, -1, cfg.RktGid); err != nil { return err } @@ -192,7 +205,7 @@ func AddApp(pcfg PrepareConfig, cfg RunConfig, dir string, img *types.Hash) erro App: am.App, Image: schema.RuntimeImage{ Name: &am.Name, - ID: *img, + ID: cfg.Image, Labels: am.Labels, }, ReadOnlyRootFS: app.ReadOnlyRootFS, @@ -253,7 +266,7 @@ func AddApp(pcfg PrepareConfig, cfg RunConfig, dir string, img *types.Hash) erro env := ra.App.Environment env.Set("AC_APP_NAME", appName.String()) - envFilePath := filepath.Join(common.Stage1RootfsPath(dir), "rkt", "env", appName.String()) + envFilePath := filepath.Join(common.Stage1RootfsPath(cfg.PodPath), "rkt", "env", appName.String()) if err := common.WriteEnvFile(env, uidRange, envFilePath); err != nil { return err @@ -262,14 +275,51 @@ func AddApp(pcfg PrepareConfig, cfg RunConfig, dir string, img *types.Hash) erro apps := append(p.Manifest.Apps, ra) p.Manifest.Apps = apps - if err := updatePodManifest(dir, p.Manifest); err != nil { + if err := updatePodManifest(cfg.PodPath, p.Manifest); err != nil { return err } + eep, err := getStage1Entrypoint(cfg.PodPath, enterEntrypoint) + if err != nil { + return errwrap.Wrap(errors.New("error determining 'enter' entrypoint"), err) + } + + args := []string{ + cfg.UUID.String(), + app.Name, + filepath.Join(common.Stage1RootfsPath(cfg.PodPath), eep), + strconv.Itoa(cfg.PodPID), + } + + if cfg.InsecureCapabilities { + args = append(args, "--disable-capabilities-restriction") + } + + if cfg.InsecurePaths { + args = append(args, "--disable-paths") + } + + if cfg.InsecureSeccomp { + args = append(args, "--disable-seccomp") + } + + privateUsers, err := preparedWithPrivateUsers(cfg.PodPath) + if err != nil { + log.FatalE("error reading user namespace information", err) + } + 
+ if privateUsers != "" { + args = append(args, fmt.Sprintf("--private-users=%s", privateUsers)) + } + if _, err := os.Create(common.AppCreatedPath(p.Root, appName.String())); err != nil { return err } + if err := callEntrypoint(cfg.PodPath, appAddEntrypoint, args); err != nil { + return err + } + return nil } @@ -337,7 +387,7 @@ func callEntrypoint(dir, entrypoint string, args []string) error { } if err := c.Run(); err != nil { - return err + return fmt.Errorf("error executing stage1's entrypoint %q: %v", entrypoint, err) } if err := os.Chdir(previousDir); err != nil { @@ -497,25 +547,6 @@ func StartApp(cfg StartConfig) error { strconv.Itoa(cfg.PodPID), } - if cfg.InsecureCapabilities { - args = append(args, "--disable-capabilities-restriction") - } - if cfg.InsecurePaths { - args = append(args, "--disable-paths") - } - if cfg.InsecureSeccomp { - args = append(args, "--disable-seccomp") - } - - privateUsers, err := preparedWithPrivateUsers(cfg.Dir) - if err != nil { - log.FatalE("error reading user namespace information", err) - } - - if privateUsers != "" { - args = append(args, fmt.Sprintf("--private-users=%s", privateUsers)) - } - if _, err := os.Create(common.AppStartedPath(p.Root, cfg.AppName.String())); err != nil { log.FatalE(fmt.Sprintf("error creating %s-started file", cfg.AppName.String()), err) } diff --git a/stage0/manifest.go b/stage0/manifest.go index 35b92fc4f4..651918919b 100644 --- a/stage0/manifest.go +++ b/stage0/manifest.go @@ -34,6 +34,7 @@ const ( gcEntrypoint = "coreos.com/rkt/stage1/gc" stopEntrypoint = "coreos.com/rkt/stage1/stop" + appAddEntrypoint = "coreos.com/rkt/stage1/app/add" appRmEntrypoint = "coreos.com/rkt/stage1/app/rm" appStartEntrypoint = "coreos.com/rkt/stage1/app/start" appStopEntrypoint = "coreos.com/rkt/stage1/app/stop" diff --git a/stage1/aci/aci-manifest.in b/stage1/aci/aci-manifest.in index 3b575d71dd..0023564a6f 100644 --- a/stage1/aci/aci-manifest.in +++ b/stage1/aci/aci-manifest.in @@ -33,6 +33,10 @@ "name": 
"coreos.com/rkt/stage1/stop", "value": "@RKT_STAGE1_STOP@" }, + { + "name": "coreos.com/rkt/stage1/app/add", + "value": "/app-add" + }, { "name": "coreos.com/rkt/stage1/app/rm", "value": "/app-rm" diff --git a/stage1/app-add/app-add.go b/stage1/app-add/app-add.go new file mode 100644 index 0000000000..e3a235442c --- /dev/null +++ b/stage1/app-add/app-add.go @@ -0,0 +1,169 @@ +// Copyright 2016 The rkt Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//+build linux + +package main + +import ( + "flag" + "fmt" + "io/ioutil" + "os" + "os/exec" + "path/filepath" + + "github.com/coreos/go-systemd/unit" + "github.com/coreos/rkt/common/cgroup" + rktlog "github.com/coreos/rkt/pkg/log" + stage1types "github.com/coreos/rkt/stage1/common/types" + stage1initcommon "github.com/coreos/rkt/stage1/init/common" + + "github.com/appc/spec/schema/types" +) + +var ( + debug bool + disableCapabilities bool + disablePaths bool + disableSeccomp bool + privateUsers string + log *rktlog.Logger + diag *rktlog.Logger +) + +func init() { + flag.BoolVar(&debug, "debug", false, "Run in debug mode") + flag.BoolVar(&disableCapabilities, "disable-capabilities-restriction", false, "Disable capability restrictions") + flag.BoolVar(&disablePaths, "disable-paths", false, "Disable paths restrictions") + flag.BoolVar(&disableSeccomp, "disable-seccomp", false, "Disable seccomp restrictions") + flag.StringVar(&privateUsers, "private-users", "", "Run within user namespace. 
Can be set to [=UIDBASE[:NUIDS]]") +} + +// TODO use named flags instead of positional +func main() { + flag.Parse() + + stage1initcommon.InitDebug(debug) + + log, diag, _ = rktlog.NewLogSet("stage1", debug) + if !debug { + diag.SetOutput(ioutil.Discard) + } + + uuid, err := types.NewUUID(flag.Arg(0)) + if err != nil { + log.PrintE("UUID is missing or malformed", err) + os.Exit(1) + } + + appName, err := types.NewACName(flag.Arg(1)) + if err != nil { + log.PrintE("invalid app name", err) + os.Exit(1) + } + + enterCmd := []string{flag.Arg(2)} + enterCmd = append(enterCmd, fmt.Sprintf("--pid=%s", flag.Arg(3)), "--") + + root := "." + p, err := stage1types.LoadPod(root, uuid) + if err != nil { + log.PrintE("failed to load pod", err) + os.Exit(1) + } + + insecureOptions := stage1initcommon.Stage1InsecureOptions{ + DisablePaths: disablePaths, + DisableCapabilities: disableCapabilities, + DisableSeccomp: disableSeccomp, + } + + ra := p.Manifest.Apps.Get(*appName) + if ra == nil { + log.Printf("failed to get app") + os.Exit(1) + } + + if ra.App.WorkingDirectory == "" { + ra.App.WorkingDirectory = "/" + } + + /* prepare cgroups */ + isUnified, err := cgroup.IsCgroupUnified("/") + if err != nil { + log.FatalE("failed to determine the cgroup version", err) + os.Exit(1) + } + + if !isUnified { + enabledCgroups, err := cgroup.GetEnabledV1Cgroups() + if err != nil { + log.FatalE("error getting cgroups", err) + os.Exit(1) + } + + b, err := ioutil.ReadFile(filepath.Join(p.Root, "subcgroup")) + if err == nil { + subcgroup := string(b) + serviceName := stage1initcommon.ServiceUnitName(ra.Name) + + if err := cgroup.RemountCgroupKnobsRW(enabledCgroups, subcgroup, serviceName, enterCmd); err != nil { + log.FatalE("error restricting container cgroups", err) + os.Exit(1) + } + } else { + log.PrintE("continuing with per-app isolators disabled", err) + } + } + + /* write service file */ + binPath, err := stage1initcommon.FindBinPath(p, ra) + if err != nil { + log.PrintE("failed to find 
bin path", err) + os.Exit(1) + } + + w := stage1initcommon.NewUnitWriter(p) + + w.AppUnit(ra, binPath, privateUsers, insecureOptions, + unit.NewUnitOption("Unit", "Before", "halt.target"), + unit.NewUnitOption("Unit", "Conflicts", "halt.target"), + unit.NewUnitOption("Service", "StandardOutput", "journal+console"), + unit.NewUnitOption("Service", "StandardError", "journal+console"), + ) + + w.AppReaperUnit(ra.Name, binPath) + + if err := w.Error(); err != nil { + log.PrintE("error generating app units", err) + os.Exit(1) + } + + args := enterCmd + args = append(args, "/usr/bin/systemctl") + args = append(args, "daemon-reload") + + cmd := exec.Cmd{ + Path: args[0], + Args: args, + } + + if err := cmd.Run(); err != nil { + log.PrintE(`error executing "systemctl daemon-reload"`, err) + os.Exit(1) + } + + os.Exit(0) +} diff --git a/stage1/app-add/app-add.mk b/stage1/app-add/app-add.mk new file mode 100644 index 0000000000..b96c5ea77b --- /dev/null +++ b/stage1/app-add/app-add.mk @@ -0,0 +1 @@ +include stage1/makelib/aci_simple_go_bin.mk diff --git a/stage1/app-start/app-start.go b/stage1/app-start/app-start.go index be51e09eaf..2dc9b7423c 100644 --- a/stage1/app-start/app-start.go +++ b/stage1/app-start/app-start.go @@ -22,37 +22,23 @@ import ( "io/ioutil" "os" "os/exec" - "path/filepath" - - "github.com/coreos/rkt/common/cgroup" rktlog "github.com/coreos/rkt/pkg/log" - stage1types "github.com/coreos/rkt/stage1/common/types" stage1initcommon "github.com/coreos/rkt/stage1/init/common" "github.com/appc/spec/schema/types" - "github.com/coreos/go-systemd/unit" ) var ( - debug bool - disableCapabilities bool - disablePaths bool - disableSeccomp bool - privateUsers string - log *rktlog.Logger - diag *rktlog.Logger + debug bool + log *rktlog.Logger + diag *rktlog.Logger ) func init() { flag.BoolVar(&debug, "debug", false, "Run in debug mode") - flag.BoolVar(&disableCapabilities, "disable-capabilities-restriction", false, "Disable capability restrictions") - 
flag.BoolVar(&disablePaths, "disable-paths", false, "Disable paths restrictions") - flag.BoolVar(&disableSeccomp, "disable-seccomp", false, "Disable seccomp restrictions") - flag.StringVar(&privateUsers, "private-users", "", "Run within user namespace. Can be set to [=UIDBASE[:NUIDS]]") } -// TODO use named flags instead of positional func main() { flag.Parse() @@ -63,12 +49,6 @@ func main() { diag.SetOutput(ioutil.Discard) } - uuid, err := types.NewUUID(flag.Arg(0)) - if err != nil { - log.PrintE("UUID is missing or malformed", err) - os.Exit(1) - } - appName, err := types.NewACName(flag.Arg(1)) if err != nil { log.PrintE("invalid app name", err) @@ -78,98 +58,12 @@ func main() { enterCmd := []string{flag.Arg(2)} enterCmd = append(enterCmd, fmt.Sprintf("--pid=%s", flag.Arg(3)), "--") - root := "." - p, err := stage1types.LoadPod(root, uuid) - if err != nil { - log.PrintE("failed to load pod", err) - os.Exit(1) - } - - insecureOptions := stage1initcommon.Stage1InsecureOptions{ - DisablePaths: disablePaths, - DisableCapabilities: disableCapabilities, - DisableSeccomp: disableSeccomp, - } - - ra := p.Manifest.Apps.Get(*appName) - if ra == nil { - log.Printf("failed to get app") - os.Exit(1) - } - - if ra.App.WorkingDirectory == "" { - ra.App.WorkingDirectory = "/" - } - - /* prepare cgroups */ - isUnified, err := cgroup.IsCgroupUnified("/") - if err != nil { - log.FatalE("failed to determine the cgroup version", err) - os.Exit(1) - } - if !isUnified { - enabledCgroups, err := cgroup.GetEnabledV1Cgroups() - if err != nil { - log.FatalE("error getting cgroups", err) - os.Exit(1) - } - b, err := ioutil.ReadFile(filepath.Join(p.Root, "subcgroup")) - if err == nil { - subcgroup := string(b) - serviceName := stage1initcommon.ServiceUnitName(ra.Name) - - if err := cgroup.RemountCgroupKnobsRW(enabledCgroups, subcgroup, serviceName, enterCmd); err != nil { - log.FatalE("error restricting container cgroups", err) - os.Exit(1) - } - } else { - log.PrintE("continuing with 
per-app isolators disabled", err) - } - } - - /* write service file */ - binPath, err := stage1initcommon.FindBinPath(p, ra) - if err != nil { - log.PrintE("failed to find bin path", err) - os.Exit(1) - } - - w := stage1initcommon.NewUnitWriter(p) - - w.AppUnit(ra, binPath, privateUsers, insecureOptions, - unit.NewUnitOption("Unit", "Before", "halt.target"), - unit.NewUnitOption("Unit", "Conflicts", "halt.target"), - unit.NewUnitOption("Service", "StandardOutput", "journal+console"), - unit.NewUnitOption("Service", "StandardError", "journal+console"), - ) - - w.AppReaperUnit(ra.Name, binPath) - - if err := w.Error(); err != nil { - log.PrintE("error generating app units", err) - os.Exit(1) - } - args := enterCmd args = append(args, "/usr/bin/systemctl") - args = append(args, "daemon-reload") - - cmd := exec.Cmd{ - Path: args[0], - Args: args, - } - - if err := cmd.Run(); err != nil { - log.PrintE("error executing daemon-reload", err) - os.Exit(1) - } - - args = enterCmd - args = append(args, "/usr/bin/systemctl") args = append(args, "start") args = append(args, appName.String()) - cmd = exec.Cmd{ + cmd := exec.Cmd{ Path: args[0], Args: args, } @@ -179,7 +73,5 @@ func main() { os.Exit(1) } - // TODO unmount all the volumes - os.Exit(0) } diff --git a/stage1/secondary-stuff.mk b/stage1/secondary-stuff.mk index f4b9e3723d..449bf4a5f2 100644 --- a/stage1/secondary-stuff.mk +++ b/stage1/secondary-stuff.mk @@ -12,6 +12,7 @@ _S1_SS_SUBDIRS_ := \ reaper \ stop \ stop_kvm \ + app-add \ app-rm \ app-start \ app-stop \ From 5eab251cb17113bbba4d7848d80752839b9cc5cc Mon Sep 17 00:00:00 2001 From: Sergiusz Urbaniak Date: Tue, 27 Sep 2016 12:38:26 +0200 Subject: [PATCH 22/32] app: remove code duplication for preparing the stage1 image This removes some duplicated code in AddApp which was previously copied over. 
--- stage0/app.go | 60 ++++++++++----------------------------------------- stage0/run.go | 51 ++++++++++++++++++++++++------------------- 2 files changed, 40 insertions(+), 71 deletions(-) diff --git a/stage0/app.go b/stage0/app.go index b3b9d26ed6..eeb923b19a 100644 --- a/stage0/app.go +++ b/stage0/app.go @@ -29,7 +29,6 @@ import ( "github.com/coreos/rkt/common" "github.com/coreos/rkt/common/apps" - "github.com/coreos/rkt/pkg/aci" "github.com/coreos/rkt/pkg/user" // FIXME this should not be in stage1 anymore stage1types "github.com/coreos/rkt/stage1/common/types" @@ -125,62 +124,25 @@ func AddApp(cfg AddConfig) error { return errwrap.Wrap(errors.New("error creating apps info directory"), err) } - uidRange := user.NewBlankUidRange() - // TODO(iaguis): DRY: refactor this - var treeStoreID string - if cfg.UsesOverlay { - treeStoreID, _, err := cfg.TreeStore.Render(cfg.Image.String(), false) - if err != nil { - return errwrap.Wrap(errors.New("error rendering tree image"), err) - } - - hash, err := cfg.TreeStore.Check(treeStoreID) - if err != nil { - log.PrintE("warning: tree cache is in a bad state. 
Rebuilding...", err) - var err error - treeStoreID, hash, err = cfg.TreeStore.Render(cfg.Image.String(), true) - if err != nil { - return errwrap.Wrap(errors.New("error rendering tree image"), err) - } - } - cfg.RootHash = hash - - if err := ioutil.WriteFile(common.AppTreeStoreIDPath(cfg.PodPath, *appName), []byte(treeStoreID), common.DefaultRegularFilePerm); err != nil { - return errwrap.Wrap(errors.New("error writing app treeStoreID"), err) - } - } else { - ad := common.AppPath(cfg.PodPath, *appName) - - err := os.MkdirAll(ad, common.DefaultRegularDirPerm) - if err != nil { - return errwrap.Wrap(errors.New("error creating image directory"), err) - } + pcfg := PrepareConfig{ + CommonConfig: cfg.CommonConfig, + PrivateUsers: user.NewBlankUidRange(), + } + if cfg.UsesOverlay { privateUsers, err := preparedWithPrivateUsers(cfg.PodPath) if err != nil { log.FatalE("error reading user namespace information", err) } - if err := uidRange.Deserialize([]byte(privateUsers)); err != nil { + if err := pcfg.PrivateUsers.Deserialize([]byte(privateUsers)); err != nil { return err } - - shiftedUid, shiftedGid, err := uidRange.ShiftRange(uint32(os.Getuid()), uint32(os.Getgid())) - if err != nil { - return errwrap.Wrap(errors.New("error getting uid, gid"), err) - } - - if err := os.Chown(ad, int(shiftedUid), int(shiftedGid)); err != nil { - return errwrap.Wrap(fmt.Errorf("error shifting app %q's stage2 dir", *appName), err) - } - - if err := aci.RenderACIWithImageID(cfg.Image, ad, cfg.Store, uidRange); err != nil { - return errwrap.Wrap(errors.New("error rendering ACI"), err) - } } - if err := writeManifest(*cfg.CommonConfig, cfg.Image, appInfoDir); err != nil { - return errwrap.Wrap(errors.New("error writing manifest"), err) + treeStoreID, err := prepareAppImage(pcfg, *appName, cfg.Image, cfg.PodPath, cfg.UsesOverlay) + if err != nil { + return errwrap.Wrap(fmt.Errorf("error preparing image %s", cfg.Image), err) } rcfg := RunConfig{ @@ -268,7 +230,7 @@ func AddApp(cfg AddConfig) 
error { env.Set("AC_APP_NAME", appName.String()) envFilePath := filepath.Join(common.Stage1RootfsPath(cfg.PodPath), "rkt", "env", appName.String()) - if err := common.WriteEnvFile(env, uidRange, envFilePath); err != nil { + if err := common.WriteEnvFile(env, pcfg.PrivateUsers, envFilePath); err != nil { return err } @@ -286,7 +248,7 @@ func AddApp(cfg AddConfig) error { args := []string{ cfg.UUID.String(), - app.Name, + appName.String(), filepath.Join(common.Stage1RootfsPath(cfg.PodPath), eep), strconv.Itoa(cfg.PodPID), } diff --git a/stage0/run.go b/stage0/run.go index 9cb991c561..0502452cfd 100644 --- a/stage0/run.go +++ b/stage0/run.go @@ -222,8 +222,8 @@ func generatePodManifest(cfg PrepareConfig, dir string) ([]byte, error) { } } - if err := prepareAppImage(cfg, *appName, img, dir, cfg.UseOverlay); err != nil { - return errwrap.Wrap(fmt.Errorf("error setting up image %s", img), err) + if _, err := prepareAppImage(cfg, *appName, img, dir, cfg.UseOverlay); err != nil { + return errwrap.Wrap(fmt.Errorf("error preparing image %s", img), err) } if pm.Apps.Get(*appName) != nil { return fmt.Errorf("error: multiple apps with name %s", am.Name) @@ -439,8 +439,8 @@ func validatePodManifest(cfg PrepareConfig, dir string) ([]byte, error) { if err != nil { return nil, errwrap.Wrap(errors.New("error getting the image manifest from store"), err) } - if err := prepareAppImage(cfg, ra.Name, img.ID, dir, cfg.UseOverlay); err != nil { - return nil, errwrap.Wrap(fmt.Errorf("error setting up image %s", img), err) + if _, err := prepareAppImage(cfg, ra.Name, img.ID, dir, cfg.UseOverlay); err != nil { + return nil, errwrap.Wrap(fmt.Errorf("error preparing image %s", img), err) } if _, ok := appNames[ra.Name]; ok { return nil, fmt.Errorf("multiple apps with same name %s", ra.Name) @@ -731,38 +731,42 @@ func Run(cfg RunConfig, dir string, dataDir string) { // prepareAppImage renders and verifies the tree cache of the app image that // corresponds to the given app name. 
-// When useOverlay is false, it attempts to render and expand the app image -func prepareAppImage(cfg PrepareConfig, appName types.ACName, img types.Hash, cdir string, useOverlay bool) error { +// When useOverlay is false, it attempts to render and expand the app image. +// It returns the tree store ID if overlay is being used. +func prepareAppImage(cfg PrepareConfig, appName types.ACName, img types.Hash, cdir string, useOverlay bool) (string, error) { debug("Loading image %s", img.String()) am, err := cfg.Store.GetImageManifest(img.String()) if err != nil { - return errwrap.Wrap(errors.New("error getting the manifest"), err) + return "", errwrap.Wrap(errors.New("error getting the manifest"), err) } if _, hasOS := am.Labels.Get("os"); !hasOS { - return fmt.Errorf("missing os label in the image manifest") + return "", fmt.Errorf("missing os label in the image manifest") } + if _, hasArch := am.Labels.Get("arch"); !hasArch { - return fmt.Errorf("missing arch label in the image manifest") + return "", fmt.Errorf("missing arch label in the image manifest") } if err := types.IsValidOSArch(am.Labels.ToMap(), ValidOSArch); err != nil { - return err + return "", err } appInfoDir := common.AppInfoPath(cdir, appName) if err := os.MkdirAll(appInfoDir, common.DefaultRegularDirPerm); err != nil { - return errwrap.Wrap(errors.New("error creating apps info directory"), err) + return "", errwrap.Wrap(errors.New("error creating apps info directory"), err) } + var treeStoreID string if useOverlay { if cfg.PrivateUsers.Shift > 0 { - return fmt.Errorf("cannot use both overlay and user namespace: not implemented yet. (Try --no-overlay)") + return "", fmt.Errorf("cannot use both overlay and user namespace: not implemented yet. 
(Try --no-overlay)") } - treeStoreID, _, err := cfg.TreeStore.Render(img.String(), false) + + treeStoreID, _, err = cfg.TreeStore.Render(img.String(), false) if err != nil { - return errwrap.Wrap(errors.New("error rendering tree image"), err) + return "", errwrap.Wrap(errors.New("error rendering tree image"), err) } if !cfg.SkipTreeStoreCheck { @@ -772,39 +776,42 @@ func prepareAppImage(cfg PrepareConfig, appName types.ACName, img types.Hash, cd var err error treeStoreID, hash, err = cfg.TreeStore.Render(img.String(), true) if err != nil { - return errwrap.Wrap(errors.New("error rendering tree image"), err) + return "", errwrap.Wrap(errors.New("error rendering tree image"), err) } } cfg.CommonConfig.RootHash = hash } if err := ioutil.WriteFile(common.AppTreeStoreIDPath(cdir, appName), []byte(treeStoreID), common.DefaultRegularFilePerm); err != nil { - return errwrap.Wrap(errors.New("error writing app treeStoreID"), err) + return "", errwrap.Wrap(errors.New("error writing app treeStoreID"), err) } } else { ad := common.AppPath(cdir, appName) + err := os.MkdirAll(ad, common.DefaultRegularDirPerm) if err != nil { - return errwrap.Wrap(errors.New("error creating image directory"), err) + return "", errwrap.Wrap(errors.New("error creating image directory"), err) } shiftedUid, shiftedGid, err := cfg.PrivateUsers.ShiftRange(uint32(os.Getuid()), uint32(os.Getgid())) if err != nil { - return errwrap.Wrap(errors.New("error getting uid, gid"), err) + return "", errwrap.Wrap(errors.New("error getting uid, gid"), err) } if err := os.Chown(ad, int(shiftedUid), int(shiftedGid)); err != nil { - return errwrap.Wrap(fmt.Errorf("error shifting app %q's stage2 dir", appName), err) + return "", errwrap.Wrap(fmt.Errorf("error shifting app %q's stage2 dir", appName), err) } if err := aci.RenderACIWithImageID(img, ad, cfg.Store, cfg.PrivateUsers); err != nil { - return errwrap.Wrap(errors.New("error rendering ACI"), err) + return "", errwrap.Wrap(errors.New("error rendering ACI"), err) } 
+ if err := writeManifest(*cfg.CommonConfig, img, appInfoDir); err != nil { - return err + return "", errwrap.Wrap(errors.New("error writing manifest"), err) } - return nil + + return treeStoreID, nil } // setupAppImage mounts the overlay filesystem for the app image that From 1babb4fb2188e0b32a3b6b28d5b74401ee29541a Mon Sep 17 00:00:00 2001 From: Sergiusz Urbaniak Date: Thu, 29 Sep 2016 11:11:15 +0200 Subject: [PATCH 23/32] app-rm: introduce RmConfig This introduces a dedicated RmConfig, analogous to the config structs of all the other *App methods. --- stage0/app.go | 53 +++++++++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/stage0/app.go b/stage0/app.go index eeb923b19a..de346349bb 100644 --- a/stage0/app.go +++ b/stage0/app.go @@ -66,6 +66,14 @@ type AddConfig struct { InsecureSeccomp bool } +type RmConfig struct { + *CommonConfig + PodPath string + UsesOverlay bool + AppName *types.ACName + PodPID int +} + func AddApp(cfg AddConfig) error { // there should be only one app in the config app := cfg.Apps.Last() @@ -359,9 +367,8 @@ func callEntrypoint(dir, entrypoint string, args []string) error { return nil } -// TODO(iaguis): RmConfig? 
-func RmApp(dir string, uuid *types.UUID, usesOverlay bool, appName *types.ACName, podPID int) error { - p, err := stage1types.LoadPod(dir, uuid) +func RmApp(cfg RmConfig) error { + p, err := stage1types.LoadPod(cfg.PodPath, cfg.UUID) if err != nil { return errwrap.Wrap(errors.New("error loading pod manifest"), err) } @@ -381,33 +388,33 @@ func RmApp(dir string, uuid *types.UUID, usesOverlay bool, appName *types.ACName return errors.New("immutable pod: cannot remove application") } - app := pm.Apps.Get(*appName) + app := pm.Apps.Get(*cfg.AppName) if app == nil { - return fmt.Errorf("error: nonexistent app %q", *appName) + return fmt.Errorf("error: nonexistent app %q", *cfg.AppName) } - treeStoreID, err := ioutil.ReadFile(common.AppTreeStoreIDPath(dir, *appName)) + treeStoreID, err := ioutil.ReadFile(common.AppTreeStoreIDPath(cfg.PodPath, *cfg.AppName)) if err != nil { return err } - eep, err := getStage1Entrypoint(dir, enterEntrypoint) + eep, err := getStage1Entrypoint(cfg.PodPath, enterEntrypoint) if err != nil { return errwrap.Wrap(errors.New("error determining 'enter' entrypoint"), err) } - if podPID > 0 { + if cfg.PodPID > 0 { // Call app-stop and app-rm entrypoint only if the pod is still running. // Otherwise, there's not much we can do about it except unmounting/removing // the file system. args := []string{ - uuid.String(), - appName.String(), - filepath.Join(common.Stage1RootfsPath(dir), eep), - strconv.Itoa(podPID), + cfg.UUID.String(), + cfg.AppName.String(), + filepath.Join(common.Stage1RootfsPath(cfg.PodPath), eep), + strconv.Itoa(cfg.PodPID), } - if err := callEntrypoint(dir, appStopEntrypoint, args); err != nil { + if err := callEntrypoint(cfg.PodPath, appStopEntrypoint, args); err != nil { status, err := common.GetExitStatus(err) // ignore nonexistent units failing to stop. 
Exit status 5 // comes from systemctl and means the unit doesn't exist @@ -418,45 +425,45 @@ func RmApp(dir string, uuid *types.UUID, usesOverlay bool, appName *types.ACName } } - if err := callEntrypoint(dir, appRmEntrypoint, args); err != nil { + if err := callEntrypoint(cfg.PodPath, appRmEntrypoint, args); err != nil { return err } } - appInfoDir := common.AppInfoPath(dir, *appName) + appInfoDir := common.AppInfoPath(cfg.PodPath, *cfg.AppName) if err := os.RemoveAll(appInfoDir); err != nil { return errwrap.Wrap(errors.New("error removing app info directory"), err) } - if usesOverlay { - appRootfs := common.AppRootfsPath(dir, *appName) + if cfg.UsesOverlay { + appRootfs := common.AppRootfsPath(cfg.PodPath, *cfg.AppName) if err := syscall.Unmount(appRootfs, 0); err != nil { return err } - ts := filepath.Join(dir, "overlay", string(treeStoreID)) + ts := filepath.Join(cfg.PodPath, "overlay", string(treeStoreID)) if err := os.RemoveAll(ts); err != nil { return errwrap.Wrap(errors.New("error removing app info directory"), err) } } - if err := os.RemoveAll(common.AppPath(dir, *appName)); err != nil { + if err := os.RemoveAll(common.AppPath(cfg.PodPath, *cfg.AppName)); err != nil { return err } - appStatusPath := filepath.Join(common.Stage1RootfsPath(dir), "rkt", "status", appName.String()) + appStatusPath := filepath.Join(common.Stage1RootfsPath(cfg.PodPath), "rkt", "status", cfg.AppName.String()) if err := os.Remove(appStatusPath); err != nil && !os.IsNotExist(err) { return err } - envPath := filepath.Join(common.Stage1RootfsPath(dir), "rkt", "env", appName.String()) + envPath := filepath.Join(common.Stage1RootfsPath(cfg.PodPath), "rkt", "env", cfg.AppName.String()) if err := os.Remove(envPath); err != nil && !os.IsNotExist(err) { return err } - removeAppFromPodManifest(pm, appName) + removeAppFromPodManifest(pm, cfg.AppName) - if err := updatePodManifest(dir, pm); err != nil { + if err := updatePodManifest(cfg.PodPath, pm); err != nil { return err } From 
22517acb3416e4a7524e1bc4a8a28f0a13dca527 Mon Sep 17 00:00:00 2001 From: Sergiusz Urbaniak Date: Tue, 4 Oct 2016 13:42:16 +0200 Subject: [PATCH 24/32] Documentation: specify app subcommands synchronization This specifies the synchronization of pod mutation operations using a file lock. --- Documentation/devel/pod-lifecycle.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/Documentation/devel/pod-lifecycle.md b/Documentation/devel/pod-lifecycle.md index 7d1c6003da..6fe748d359 100644 --- a/Documentation/devel/pod-lifecycle.md +++ b/Documentation/devel/pod-lifecycle.md @@ -37,6 +37,25 @@ To prevent the period between first creating a pod's directory and acquiring its | ExitedGarbage | "$var/exited-garbage/$uuid" | exited+deleting | exited+gc-marked | | Garbage | "$var/garbage/$uuid" | prepare-failed+deleting | prepare-failed+gc-marked | +## App + +The `rkt app` family of subcommands allows mutating operations on a running pod, namely adding, starting, stopping, and removing applications. +The `rkt app sandbox` subcommand transitions to the Run phase as described above, whereas the remaining subcommands mutate the pod while staying in the Run phase. +To synchronize operations inside the Run phase, an additional advisory lock `$var/run/$uuid/pod.lck` is introduced. +Locking the `$var/run/$uuid/pod` manifest itself won't work because changes to it need to be atomic, which is realized by overwriting the original manifest. +If the `pod.lck` file is locked, the pod is undergoing a mutation. Note that only `rkt app add/rm` operations are synchronized. +To retain consistency for all other operations (e.g. `rkt list`) that need to read the `$var/run/$uuid/pod` manifest, all mutating operations are atomic. + +The `app add/start/stop/rm` subcommands all run within the Run phase, where the exclusive advisory lock on the `$var/run/$uuid` directory is held by the systemd-nspawn process. 
+The following table gives an overview of the states when a lock on `$var/run/$uuid/pod.lck` is being held: + +| Phase | Locked exclusively | Unlocked | +|--------|--------------------|----------| +| Add | adding | added | +| Start | - | - | +| Stop | - | - | +| Remove | removing | removed | + These phases, their function, and how they proceed through their respective states is explained in more detail below. ## Embryo From 953282c025d78fd6d044347b270563b7dd572d0a Mon Sep 17 00:00:00 2001 From: Sergiusz Urbaniak Date: Tue, 4 Oct 2016 13:43:46 +0200 Subject: [PATCH 25/32] app: implement synchronization of pod mutation operations --- common/common.go | 7 +++++++ stage0/app.go | 15 +++++++++++++++ stage0/run.go | 7 +++++++ 3 files changed, 29 insertions(+) diff --git a/common/common.go b/common/common.go index dbf553ddf1..7bb86077a2 100644 --- a/common/common.go +++ b/common/common.go @@ -102,6 +102,13 @@ func PodManifestPath(root string) string { return filepath.Join(root, "pod") } +// PodManifestLockPath returns the path in root to the Pod Manifest lock file. +// This must be different from the PodManifestPath since mutations on the pod manifest file +// happen by overwriting the original file. +func PodManifestLockPath(root string) string { + return filepath.Join(root, "pod.lck") +} + // AppsStatusesPath returns the path of the status dir for all apps. 
func AppsStatusesPath(root string) string { return filepath.Join(Stage1RootfsPath(root), "/rkt/status") diff --git a/stage0/app.go b/stage0/app.go index de346349bb..689b26ccae 100644 --- a/stage0/app.go +++ b/stage0/app.go @@ -29,6 +29,7 @@ import ( "github.com/coreos/rkt/common" "github.com/coreos/rkt/common/apps" + "github.com/coreos/rkt/pkg/lock" "github.com/coreos/rkt/pkg/user" // FIXME this should not be in stage1 anymore stage1types "github.com/coreos/rkt/stage1/common/types" @@ -99,6 +100,13 @@ func AddApp(cfg AddConfig) error { } } + debug("locking pod") + l, err := lock.ExclusiveLock(common.PodManifestLockPath(cfg.PodPath), lock.RegFile) + if err != nil { + return errwrap.Wrap(errors.New("failed to lock pod"), err) + } + defer l.Close() + p, err := stage1types.LoadPod(cfg.PodPath, cfg.UUID) if err != nil { return errwrap.Wrap(errors.New("error loading pod manifest"), err) @@ -368,6 +376,13 @@ func callEntrypoint(dir, entrypoint string, args []string) error { } func RmApp(cfg RmConfig) error { + debug("locking pod") + l, err := lock.ExclusiveLock(common.PodManifestLockPath(cfg.PodPath), lock.RegFile) + if err != nil { + return errwrap.Wrap(errors.New("failed to lock pod"), err) + } + defer l.Close() + p, err := stage1types.LoadPod(cfg.PodPath, cfg.UUID) if err != nil { return errwrap.Wrap(errors.New("error loading pod manifest"), err) diff --git a/stage0/run.go b/stage0/run.go index 0502452cfd..86d3a3df72 100644 --- a/stage0/run.go +++ b/stage0/run.go @@ -481,6 +481,13 @@ func Prepare(cfg PrepareConfig, dir string, uuid *types.UUID) error { cfg.CommonConfig.ManifestData = string(pmb) + // create pod lock file for app add/rm operations. 
+ f, err := os.OpenFile(common.PodManifestLockPath(dir), os.O_CREATE|os.O_RDWR, 0600) + if err != nil { + return err + } + f.Close() + debug("Writing pod manifest") fn := common.PodManifestPath(dir) if err := ioutil.WriteFile(fn, pmb, common.DefaultRegularFilePerm); err != nil { From 5b3f4db78825d0e8ac9faca1765d8251f9766fdf Mon Sep 17 00:00:00 2001 From: Casey Callendrello Date: Fri, 7 Oct 2016 15:22:34 +0200 Subject: [PATCH 26/32] CRI: Support volume creation at app add time Squashed commit of the following: commit 0b348ab71a2b3a5a576a1129458487fb957f0e43 Merge: ce991fc 2ef0885 Author: Casey Callendrello Date: Fri Oct 7 15:08:00 2016 +0200 Merge remote-tracking branch 'origin/cri' into cri-volumes commit ce991fcf1bbce5fb22b519e88eb0f0cfd6dbaca6 Author: Casey Callendrello Date: Tue Oct 4 20:24:59 2016 +0200 WIP: support app volumes commit dca86e7a7e31238e0891ade140ed4a470243bf17 Merge: 081300c b9ddcc6 Author: Casey Callendrello Date: Tue Oct 4 12:23:06 2016 +0200 Merge remote-tracking branch 'origin/cri' into cri-volumes commit 081300c68b68dd5a2b212d530f852585b64de5bd Author: Alban Crequy Date: Thu Sep 22 12:45:32 2016 +0200 cri: mount volumes in the app This requires systemd with the following fix: https://github.com/systemd/systemd/pull/4152 --- common/apps/apps.go | 3 + rkt/cli_apps.go | 62 ++++++++ stage0/app.go | 1 + stage1/app-add/app-add.go | 2 + stage1/init/common/mount.go | 244 +++++++++++++++++++++++++++---- stage1/init/common/mount_test.go | 189 ++++++++++++++++++++++++ stage1/init/common/pod.go | 43 +++--- stage1/init/common/units.go | 19 ++- stage1/init/init.go | 1 + stage1/init/kvm.go | 35 ++--- 10 files changed, 517 insertions(+), 82 deletions(-) create mode 100644 stage1/init/common/mount_test.go diff --git a/common/apps/apps.go b/common/apps/apps.go index c9e8421ee8..71aebdd148 100644 --- a/common/apps/apps.go +++ b/common/apps/apps.go @@ -135,6 +135,9 @@ func (al *Apps) Validate() error { f := func(mnts []schema.Mount) error { for _, m := range 
mnts { + if m.AppVolume != nil { // allow app-specific volumes + continue + } if _, ok := vs[m.Volume]; !ok { return fmt.Errorf("dangling mount point %q: volume %q not found", m.Path, m.Volume) } diff --git a/rkt/cli_apps.go b/rkt/cli_apps.go index 4a20235737..dc56183e2b 100644 --- a/rkt/cli_apps.go +++ b/rkt/cli_apps.go @@ -23,6 +23,7 @@ import ( "strings" "github.com/coreos/rkt/common/apps" + "github.com/hashicorp/errwrap" "github.com/appc/spec/schema" "github.com/appc/spec/schema/types" @@ -243,6 +244,67 @@ func (al *appsVolume) String() string { return strings.Join(vs, " ") } +// appMountVolume is for CRI style per-app-volumes +// this is a mount and volume in a single argument +// It is exactly like --volume, but with a "target" param +type appMountVolume apps.Apps + +func (am *appMountVolume) Set(s string) error { + pairs, err := url.ParseQuery(strings.Replace(s, ",", "&", -1)) + if err != nil { + return err + } + + mount := schema.Mount{} + + target, ok := pairs["target"] + if !ok { + return fmt.Errorf("missing target= parameter") + } + if len(target) != 1 { + return fmt.Errorf("label %s with multiple values %q", "target", target) + } + mount.Path = target[0] + + delete(pairs, "target") + + vol, err := types.VolumeFromParams(pairs) + if err != nil { + return errwrap.Wrap(fmt.Errorf("error parsing volume component of MountVolume"), err) + } + + mount.AppVolume = vol + mount.Volume = vol.Name + + as := (*apps.Apps)(am) + if as.Count() == 0 { + return fmt.Errorf("an image is required before any MountVolumes") + } + app := as.Last() + app.Mounts = append(app.Mounts, mount) + return nil +} + +func (am *appMountVolume) String() string { + as := (*apps.Apps)(am) + app := as.Last() + if app == nil { + return "" + } + out := "" + for _, mnt := range app.Mounts { + if mnt.AppVolume == nil { + continue + } + out = fmt.Sprintf("%s target=%s,%s", out, mnt.Path, mnt.AppVolume.String()) + } + return out +} + +func (am *appMountVolume) Type() string { + return 
"appMountVolume" +} + // appMemoryLimit is for --memory flags in the form of: --memory=128M type appMemoryLimit apps.Apps diff --git a/stage0/app.go b/stage0/app.go index 689b26ccae..4d172f9124 100644 --- a/stage0/app.go +++ b/stage0/app.go @@ -186,6 +186,7 @@ func AddApp(cfg AddConfig) error { ID: cfg.Image, Labels: am.Labels, }, + Mounts: MergeMounts(cfg.Apps.Mounts, app.Mounts), ReadOnlyRootFS: app.ReadOnlyRootFS, } diff --git a/stage1/app-add/app-add.go b/stage1/app-add/app-add.go index e3a235442c..473aaa0186 100644 --- a/stage1/app-add/app-add.go +++ b/stage1/app-add/app-add.go @@ -128,6 +128,8 @@ func main() { } } + stage1initcommon.AppAddMounts(p, ra, enterCmd) + /* write service file */ binPath, err := stage1initcommon.FindBinPath(p, ra) if err != nil { diff --git a/stage1/init/common/mount.go b/stage1/init/common/mount.go index 749b9014c9..8dc80c0cf1 100644 --- a/stage1/init/common/mount.go +++ b/stage1/init/common/mount.go @@ -16,7 +16,9 @@ package common import ( "fmt" + "io/ioutil" "os" + "os/exec" "path/filepath" "strconv" "syscall" @@ -27,6 +29,8 @@ import ( "github.com/appc/spec/schema" "github.com/appc/spec/schema/types" "github.com/hashicorp/errwrap" + + stage1commontypes "github.com/coreos/rkt/stage1/common/types" ) /* @@ -38,32 +42,19 @@ import ( // mountWrapper is a wrapper around a schema.Mount with an additional field indicating // whether it is an implicit empty volume converted from a Docker image. type mountWrapper struct { - schema.Mount + Mount schema.Mount + Volume types.Volume DockerImplicit bool + ReadOnly bool } -func isMPReadOnly(mountPoints []types.MountPoint, name types.ACName) bool { - for _, mp := range mountPoints { - if mp.Name == name { - return mp.ReadOnly - } +// ConvertedFromDocker determines if an app's image has been converted +// from docker. 
This is needed because implicit docker empty volumes have +// different behavior from AppC +func ConvertedFromDocker(im *schema.ImageManifest) bool { + if im == nil { // nil sometimes sneaks in here due to unit tests + return false } - - return false -} - -// IsMountReadOnly returns if a mount should be readOnly. -// If the readOnly flag in the pod manifest is not nil, it overrides the -// readOnly flag in the image manifest. -func IsMountReadOnly(vol types.Volume, mountPoints []types.MountPoint) bool { - if vol.ReadOnly != nil { - return *vol.ReadOnly - } - - return isMPReadOnly(mountPoints, vol.Name) -} - -func convertedFromDocker(im *schema.ImageManifest) bool { ann := im.Annotations _, ok := ann.Get("appc.io/docker/repository") return ok @@ -72,27 +63,69 @@ func convertedFromDocker(im *schema.ImageManifest) bool { // GenerateMounts maps MountPoint paths to volumes, returning a list of mounts, // each with a parameter indicating if it's an implicit empty volume from a // Docker image. -func GenerateMounts(ra *schema.RuntimeApp, volumes map[types.ACName]types.Volume, imageManifest *schema.ImageManifest) []mountWrapper { +func GenerateMounts(ra *schema.RuntimeApp, podVolumes []types.Volume, convertedFromDocker bool) ([]mountWrapper, error) { app := ra.App var genMnts []mountWrapper + vols := make(map[types.ACName]types.Volume) + for _, v := range podVolumes { + vols[v.Name] = v + } + + // RuntimeApps have mounts, whereas Apps have mountPoints. mountPoints are partially for + // Docker compat, since apps can declare mountpoints. However, if we just run with rkt run, + // then we'll only have a Mount and no corresponding MountPoint. + // Furthermore, Mounts can have embedded volumes in the case of the CRI.
+ // So, we generate a pile of Mounts and their corresponding Volumes + + // Map of mount path (inside the app) -> Mount mnts := make(map[string]schema.Mount) + + // Check runtimeApp's Mounts for _, m := range ra.Mounts { mnts[m.Path] = m + + vol := m.AppVolume // Mounts can supply a volume + if vol == nil { + vv, ok := vols[m.Volume] + if !ok { + return nil, fmt.Errorf("could not find volume %s", m.Volume) + } + vol = &vv + } + + // Find a corresponding MountPoint, which is optional + ro := false + for _, mp := range ra.App.MountPoints { + if mp.Name == m.Volume { + ro = mp.ReadOnly + break + } + } + if vol.ReadOnly != nil { + ro = *vol.ReadOnly + } + genMnts = append(genMnts, mountWrapper{ Mount: m, DockerImplicit: false, + ReadOnly: ro, + Volume: *vol, }) } + // Now, match up MountPoints with Mounts or Volumes + // If there's no Mount and no Volume, generate an empty volume for _, mp := range app.MountPoints { - // there's already an injected mount for this target path, skip + // there's already a Mount for this MountPoint, skip if _, ok := mnts[mp.Path]; ok { continue } - vol, ok := volumes[mp.Name] + + // No Mount, try to match based on volume name + vol, ok := vols[mp.Name] // there is no volume for this mount point, creating an "empty" volume // implicitly if !ok { @@ -108,34 +141,41 @@ func GenerateMounts(ra *schema.RuntimeApp, volumes map[types.ACName]types.Volume GID: &defaultGID, } - dockerImplicit := convertedFromDocker(imageManifest) log.Printf("warning: no volume specified for mount point %q, implicitly creating an \"empty\" volume.
This volume will be removed when the pod is garbage-collected.", mp.Name) - if dockerImplicit { + if convertedFromDocker { log.Printf("Docker converted image, initializing implicit volume with data contained at the mount point %q.", mp.Name) } - volumes[uniqName] = emptyVol + vols[uniqName] = emptyVol genMnts = append(genMnts, mountWrapper{ Mount: schema.Mount{ Volume: uniqName, Path: mp.Path, }, - DockerImplicit: dockerImplicit, + Volume: emptyVol, + ReadOnly: mp.ReadOnly, + DockerImplicit: convertedFromDocker, }) } else { + ro := mp.ReadOnly + if vol.ReadOnly != nil { + ro = *vol.ReadOnly + } genMnts = append(genMnts, mountWrapper{ Mount: schema.Mount{ Volume: vol.Name, Path: mp.Path, }, + Volume: vol, + ReadOnly: ro, DockerImplicit: false, }) } } - return genMnts + return genMnts, nil } // PrepareMountpoints creates and sets permissions for empty volumes. @@ -238,3 +278,147 @@ func ensureDestinationExists(source, destination string) error { } return nil } + +func AppAddMounts(p *stage1commontypes.Pod, ra *schema.RuntimeApp, enterCmd []string) { + vols := make(map[types.ACName]types.Volume) + for _, v := range p.Manifest.Volumes { + vols[v.Name] = v + } + + imageManifest := p.Images[ra.Name.String()] + + mounts, err := GenerateMounts(ra, p.Manifest.Volumes, ConvertedFromDocker(imageManifest)) + if err != nil { + log.FatalE("Could not generate mounts", err) + os.Exit(1) + } + + for _, m := range mounts { + AppAddOneMount(p, ra, m.Volume.Source, m.Mount.Path, m.ReadOnly, enterCmd) + } +} + +/* AppAddOneMount bind-mounts "sourcePath" from the host into "dstPath" in + * the container. + * + * We use the propagation mechanism of systemd-nspawn. In all systemd-nspawn + * containers, the directory "/run/systemd/nspawn/propagate/$MACHINE_ID" on + * the host is propagating mounts to the directory + * "/run/systemd/nspawn/incoming/" in the container mount namespace. Once a + * bind mount is propagated, we simply move it to its correct location.
+ * + * The algorithm is the same as in "machinectl bind": + * https://github.com/systemd/systemd/blob/v231/src/machine/machine-dbus.c#L865 + * except that we don't use setns() to enter the mount namespace of the pod + * because Linux does not allow multithreaded applications (such as Go + * programs) to change mount namespaces with setns. Instead, we fork another + * process written in C (single-threaded) to enter the mount namespace. The + * command used is specified by the "enterCmd" parameter. + * + * Users might request a bind mount to be set up read-only. This complicates + * things a bit because on Linux, setting up a read-only bind mount involves + * two mount() calls, so it is not atomic. We don't want the container to see + * the mount in read-write mode, even for a short time, so we don't create the + * bind mount directly in "/run/systemd/nspawn/propagate/$MACHINE_ID" to avoid + * an immediate propagation to the container. Instead, we create a temporary + * playground in "/tmp/rkt.propagate.XXXX" and create the bind mount in + * "/tmp/rkt.propagate.XXXX/mount" with the correct read-only attribute before + * moving it. + * + * Another complication is that the playground cannot be on a shared mount + * because Linux does not allow MS_MOVE to be applied to mounts with MS_SHARED + * parent mounts. But by default, systemd mounts everything as shared, see: + * https://github.com/systemd/systemd/blob/v231/src/core/mount-setup.c#L392 + * We set up the temporary playground as a slave bind mount to avoid this + * limitation. 
+ */ +func AppAddOneMount(p *stage1commontypes.Pod, ra *schema.RuntimeApp, sourcePath string, dstPath string, readOnly bool, enterCmd []string) { + /* The general plan: + * - bind-mount sourcePath to mountTmp + * - MS_MOVE mountTmp to mountOutside, the systemd propagate dir + * - systemd moves mountOutside to mountInside + * - in the stage1 namespace, bind mountInside to the app's rootfs + */ + + /* Prepare a temporary playground that is not a shared mount */ + playgroundMount, err := ioutil.TempDir("", "rkt.propagate.") + if err != nil { + log.FatalE("error creating temporary propagation directory", err) + os.Exit(1) + } + defer os.Remove(playgroundMount) + + err = syscall.Mount(playgroundMount, playgroundMount, "bind", syscall.MS_BIND, "") + if err != nil { + log.FatalE("error mounting temporary directory", err) + os.Exit(1) + } + defer syscall.Unmount(playgroundMount, 0) + + err = syscall.Mount("", playgroundMount, "none", syscall.MS_SLAVE, "") + if err != nil { + log.FatalE("error mounting temporary directory", err) + os.Exit(1) + } + + /* Bind mount the source into the playground, possibly read-only */ + mountTmp := filepath.Join(playgroundMount, "mount") + if err := ensureDestinationExists(sourcePath, mountTmp); err != nil { + log.FatalE("error creating temporary mountpoint", err) + os.Exit(1) + } + defer os.Remove(mountTmp) + + err = syscall.Mount(sourcePath, mountTmp, "bind", syscall.MS_BIND, "") + if err != nil { + log.FatalE("error mounting temporary mountpoint", err) + os.Exit(1) + } + defer syscall.Unmount(mountTmp, 0) + + if readOnly { + err = syscall.Mount("", mountTmp, "bind", syscall.MS_REMOUNT|syscall.MS_RDONLY|syscall.MS_BIND, "") + if err != nil { + log.FatalE("error remounting temporary mountpoint read-only", err) + os.Exit(1) + } + } + + /* Now that the bind mount has the correct attributes (RO or RW), move + * it to the propagation directory prepared by systemd-nspawn */ + mountOutside := filepath.Join("/run/systemd/nspawn/propagate/", 
"rkt-"+p.UUID.String(), "rkt.mount") + mountInside := filepath.Join("/run/systemd/nspawn/incoming/", filepath.Base(mountOutside)) + if err := ensureDestinationExists(sourcePath, mountOutside); err != nil { + log.FatalE("error creating propagate mountpoint", err) + os.Exit(1) + } + defer os.Remove(mountOutside) + + err = syscall.Mount(mountTmp, mountOutside, "", syscall.MS_MOVE, "") + if err != nil { + log.FatalE("error moving temporary mountpoint to propagate directory", err) + os.Exit(1) + } + defer syscall.Unmount(mountOutside, 0) + + /* Finally move the bind mount at the correct place inside the container. */ + mountDst := filepath.Join("/opt/stage2", ra.Name.String(), "rootfs", dstPath) + mountDstOutside := filepath.Join(p.Root, "stage1/rootfs", mountDst) + if err := ensureDestinationExists(sourcePath, mountDstOutside); err != nil { + log.FatalE("error creating destination directory", err) + os.Exit(1) + } + + args := enterCmd + args = append(args, "/bin/mount", "--move", mountInside, mountDst) + + cmd := exec.Cmd{ + Path: args[0], + Args: args, + } + + if err := cmd.Run(); err != nil { + log.PrintE("error executing mount move", err) + os.Exit(1) + } +} diff --git a/stage1/init/common/mount_test.go b/stage1/init/common/mount_test.go new file mode 100644 index 0000000000..da87d2bee8 --- /dev/null +++ b/stage1/init/common/mount_test.go @@ -0,0 +1,189 @@ +// Copyright 2014 The rkt Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package common + +import ( + "reflect" + "testing" + + "github.com/kr/pretty" + + "github.com/appc/spec/schema" + "github.com/appc/spec/schema/types" +) + +func TestGenerateMounts(t *testing.T) { + tests := []struct { + ra *schema.RuntimeApp + vols []types.Volume + fromDocker bool + hasErr bool + expected []mountWrapper + }{ + { // Test matching ra.mount to volume via name w/o/ mountpoint + ra: &schema.RuntimeApp{ + Mounts: []schema.Mount{ + { + Volume: *types.MustACName("foo-mount"), + Path: "/app/foo", + }, + }, + App: &types.App{ + MountPoints: nil, + }, + }, + vols: []types.Volume{ + { + Name: *types.MustACName("foo-mount"), + Kind: "host", + Source: "/host/foo", + ReadOnly: &falseVar, + }, + }, + fromDocker: false, + hasErr: false, + expected: []mountWrapper{ + { + Mount: schema.Mount{ + Volume: *types.MustACName("foo-mount"), + Path: "/app/foo", + }, + Volume: types.Volume{ + Name: *types.MustACName("foo-mount"), + Kind: "host", + Source: "/host/foo", + ReadOnly: &falseVar, + }, + DockerImplicit: false, + ReadOnly: false, + }, + }, + }, + { // Test matching app's mountpoint to a volume w/o a mount + ra: &schema.RuntimeApp{ + Mounts: nil, + App: &types.App{ + MountPoints: []types.MountPoint{ + { + Name: *types.MustACName("foo-mp"), + Path: "/app/foo-mp", + ReadOnly: false, + }, + }, + }, + }, + vols: []types.Volume{ + { + Name: *types.MustACName("foo-mount"), + Kind: "host", + Source: "/host/foo", + ReadOnly: &falseVar, + }, + { + Name: *types.MustACName("foo-mp"), + Kind: "host", + Source: "/host/bar", + ReadOnly: &falseVar, + }, + }, + fromDocker: false, + hasErr: false, + expected: []mountWrapper{ + { + Mount: schema.Mount{ + Volume: *types.MustACName("foo-mp"), + Path: "/app/foo-mp", + }, + Volume: types.Volume{ + Name: *types.MustACName("foo-mp"), + Kind: "host", + Source: "/host/bar", + ReadOnly: &falseVar, + }, + DockerImplicit: false, + ReadOnly: false, + }, + }, + }, + { // Test that app's Mount can override the volume + ra: &schema.RuntimeApp{ + 
Mounts: []schema.Mount{ + { + Volume: *types.MustACName("foo-mount"), + Path: "/app/foo", + AppVolume: &types.Volume{ + Name: *types.MustACName("foo-mount"), + Kind: "host", + Source: "/host/overridden", + ReadOnly: nil, + }, + }, + }, + + App: &types.App{ + MountPoints: nil, + }, + }, + vols: []types.Volume{ + { + Name: *types.MustACName("foo-mount"), + Kind: "host", + Source: "/host/foo", + ReadOnly: &falseVar, + }, + { + Name: *types.MustACName("foo-mp"), + Kind: "host", + Source: "/host/bar", + ReadOnly: &falseVar, + }, + }, + fromDocker: false, + hasErr: false, + expected: []mountWrapper{ + { + Mount: schema.Mount{ + Volume: *types.MustACName("foo-mount"), + Path: "/app/foo", + AppVolume: &types.Volume{ + Name: *types.MustACName("foo-mount"), + Kind: "host", + Source: "/host/overridden", + ReadOnly: nil, + }, + }, + Volume: types.Volume{ + Name: *types.MustACName("foo-mount"), + Kind: "host", + Source: "/host/overridden", + ReadOnly: nil, + }, + DockerImplicit: false, + ReadOnly: false, + }, + }, + }, + } + + for i, tt := range tests { + result, err := GenerateMounts(tt.ra, tt.vols, tt.fromDocker) + if (err != nil) != tt.hasErr { + t.Errorf("test %d expected error status %t, didn't get it", i, tt.hasErr) + } + if !reflect.DeepEqual(result, tt.expected) { + t.Errorf("test %d, result != expected, %+v", i, pretty.Diff(tt.expected, result)) + } + } +} diff --git a/stage1/init/common/pod.go b/stage1/init/common/pod.go index b521c856b7..f0b0d53d15 100644 --- a/stage1/init/common/pod.go +++ b/stage1/init/common/pod.go @@ -309,7 +309,7 @@ func FindBinPath(p *stage1commontypes.Pod, ra *schema.RuntimeApp) (string, error // node, we create a symlink to its target in "/rkt/volumes". Later, // prepare-app will copy those to "/dev/.rkt/" so that's what we use in the // DeviceAllow= line. 
-func generateDeviceAllows(root string, appName types.ACName, mountPoints []types.MountPoint, mounts []mountWrapper, vols map[types.ACName]types.Volume, uidRange *user.UidRange) ([]string, error) { +func generateDeviceAllows(root string, appName types.ACName, mountPoints []types.MountPoint, mounts []mountWrapper, uidRange *user.UidRange) ([]string, error) { var devAllow []string rktVolumeLinksPath := filepath.Join(root, "rkt", "volumes") @@ -321,21 +321,20 @@ func generateDeviceAllows(root string, appName types.ACName, mountPoints []types } for _, m := range mounts { - v := vols[m.Volume] - if v.Kind != "host" { + if m.Volume.Kind != "host" { continue } - if fileutil.IsDeviceNode(v.Source) { + if fileutil.IsDeviceNode(m.Volume.Source) { mode := "r" - if !IsMountReadOnly(v, mountPoints) { + if !m.ReadOnly { mode += "w" } - tgt := filepath.Join(common.RelAppRootfsPath(appName), m.Path) + tgt := filepath.Join(common.RelAppRootfsPath(appName), m.Mount.Path) // the DeviceAllow= line needs the link path in /dev/.rkt/ - linkRel := filepath.Join("/dev/.rkt", v.Name.String()) + linkRel := filepath.Join("/dev/.rkt", m.Volume.Name.String()) // the real link should be in /rkt/volumes for now - link := filepath.Join(rktVolumeLinksPath, v.Name.String()) + link := filepath.Join(rktVolumeLinksPath, m.Volume.Name.String()) err := os.Symlink(tgt, link) // if the link already exists, we don't need to do anything @@ -484,11 +483,13 @@ func appToNspawnArgs(p *stage1commontypes.Pod, ra *schema.RuntimeApp, insecureOp } imageManifest := p.Images[appName.String()] - mounts := GenerateMounts(ra, vols, imageManifest) + mounts, err := GenerateMounts(ra, p.Manifest.Volumes, ConvertedFromDocker(imageManifest)) + if err != nil { + return nil, errwrap.Wrap(fmt.Errorf("could not generate app %q mounts", appName), err) + } for _, m := range mounts { - vol := vols[m.Volume] - shPath := filepath.Join(sharedVolPath, vol.Name.String()) + shPath := filepath.Join(sharedVolPath, m.Volume.Name.String()) 
absRoot, err := filepath.Abs(p.Root) // Absolute path to the pod's rootfs. if err != nil { @@ -500,31 +501,31 @@ func appToNspawnArgs(p *stage1commontypes.Pod, ra *schema.RuntimeApp, insecureOp // TODO(yifan): This is a temporary fix for systemd-nspawn not handling symlink mounts well. // Could be removed when https://github.com/systemd/systemd/issues/2860 is resolved, and systemd // version is bumped. - mntPath, err := EvaluateSymlinksInsideApp(appRootfs, m.Path) + mntPath, err := EvaluateSymlinksInsideApp(appRootfs, m.Mount.Path) if err != nil { - return nil, errwrap.Wrap(fmt.Errorf("could not evaluate path %v", m.Path), err) + return nil, errwrap.Wrap(fmt.Errorf("could not evaluate path %v", m.Mount.Path), err) } mntAbsPath := filepath.Join(appRootfs, mntPath) - if err := PrepareMountpoints(shPath, mntAbsPath, &vol, m.DockerImplicit); err != nil { + if err := PrepareMountpoints(shPath, mntAbsPath, &m.Volume, m.DockerImplicit); err != nil { return nil, err } opt := make([]string, 6) - if IsMountReadOnly(vol, app.MountPoints) { + if m.ReadOnly { opt[0] = "--bind-ro=" } else { opt[0] = "--bind=" } - switch vol.Kind { + switch m.Volume.Kind { case "host": - opt[1] = vol.Source + opt[1] = m.Volume.Source case "empty": - opt[1] = filepath.Join(common.SharedVolumesPath(absRoot), vol.Name.String()) + opt[1] = filepath.Join(common.SharedVolumesPath(absRoot), m.Volume.Name.String()) default: - return nil, fmt.Errorf(`invalid volume kind %q. Must be one of "host" or "empty"`, vol.Kind) + return nil, fmt.Errorf(`invalid volume kind %q. Must be one of "host" or "empty"`, m.Volume.Kind) } opt[2] = ":" opt[3] = filepath.Join(common.RelAppRootfsPath(appName), mntPath) @@ -532,8 +533,8 @@ func appToNspawnArgs(p *stage1commontypes.Pod, ra *schema.RuntimeApp, insecureOp // If Recursive is not set, default to recursive. 
recursive := true - if vol.Recursive != nil { - recursive = *vol.Recursive + if m.Volume.Recursive != nil { + recursive = *m.Volume.Recursive } // rbind/norbind options exist since systemd-nspawn v226 diff --git a/stage1/init/common/units.go b/stage1/init/common/units.go index 768fe6588d..24515b36d7 100644 --- a/stage1/init/common/units.go +++ b/stage1/init/common/units.go @@ -402,12 +402,6 @@ func (uw *UnitWriter) AppUnit( opts = append(opts, unit.NewUnitOption("Service", "ReadOnlyDirectories", common.RelAppRootfsPath(appName))) } - // TODO(tmrts): Extract this logic into a utility function. - vols := make(map[types.ACName]types.Volume) - for _, v := range uw.p.Manifest.Volumes { - vols[v.Name] = v - } - absRoot, err := filepath.Abs(uw.p.Root) // Absolute path to the pod's rootfs. if err != nil { uw.err = err @@ -417,15 +411,20 @@ func (uw *UnitWriter) AppUnit( rwDirs := []string{} imageManifest := uw.p.Images[appName.String()] - mounts := GenerateMounts(ra, vols, imageManifest) + mounts, err := GenerateMounts(ra, uw.p.Manifest.Volumes, ConvertedFromDocker(imageManifest)) + if err != nil { + uw.err = err + return + } + for _, m := range mounts { - mntPath, err := EvaluateSymlinksInsideApp(appRootfs, m.Path) + mntPath, err := EvaluateSymlinksInsideApp(appRootfs, m.Mount.Path) if err != nil { uw.err = err return } - if !IsMountReadOnly(vols[m.Volume], app.MountPoints) { + if !m.ReadOnly { rwDirs = append(rwDirs, filepath.Join(common.RelAppRootfsPath(appName), mntPath)) } } @@ -435,7 +434,7 @@ func (uw *UnitWriter) AppUnit( if !insecureOptions.DisablePaths && flavor != "kvm" { opts = protectSystemFiles(opts, appName) opts = append(opts, unit.NewUnitOption("Service", "DevicePolicy", "closed")) - deviceAllows, err := generateDeviceAllows(common.Stage1RootfsPath(absRoot), appName, app.MountPoints, mounts, vols, uidRange) + deviceAllows, err := generateDeviceAllows(common.Stage1RootfsPath(absRoot), appName, app.MountPoints, mounts, uidRange) if err != nil { uw.err = err 
return diff --git a/stage1/init/init.go b/stage1/init/init.go index f040f587da..c3f8269611 100644 --- a/stage1/init/init.go +++ b/stage1/init/init.go @@ -686,6 +686,7 @@ func stage1() int { return 1 } } + diag.Println(args) err = stage1common.WithClearedCloExec(lfd, func() error { return syscall.Exec(args[0], args, env) diff --git a/stage1/init/kvm.go b/stage1/init/kvm.go index a911bacc1c..31e7996ea1 100644 --- a/stage1/init/kvm.go +++ b/stage1/init/kvm.go @@ -25,7 +25,6 @@ import ( "syscall" "github.com/appc/spec/schema" - "github.com/appc/spec/schema/types" "github.com/coreos/go-systemd/util" "github.com/coreos/rkt/common" "github.com/coreos/rkt/networking" @@ -55,13 +54,7 @@ func KvmNetworkingToSystemd(p *stage1commontypes.Pod, n *networking.Networking) } func mountSharedVolumes(root string, p *stage1commontypes.Pod, ra *schema.RuntimeApp) error { - app := ra.App appName := ra.Name - volumes := p.Manifest.Volumes - vols := make(map[types.ACName]types.Volume) - for _, v := range volumes { - vols[v.Name] = v - } sharedVolPath := common.SharedVolumesPath(root) if err := os.MkdirAll(sharedVolPath, stage1initcommon.SharedVolPerm); err != nil { @@ -72,10 +65,11 @@ func mountSharedVolumes(root string, p *stage1commontypes.Pod, ra *schema.Runtim } imageManifest := p.Images[appName.String()] - mounts := stage1initcommon.GenerateMounts(ra, vols, imageManifest) + mounts, err := stage1initcommon.GenerateMounts(ra, p.Manifest.Volumes, stage1initcommon.ConvertedFromDocker(imageManifest)) + if err != nil { + return err + } for _, m := range mounts { - vol := vols[m.Volume] - absRoot, err := filepath.Abs(p.Root) // Absolute path to the pod's rootfs. 
if err != nil { return errwrap.Wrap(errors.New("could not get pod's root absolute path"), err) @@ -86,32 +80,31 @@ func mountSharedVolumes(root string, p *stage1commontypes.Pod, ra *schema.Runtim return fmt.Errorf(`could not evaluate absolute path for application rootfs in app: %v`, appName) } - mntPath, err := stage1initcommon.EvaluateSymlinksInsideApp(absAppRootfs, m.Path) + mntPath, err := stage1initcommon.EvaluateSymlinksInsideApp(absAppRootfs, m.Mount.Path) if err != nil { - return errwrap.Wrap(fmt.Errorf("could not evaluate path %v", m.Path), err) + return errwrap.Wrap(fmt.Errorf("could not evaluate path %v", m.Mount.Path), err) } absDestination := filepath.Join(absAppRootfs, mntPath) - shPath := filepath.Join(sharedVolPath, vol.Name.String()) - if err := stage1initcommon.PrepareMountpoints(shPath, absDestination, &vol, m.DockerImplicit); err != nil { + shPath := filepath.Join(sharedVolPath, m.Volume.Name.String()) + if err := stage1initcommon.PrepareMountpoints(shPath, absDestination, &m.Volume, m.DockerImplicit); err != nil { return err } - readOnly := stage1initcommon.IsMountReadOnly(vol, app.MountPoints) var source string - switch vol.Kind { + switch m.Volume.Kind { case "host": - source = vol.Source + source = m.Volume.Source case "empty": - source = filepath.Join(common.SharedVolumesPath(root), vol.Name.String()) + source = filepath.Join(common.SharedVolumesPath(root), m.Volume.Name.String()) default: - return fmt.Errorf(`invalid volume kind %q. Must be one of "host" or "empty"`, vol.Kind) + return fmt.Errorf(`invalid volume kind %q. 
Must be one of "host" or "empty"`, m.Volume.Kind) } if cleanedSource, err := filepath.EvalSymlinks(source); err != nil { return errwrap.Wrap(fmt.Errorf("could not resolve symlink for source: %v", source), err) } else if err := ensureDestinationExists(cleanedSource, absDestination); err != nil { return errwrap.Wrap(fmt.Errorf("could not create destination mount point: %v", absDestination), err) - } else if err := doBindMount(cleanedSource, absDestination, readOnly, vol.Recursive); err != nil { - return errwrap.Wrap(fmt.Errorf("could not bind mount path %v (s: %v, d: %v)", m.Path, source, absDestination), err) + } else if err := doBindMount(cleanedSource, absDestination, m.ReadOnly, m.Volume.Recursive); err != nil { + return errwrap.Wrap(fmt.Errorf("could not bind mount path %v (s: %v, d: %v)", m.Mount.Path, source, absDestination), err) } } return nil From 57036ca5ba261a388f607b0bf1b54f518fe1e32a Mon Sep 17 00:00:00 2001 From: Casey Callendrello Date: Thu, 13 Oct 2016 16:52:57 +0200 Subject: [PATCH 27/32] CRI: pick up appc annotation rename --- lib/app.go | 4 ++-- lib/pod.go | 8 ++++---- stage0/app.go | 4 ++-- stage0/run.go | 30 +++++++++++++++--------------- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/lib/app.go b/lib/app.go index eed6d74ddf..9b2cfd3f49 100644 --- a/lib/app.go +++ b/lib/app.go @@ -61,8 +61,8 @@ func newApp(ra *schema.RuntimeApp, podManifest *schema.PodManifest, pod *pkgPod. 
app := &App{ Name: ra.Name.String(), ImageID: ra.Image.ID.String(), - UserAnnotations: ra.App.CRIAnnotations, - UserLabels: ra.App.CRILabels, + UserAnnotations: ra.App.UserAnnotations, + UserLabels: ra.App.UserLabels, } // Generate mounts diff --git a/lib/pod.go b/lib/pod.go index e4ad6f5e0a..4516bb1942 100644 --- a/lib/pod.go +++ b/lib/pod.go @@ -33,16 +33,16 @@ func NewPodFromInternalPod(p *pkgPod.Pod) (*Pod, error) { pod.AppNames = append(pod.AppNames, app.Name.String()) } - if len(manifest.CRIAnnotations) > 0 { + if len(manifest.UserAnnotations) > 0 { pod.UserAnnotations = make(map[string]string) - for name, value := range manifest.CRIAnnotations { + for name, value := range manifest.UserAnnotations { pod.UserAnnotations[name] = value } } - if len(manifest.CRILabels) > 0 { + if len(manifest.UserLabels) > 0 { pod.UserLabels = make(map[string]string) - for name, value := range manifest.CRILabels { + for name, value := range manifest.UserLabels { pod.UserLabels[name] = value } } diff --git a/stage0/app.go b/stage0/app.go index 4d172f9124..ebd5ec31d9 100644 --- a/stage0/app.go +++ b/stage0/app.go @@ -226,11 +226,11 @@ func AddApp(cfg AddConfig) error { } if app.UserAnnotations != nil { - ra.App.CRIAnnotations = app.UserAnnotations + ra.App.UserAnnotations = app.UserAnnotations } if app.UserLabels != nil { - ra.App.CRILabels = app.UserLabels + ra.App.UserLabels = app.UserLabels } if app.Environments != nil { diff --git a/stage0/run.go b/stage0/run.go index 86d3a3df72..d6033624c0 100644 --- a/stage0/run.go +++ b/stage0/run.go @@ -61,17 +61,17 @@ var debugEnabled bool // PrepareConfig defines the configuration parameters required by Prepare type PrepareConfig struct { *CommonConfig - Apps *apps.Apps // apps to prepare - InheritEnv bool // inherit parent environment into apps - ExplicitEnv []string // always set these environment variables for all the apps - EnvFromFile []string // environment variables loaded from files, set for all the apps - Ports 
[]types.ExposedPort // list of ports that rkt will expose on the host - UseOverlay bool // prepare pod with overlay fs - SkipTreeStoreCheck bool // skip checking the treestore before rendering - PodManifest string // use the pod manifest specified by the user, this will ignore flags such as '--volume', '--port', etc. - PrivateUsers *user.UidRange // user namespaces - UserAnnotations types.CRIAnnotations // user annotations for the pod. - UserLabels types.CRILabels // user labels for the pod. + Apps *apps.Apps // apps to prepare + InheritEnv bool // inherit parent environment into apps + ExplicitEnv []string // always set these environment variables for all the apps + EnvFromFile []string // environment variables loaded from files, set for all the apps + Ports []types.ExposedPort // list of ports that rkt will expose on the host + UseOverlay bool // prepare pod with overlay fs + SkipTreeStoreCheck bool // skip checking the treestore before rendering + PodManifest string // use the pod manifest specified by the user, this will ignore flags such as '--volume', '--port', etc. + PrivateUsers *user.UidRange // user namespaces + UserAnnotations types.UserAnnotations // user annotations for the pod. + UserLabels types.UserLabels // user labels for the pod. 
} // RunConfig defines the configuration parameters needed by Run @@ -280,11 +280,11 @@ func generatePodManifest(cfg PrepareConfig, dir string) ([]byte, error) { } if app.UserAnnotations != nil { - ra.App.CRIAnnotations = app.UserAnnotations + ra.App.UserAnnotations = app.UserAnnotations } if app.UserLabels != nil { - ra.App.CRILabels = app.UserLabels + ra.App.UserLabels = app.UserLabels } // loading the environment from the lowest priority to highest @@ -326,8 +326,8 @@ func generatePodManifest(cfg PrepareConfig, dir string) ([]byte, error) { Value: strconv.FormatBool(cfg.Mutable), }) - pm.CRIAnnotations = cfg.UserAnnotations - pm.CRILabels = cfg.UserLabels + pm.UserAnnotations = cfg.UserAnnotations + pm.UserLabels = cfg.UserLabels pmb, err := json.Marshal(pm) if err != nil { From b59fa4be3c7f16400851d21c1ecf8800fc916a74 Mon Sep 17 00:00:00 2001 From: Sergiusz Urbaniak Date: Wed, 12 Oct 2016 13:24:00 +0200 Subject: [PATCH 28/32] CRI: add cpu-shares isolator Fixes partially #3242 --- common/apps/apps.go | 1 + rkt/cli_apps.go | 36 ++++++++++++++++++++++++++++++ rkt/run.go | 1 + stage0/run.go | 6 ++++- stage1/init/common/units.go | 5 +++++ tests/inspect/inspect.go | 20 +++++++++++++++++ tests/rkt_app_isolator_test.go | 40 ++++++++++++++++++++++++++++++++++ 7 files changed, 108 insertions(+), 1 deletion(-) diff --git a/common/apps/apps.go b/common/apps/apps.go index 71aebdd148..fe6f42b945 100644 --- a/common/apps/apps.go +++ b/common/apps/apps.go @@ -56,6 +56,7 @@ type App struct { Mounts []schema.Mount // mounts for this app (superseding any mounts in rktApps.mounts of same MountPoint) MemoryLimit *types.ResourceMemory // memory isolator override CPULimit *types.ResourceCPU // cpu isolator override + CPUShares *types.LinuxCPUShares // cpu-shares isolator override User, Group string // user, group overrides SupplementaryGIDs []int // supplementary gids override CapsRetain *types.LinuxCapabilitiesRetainSet // os/linux/capabilities-retain-set overrides diff --git 
a/rkt/cli_apps.go b/rkt/cli_apps.go index dc56183e2b..a8fa444bef 100644 --- a/rkt/cli_apps.go +++ b/rkt/cli_apps.go @@ -367,6 +367,42 @@ func (aml *appCPULimit) Type() string { return "appCPULimit" } +// appCPUShares is for --cpu-shares flags in the form of: --cpu-shares=2048 +type appCPUShares apps.Apps + +func (aml *appCPUShares) Set(s string) error { + app := (*apps.Apps)(aml).Last() + if app == nil { + return fmt.Errorf("--cpu-shares must follow an image") + } + shares, err := strconv.Atoi(s) + if err != nil { + return err + } + isolator, err := types.NewLinuxCPUShares(shares) + if err != nil { + return err + } + app.CPUShares = isolator + return nil +} + +func (aml *appCPUShares) String() string { + app := (*apps.Apps)(aml).Last() + if app == nil { + return "" + } + shares := app.CPUShares + if shares == nil { + return "" + } + return strconv.Itoa(int(*shares)) +} + +func (aml *appCPUShares) Type() string { + return "appCPUShares" +} + // appUser is for --user flags in the form of: --user=user type appUser apps.Apps diff --git a/rkt/run.go b/rkt/run.go index acbc190a50..721279f672 100644 --- a/rkt/run.go +++ b/rkt/run.go @@ -82,6 +82,7 @@ image arguments with a lone "---" to resume argument parsing.`, func addIsolatorFlags(cmd *cobra.Command, compat bool) { cmd.Flags().Var((*appMemoryLimit)(&rktApps), "memory", "memory limit for the preceding image (example: '--memory=16Mi', '--memory=50M', '--memory=1G')") cmd.Flags().Var((*appCPULimit)(&rktApps), "cpu", "cpu limit for the preceding image (example: '--cpu=500m')") + cmd.Flags().Var((*appCPUShares)(&rktApps), "cpu-shares", "cpu-shares assigns the specified CPU time share weight (example: '--cpu-shares=2048')") cmd.Flags().Var((*appCapsRetain)(&rktApps), "caps-retain", "capability to retain (example: '--caps-retain=CAP_SYS_ADMIN')") cmd.Flags().Var((*appCapsRemove)(&rktApps), "caps-remove", "capability to remove (example: '--caps-remove=CAP_MKNOD')") cmd.Flags().Var((*appSeccompFilter)(&rktApps), "seccomp", 
"seccomp filter override (example: '--seccomp mode=retain,errno=EPERM,chmod,chown')") diff --git a/stage0/run.go b/stage0/run.go index d6033624c0..9e68e37c8d 100644 --- a/stage0/run.go +++ b/stage0/run.go @@ -320,7 +320,6 @@ func generatePodManifest(cfg PrepareConfig, dir string) ([]byte, error) { return nil, err } - // TODO(sur): add to stage1-implementors-guide and to the spec pm.Annotations = append(pm.Annotations, types.Annotation{ Name: "coreos.com/rkt/stage1/mutable", Value: strconv.FormatBool(cfg.Mutable), @@ -348,6 +347,11 @@ func prepareIsolators(setup *apps.App, app *types.App) error { app.Isolators = append(app.Isolators, isolator) } + if cpuSharesOverride := setup.CPUShares; cpuSharesOverride != nil { + isolator := cpuSharesOverride.AsIsolator() + app.Isolators.ReplaceIsolatorsByName(isolator, []types.ACIdentifier{types.LinuxCPUSharesName}) + } + if oomAdjOverride := setup.OOMScoreAdj; oomAdjOverride != nil { app.Isolators.ReplaceIsolatorsByName(oomAdjOverride.AsIsolator(), []types.ACIdentifier{types.LinuxOOMScoreAdjName}) } diff --git a/stage1/init/common/units.go b/stage1/init/common/units.go index 24515b36d7..6079553592 100644 --- a/stage1/init/common/units.go +++ b/stage1/init/common/units.go @@ -489,6 +489,11 @@ func (uw *UnitWriter) AppUnit( } case *types.LinuxOOMScoreAdj: opts = append(opts, unit.NewUnitOption("Service", "OOMScoreAdjust", strconv.Itoa(int(*v)))) + case *types.LinuxCPUShares: + exit = doWithIsolator("cpu", func() error { + opts = append(opts, unit.NewUnitOption("Service", "CPUShares", strconv.Itoa(int(*v)))) + return nil + }) } } diff --git a/tests/inspect/inspect.go b/tests/inspect/inspect.go index f958a01ba6..0d4fba4679 100644 --- a/tests/inspect/inspect.go +++ b/tests/inspect/inspect.go @@ -62,6 +62,7 @@ var ( PreSleep int PrintMemoryLimit bool PrintCPUQuota bool + PrintCPUShares bool FileName string Content string CheckCgroupMounts bool @@ -106,6 +107,7 @@ func init() { globalFlagset.IntVar(&globalFlags.PreSleep, "pre-sleep", 
-1, "Sleep before executing (in seconds)") globalFlagset.BoolVar(&globalFlags.PrintMemoryLimit, "print-memorylimit", false, "Print cgroup memory limit") globalFlagset.BoolVar(&globalFlags.PrintCPUQuota, "print-cpuquota", false, "Print cgroup cpu quota in milli-cores") + globalFlagset.BoolVar(&globalFlags.PrintCPUShares, "print-cpushares", false, "Print cgroup cpu shares") globalFlagset.StringVar(&globalFlags.FileName, "file-name", "", "The file to read/write, $FILE will be ignored if this is specified") globalFlagset.StringVar(&globalFlags.Content, "content", "", "The content to write, $CONTENT will be ignored if this is specified") globalFlagset.BoolVar(&globalFlags.CheckCgroupMounts, "check-cgroups", false, "Try to write to the cgroup filesystem. Everything should be RO except some well-known files") @@ -456,6 +458,24 @@ func main() { fmt.Printf("CPU Quota: %s\n", strconv.Itoa(quotaMilliCores)) } + if globalFlags.PrintCPUShares { + cpuCgroupPath, err := v1.GetOwnCgroupPath("cpu") + if err != nil { + fmt.Fprintf(os.Stderr, "Error getting own cpu cgroup path: %v\n", err) + os.Exit(1) + } + // we use /proc/1/root to escape the chroot we're in and read our + // cpu quota + sharesPath := filepath.Join("/proc/1/root/sys/fs/cgroup/cpu", cpuCgroupPath, "cpu.shares") + sharesBytes, err := ioutil.ReadFile(sharesPath) + if err != nil { + fmt.Fprintf(os.Stderr, "Can't read cpu.shares\n") + os.Exit(1) + } + + fmt.Printf("CPU Shares: %s", string(sharesBytes)) + } + if globalFlags.CheckCgroupMounts { rootCgroupPath := "/proc/1/root/sys/fs/cgroup" testPaths := []string{rootCgroupPath} diff --git a/tests/rkt_app_isolator_test.go b/tests/rkt_app_isolator_test.go index 8ef4cd8234..78a5e4eb8a 100644 --- a/tests/rkt_app_isolator_test.go +++ b/tests/rkt_app_isolator_test.go @@ -49,6 +49,14 @@ var cpuTest = struct { []string{"--exec=/inspect --print-cpuquota", "--capability=CAP_SYS_PTRACE"}, } +var cpuSharesTest = struct { + testName string + aciBuildArgs []string +}{ + `Check CPU 
shares`, + []string{"--exec=/inspect --print-cpushares", "--capability=CAP_SYS_PTRACE"}, +} + var cgroupsTest = struct { testName string aciBuildArgs []string @@ -102,3 +110,35 @@ func TestAppIsolatorCPU(t *testing.T) { expectedLine = "CPU Quota: " + strconv.Itoa(900) runRktAndCheckOutput(t, rktCmd, expectedLine, false) } + +func TestAppIsolatorCPUShares(t *testing.T) { + isUnified, err := cgroup.IsCgroupUnified("/") + if err != nil { + t.Fatalf("Error determining the cgroup version: %v", err) + } + + if isUnified { + t.Skip("kernel does not support cpu isolator in cgroup2.") + } + + ok, err := cgroup.IsIsolatorSupported("cpu") + if err != nil { + t.Fatalf("Error checking cpu isolator support: %v", err) + } + + if !ok { + t.Skip("CPU isolator not supported.") + } + + ctx := testutils.NewRktRunCtx() + defer ctx.Cleanup() + + t.Logf("Running test: %v", cpuSharesTest.testName) + + aciFileName := patchTestACI("rkt-inspect-isolators.aci", cpuSharesTest.aciBuildArgs...) + defer os.Remove(aciFileName) + + rktCmd := fmt.Sprintf("%s --insecure-options=image run --mds-register=false %s --cpu-shares 12345", ctx.Cmd(), aciFileName) + expectedLine := "CPU Shares: 12345" + runRktAndCheckOutput(t, rktCmd, expectedLine, false) +} From 4f5f81d4205ee5b1bf403b857025d7292c8fe3d4 Mon Sep 17 00:00:00 2001 From: Sergiusz Urbaniak Date: Tue, 1 Nov 2016 09:02:16 +0100 Subject: [PATCH 29/32] cri: add app subcommands --- rkt/app.go | 28 ++++ rkt/app_add.go | 146 +++++++++++++++++++++ rkt/app_exec.go | 37 ++++++ rkt/app_list.go | 65 +++++++++ rkt/app_rm.go | 93 +++++++++++++ rkt/app_sandbox.go | 321 +++++++++++++++++++++++++++++++++++++++++++++ rkt/app_start.go | 94 +++++++++++++ rkt/app_status.go | 134 +++++++++++++++++++ rkt/app_stop.go | 99 ++++++++++++++ 9 files changed, 1017 insertions(+) create mode 100644 rkt/app.go create mode 100644 rkt/app_add.go create mode 100644 rkt/app_exec.go create mode 100644 rkt/app_list.go create mode 100644 rkt/app_rm.go create mode 100644 
rkt/app_sandbox.go create mode 100644 rkt/app_start.go create mode 100644 rkt/app_status.go create mode 100644 rkt/app_stop.go diff --git a/rkt/app.go b/rkt/app.go new file mode 100644 index 0000000000..02c4b0e97e --- /dev/null +++ b/rkt/app.go @@ -0,0 +1,28 @@ +// Copyright 2016 The rkt Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import "github.com/spf13/cobra" + +var ( + cmdApp = &cobra.Command{ + Use: "app [command]", + Short: "Operate on app level operations", + } +) + +func init() { + cmdRkt.AddCommand(cmdApp) +} diff --git a/rkt/app_add.go b/rkt/app_add.go new file mode 100644 index 0000000000..0c7da65ef5 --- /dev/null +++ b/rkt/app_add.go @@ -0,0 +1,146 @@ +// Copyright 2016 The rkt Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package main + +import ( + "fmt" + + "github.com/coreos/rkt/common" + "github.com/coreos/rkt/common/apps" + pkgPod "github.com/coreos/rkt/pkg/pod" + "github.com/coreos/rkt/rkt/image" + "github.com/coreos/rkt/stage0" + "github.com/coreos/rkt/store/imagestore" + "github.com/coreos/rkt/store/treestore" + + "github.com/spf13/cobra" +) + +var ( + cmdAppAdd = &cobra.Command{ + Use: "add UUID IMAGEID ...", + Short: "Add an app to a pod", + Long: `This allows addin an app that's present on the store to a running pod`, + Run: runWrapper(runAppAdd), + } +) + +func init() { + cmdApp.AddCommand(cmdAppAdd) + addAppFlags(cmdAppAdd) + addIsolatorFlags(cmdAppAdd, false) + + // Add per-app volume mounts only for sandbox for now + cmdAppAdd.Flags().Var((*appMountVolume)(&rktApps), "mnt-volume", "Configure a per-app mount and volume directly") + + // Disable interspersed flags to stop parsing after the first non flag + // argument. All the subsequent parsing will be done by parseApps. + // This is needed to correctly handle image args + cmdAppAdd.Flags().SetInterspersed(false) +} + +func runAppAdd(cmd *cobra.Command, args []string) (exit int) { + if len(args) < 2 { + stderr.Print("must provide the pod UUID and an IMAGEID") + return 1 + } + + err := parseApps(&rktApps, args[1:], cmd.Flags(), true) + if err != nil { + stderr.PrintE("error parsing app image arguments", err) + return 1 + } + + if rktApps.Count() > 1 { + stderr.Print("must give only one app") + return 1 + } + + p, err := pkgPod.PodFromUUIDString(getDataDir(), args[0]) + if err != nil { + stderr.PrintE("problem retrieving pod", err) + return 1 + } + defer p.Close() + + if p.State() != pkgPod.Running { + stderr.Printf("pod %q isn't currently running", p.UUID) + return 1 + } + + s, err := imagestore.NewStore(storeDir()) + if err != nil { + stderr.PrintE("cannot open store", err) + return 1 + } + + ts, err := treestore.NewStore(treeStoreDir(), s) + if err != nil { + stderr.PrintE("cannot open treestore", err) + return 1 + 
} + + fn := &image.Finder{ + S: s, + Ts: ts, + Ks: getKeystore(), + + StoreOnly: true, + NoStore: false, + } + + img, err := fn.FindImage(args[1], "", apps.AppImageGuess) + if err != nil { + stderr.PrintE("error finding images", err) + return 1 + } + + podPID, err := p.ContainerPid1() + if err != nil { + stderr.PrintE(fmt.Sprintf("unable to determine the pid for pod %q", p.UUID), err) + return 1 + } + + ccfg := stage0.CommonConfig{ + Store: s, + TreeStore: ts, + UUID: p.UUID, + Debug: globalFlags.Debug, + } + + rktgid, err := common.LookupGid(common.RktGroup) + if err != nil { + stderr.Printf("group %q not found, will use default gid when rendering images", common.RktGroup) + rktgid = -1 + } + + cfg := stage0.AddConfig{ + CommonConfig: &ccfg, + Image: *img, + Apps: &rktApps, + RktGid: rktgid, + UsesOverlay: p.UsesOverlay(), + PodPath: p.Path(), + PodPID: podPID, + } + + err = stage0.AddApp(cfg) + if err != nil { + stderr.PrintE("error adding app to pod", err) + return 1 + } + + return 0 +} diff --git a/rkt/app_exec.go b/rkt/app_exec.go new file mode 100644 index 0000000000..d02b10242f --- /dev/null +++ b/rkt/app_exec.go @@ -0,0 +1,37 @@ +// Copyright 2016 The rkt Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import "github.com/spf13/cobra" + +var ( + // Not using alias because we want 'rkt app exec' appears as + // a subcommand of 'rkt app'. 
+ cmdAppExec = &cobra.Command{ + Use: "exec [--app=APP_NAME] UUID [CMD [ARGS ...]]", + Short: "Execute commands in the given app's namespace.", + Long: "This executes the commands in the given app's namespace. The UUID is the UUID of a running pod. the app name is specified by --app. If CMD and ARGS are empty, then it will execute '/bin/bash' by default.", + Run: ensureSuperuser(runWrapper(runEnter)), + } +) + +func init() { + cmdApp.AddCommand(cmdAppExec) + cmdAppExec.Flags().StringVar(&flagAppName, "app", "", "name of the app to exec within the specified pod, can be empty if there is only one app in the pod.") + // Disable interspersed flags to stop parsing after the first non flag + // argument. This is need to permit to correctly handle + // ARGS for the CMD. + cmdAppExec.Flags().SetInterspersed(false) +} diff --git a/rkt/app_list.go b/rkt/app_list.go new file mode 100644 index 0000000000..87e1361edb --- /dev/null +++ b/rkt/app_list.go @@ -0,0 +1,65 @@ +// Copyright 2016 The rkt Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package main + +import ( + "bytes" + "fmt" + + rkt "github.com/coreos/rkt/lib" + "github.com/spf13/cobra" +) + +var ( + cmdAppList = &cobra.Command{ + Use: "list UUID", + Short: "List apps for the given pod", + Long: "This only lists the name and state of the apps, app status will show more detailed info.", + Run: runWrapper(runAppList), + } +) + +func init() { + cmdApp.AddCommand(cmdAppList) + cmdAppList.Flags().BoolVar(&flagNoLegend, "no-legend", false, "suppress a legend with the list") +} + +func runAppList(cmd *cobra.Command, args []string) int { + if len(args) != 1 { + cmd.Usage() + return 1 + } + + apps, err := rkt.AppsForPod(args[0], getDataDir(), "") + if err != nil { + stderr.PrintE("error listing apps", err) + return 1 + } + + tabBuffer := new(bytes.Buffer) + tabOut := getTabOutWithWriter(tabBuffer) + + if !flagNoLegend { + fmt.Fprintf(tabOut, "NAME\tSTATE\n") + } + + for _, app := range apps { + fmt.Fprintf(tabOut, "%s\t%s\n", app.Name, app.State) + } + + tabOut.Flush() + stdout.Print(tabBuffer) + return 0 +} diff --git a/rkt/app_rm.go b/rkt/app_rm.go new file mode 100644 index 0000000000..10d4e9f601 --- /dev/null +++ b/rkt/app_rm.go @@ -0,0 +1,93 @@ +// Copyright 2016 The rkt Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package main + +import ( + "fmt" + + pkgPod "github.com/coreos/rkt/pkg/pod" + "github.com/coreos/rkt/stage0" + + "github.com/appc/spec/schema/types" + "github.com/spf13/cobra" +) + +var ( + cmdAppRm = &cobra.Command{ + Use: "rm UUID --app=NAME", + Short: "Remove an app from a pod", + Long: `This allows removing an app from a running pod, it will stop it if it's still running`, + Run: runWrapper(runAppRm), + } +) + +func init() { + cmdAppRm.Flags().StringVar(&flagAppName, "app", "", "app to remove") + cmdApp.AddCommand(cmdAppRm) +} + +func runAppRm(cmd *cobra.Command, args []string) (exit int) { + if len(args) < 1 { + stderr.Print("must provide the pod UUID") + return 1 + } + + if flagAppName == "" { + stderr.Print("must provide the app to remove") + return 1 + } + + p, err := pkgPod.PodFromUUIDString(getDataDir(), args[0]) + if err != nil { + stderr.PrintE("problem retrieving pod", err) + return 1 + } + defer p.Close() + + appName, err := types.NewACName(flagAppName) + if err != nil { + stderr.PrintE("invalid app name", err) + } + + podPID := -1 + if p.State() == pkgPod.Running { + podPID, err = p.ContainerPid1() + if err != nil { + stderr.PrintE(fmt.Sprintf("unable to determine the pid for pod %q", p.UUID), err) + return 1 + } + } + + ccfg := stage0.CommonConfig{ + UUID: p.UUID, + Debug: globalFlags.Debug, + } + + cfg := stage0.RmConfig{ + CommonConfig: &ccfg, + UsesOverlay: p.UsesOverlay(), + AppName: appName, + PodPath: p.Path(), + PodPID: podPID, + } + + err = stage0.RmApp(cfg) + if err != nil { + stderr.PrintE("error removing app", err) + return 1 + } + + return 0 +} diff --git a/rkt/app_sandbox.go b/rkt/app_sandbox.go new file mode 100644 index 0000000000..67d634f852 --- /dev/null +++ b/rkt/app_sandbox.go @@ -0,0 +1,321 @@ +// Copyright 2016 The rkt Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "fmt" + "net" + "strconv" + "strings" + + "github.com/appc/spec/schema/types" + "github.com/coreos/rkt/common" + "github.com/coreos/rkt/pkg/label" + "github.com/coreos/rkt/pkg/lock" + "github.com/coreos/rkt/pkg/pod" + "github.com/coreos/rkt/pkg/user" + "github.com/coreos/rkt/stage0" + "github.com/coreos/rkt/store/imagestore" + "github.com/coreos/rkt/store/treestore" + "github.com/spf13/cobra" +) + +var ( + cmdAppSandbox = &cobra.Command{ + Use: "sandbox", + Short: "Create an empty pod application sandbox", + Long: "Initializes an empty pod having no applications.", + Run: runWrapper(runAppSandbox), + } + flagAppPorts appPortList + flagAnnotations kvMap + flagLabels kvMap +) + +func init() { + cmdApp.AddCommand(cmdAppSandbox) + + addStage1ImageFlags(cmdAppSandbox.Flags()) + // TODO(sur) + cmdAppSandbox.Flags().StringVar(&flagUUIDFileSave, "uuid-file-save", "", "write out pod UUID to specified file") + cmdAppSandbox.Flags().Var(&flagNet, "net", "configure the pod's networking. Optionally, pass a list of user-configured networks to load and set arguments to pass to each network, respectively. 
Syntax: --net[=n[:args], ...]") + cmdAppSandbox.Flags().BoolVar(&flagNoOverlay, "no-overlay", false, "disable overlay filesystem") + cmdAppSandbox.Flags().Var(&flagDNS, "dns", "name servers to write in /etc/resolv.conf") + cmdAppSandbox.Flags().Var(&flagDNSSearch, "dns-search", "DNS search domains to write in /etc/resolv.conf") + cmdAppSandbox.Flags().Var(&flagDNSOpt, "dns-opt", "DNS options to write in /etc/resolv.conf") + cmdAppSandbox.Flags().StringVar(&flagHostname, "hostname", "", `pod's hostname. If empty, it will be "rkt-$PODUUID"`) + cmdAppSandbox.Flags().Var(&flagAppPorts, "port", "ports to forward. format: \"name:proto:podPort:hostIP:hostPort\"") + + flagAppPorts = appPortList{} + cmdAppSandbox.Flags().Var(&flagAnnotations, "user-annotation", "optional, set the pod's annotations in the form of key=value") + cmdAppSandbox.Flags().Var(&flagLabels, "user-label", "optional, set the pod's label in the form of key=value") +} + +func runAppSandbox(cmd *cobra.Command, args []string) int { + s, err := imagestore.NewStore(storeDir()) + if err != nil { + stderr.PrintE("cannot open store", err) + return 1 + } + + ts, err := treestore.NewStore(treeStoreDir(), s) + if err != nil { + stderr.PrintE("cannot open treestore", err) + return 1 + } + + config, err := getConfig() + if err != nil { + stderr.PrintE("cannot get configuration", err) + return 1 + } + + s1img, err := getStage1Hash(s, ts, config) + if err != nil { + stderr.Error(err) + return 1 + } + + p, err := pod.NewPod(getDataDir()) + if err != nil { + stderr.PrintE("error creating new pod", err) + return 1 + } + + if flagUUIDFileSave != "" { + if err := pod.WriteUUIDToFile(p.UUID, flagUUIDFileSave); err != nil { + stderr.PrintE("error saving pod UUID to file", err) + return 1 + } + } + + processLabel, mountLabel, err := label.InitLabels("/var/run/rkt/mcs", []string{}) + if err != nil { + stderr.PrintE("error initialising SELinux", err) + return 1 + } + + p.MountLabel = mountLabel + cfg := stage0.CommonConfig{ + 
MountLabel: mountLabel, + ProcessLabel: processLabel, + Store: s, + TreeStore: ts, + Stage1Image: *s1img, + UUID: p.UUID, + Debug: globalFlags.Debug, + Mutable: true, + } + + ovlOk := true + if err := common.PathSupportsOverlay(getDataDir()); err != nil { + if oerr, ok := err.(common.ErrOverlayUnsupported); ok { + stderr.Printf("disabling overlay support: %q", oerr.Error()) + ovlOk = false + } else { + stderr.PrintE("error determining overlay support", err) + return 1 + } + } + + useOverlay := !flagNoOverlay && ovlOk + + pcfg := stage0.PrepareConfig{ + CommonConfig: &cfg, + UseOverlay: useOverlay, + PrivateUsers: user.NewBlankUidRange(), + SkipTreeStoreCheck: globalFlags.InsecureFlags.SkipOnDiskCheck(), + Apps: &rktApps, + Ports: []types.ExposedPort(flagAppPorts), + UserAnnotations: parseAnnotations(&flagAnnotations), + UserLabels: parseLabels(&flagLabels), + } + + if globalFlags.Debug { + stage0.InitDebug() + } + + keyLock, err := lock.SharedKeyLock(lockDir(), common.PrepareLock) + if err != nil { + stderr.PrintE("cannot get shared prepare lock", err) + return 1 + } + + err = stage0.Prepare(pcfg, p.Path(), p.UUID) + if err != nil { + stderr.PrintE("error setting up stage0", err) + keyLock.Close() + return 1 + } + keyLock.Close() + + // get the lock fd for run + lfd, err := p.Fd() + if err != nil { + stderr.PrintE("error getting pod lock fd", err) + return 1 + } + + // skip prepared by jumping directly to run, we own this pod + if err := p.ToRun(); err != nil { + stderr.PrintE("unable to transition to run", err) + return 1 + } + + rktgid, err := common.LookupGid(common.RktGroup) + if err != nil { + stderr.Printf("group %q not found, will use default gid when rendering images", common.RktGroup) + rktgid = -1 + } + + DNSConfMode, DNSConfig, HostsEntries, err := parseDNSFlags(flagHostsEntries, flagDNS, flagDNSSearch, flagDNSOpt, flagDNSDomain) + if err != nil { + stderr.PrintE("error with dns flags", err) + return 1 + } + + rcfg := stage0.RunConfig{ + CommonConfig: 
&cfg, + Net: flagNet, + LockFd: lfd, + Interactive: true, + DNSConfMode: DNSConfMode, + DNSConfig: DNSConfig, + MDSRegister: false, + LocalConfig: globalFlags.LocalConfigDir, + RktGid: rktgid, + Hostname: flagHostname, + InsecureCapabilities: globalFlags.InsecureFlags.SkipCapabilities(), + InsecurePaths: globalFlags.InsecureFlags.SkipPaths(), + InsecureSeccomp: globalFlags.InsecureFlags.SkipSeccomp(), + UseOverlay: useOverlay, + HostsEntries: *HostsEntries, + } + + _, manifest, err := p.PodManifest() + if err != nil { + stderr.PrintE("cannot get the pod manifest", err) + return 1 + } + rcfg.Apps = manifest.Apps + stage0.Run(rcfg, p.Path(), getDataDir()) // execs, never returns + + return 1 +} + +/* + * The sandbox uses a different style of port forwarding - instead of mapping + * from port to app (via name), we just map ports directly. + * + * The format is name:proto:podPort:hostIP:hostPort + * e.g. http:tcp:8080:0.0.0.0:80 + */ +type appPortList []types.ExposedPort + +func (apl *appPortList) Set(s string) error { + parts := strings.SplitN(s, ":", 5) + if len(parts) != 5 { + return fmt.Errorf("--port invalid format") + } + + // parsey parsey + name, err := types.NewACName(parts[0]) + if err != nil { + return err + } + + proto := parts[1] + switch proto { + case "tcp", "udp": + default: + return fmt.Errorf("invalid protocol %q", proto) + } + + p, err := strconv.ParseUint(parts[2], 10, 16) + if err != nil { + return err + } + podPortNo := uint(p) + + ip := net.ParseIP(parts[3]) + if ip == nil { + return fmt.Errorf("could not parse IP %q", ip) + } + + p, err = strconv.ParseUint(parts[4], 10, 16) + if err != nil { + return err + } + hostPortNo := uint(p) + + podSide := types.Port{ + Name: *name, + Protocol: proto, + Port: podPortNo, + Count: 1, + SocketActivated: false, + } + + hostSide := types.ExposedPort{ + Name: *name, + HostPort: hostPortNo, + HostIP: ip, + PodPort: &podSide, + } + + *apl = append(*apl, hostSide) + return nil +} + +func (apl *appPortList) 
String() string { + ss := make([]string, 0, len(*apl)) + for _, p := range *apl { + ss = append(ss, fmt.Sprintf("%s:%s:%d:%s:%d", + p.Name, p.PodPort.Protocol, p.PodPort.Port, + p.HostIP, p.HostPort)) + + } + return strings.Join(ss, ",") +} + +func (apl *appPortList) Type() string { + return "appPortList" +} + +// parseAnnotations converts the annotations set by '--user-annotation' flag, +// and returns types.UserAnnotations. +func parseAnnotations(flagAnnotations *kvMap) types.UserAnnotations { + if flagAnnotations.IsEmpty() { + return nil + } + annotations := make(types.UserAnnotations) + for k, v := range flagAnnotations.mapping { + annotations[k] = v + } + return annotations +} + +// parseLabels converts the labels set by '--user-label' flag, +// and returns types.UserLabels. +func parseLabels(flagLabels *kvMap) types.UserLabels { + if flagLabels.IsEmpty() { + return nil + } + labels := make(types.UserLabels) + for k, v := range flagLabels.mapping { + labels[k] = v + } + return labels +} diff --git a/rkt/app_start.go b/rkt/app_start.go new file mode 100644 index 0000000000..e2669b6670 --- /dev/null +++ b/rkt/app_start.go @@ -0,0 +1,94 @@ +// Copyright 2016 The rkt Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package main + +import ( + "fmt" + + pkgPod "github.com/coreos/rkt/pkg/pod" + "github.com/coreos/rkt/stage0" + + "github.com/appc/spec/schema/types" + "github.com/spf13/cobra" +) + +var ( + cmdAppStart = &cobra.Command{ + Use: "start UUID --app=NAME", + Short: "Start an app in a pod", + Long: `Start appz!`, + Run: runWrapper(runAppStart), + } +) + +func init() { + cmdAppStart.Flags().StringVar(&flagAppName, "app", "", "app to start") + cmdApp.AddCommand(cmdAppStart) +} + +func runAppStart(cmd *cobra.Command, args []string) (exit int) { + if len(args) < 1 { + stderr.Print("must provide the pod UUID") + return 1 + } + + if flagAppName == "" { + stderr.Print("must provide the app to start") + return 1 + } + + p, err := pkgPod.PodFromUUIDString(getDataDir(), args[0]) + if err != nil { + stderr.PrintE("problem retrieving pod", err) + return 1 + } + defer p.Close() + + if p.State() != pkgPod.Running { + stderr.Printf("pod %q isn't currently running", p.UUID) + return 1 + } + + appName, err := types.NewACName(flagAppName) + if err != nil { + stderr.PrintE("invalid app name", err) + } + + podPID, err := p.ContainerPid1() + if err != nil { + stderr.PrintE(fmt.Sprintf("unable to determine the pid for pod %q", p.UUID), err) + return 1 + } + + cfg := stage0.CommonConfig{ + UUID: p.UUID, + Debug: globalFlags.Debug, + } + + scfg := stage0.StartConfig{ + CommonConfig: &cfg, + Dir: p.Path(), + AppName: appName, + PodPID: podPID, + } + + err = stage0.StartApp(scfg) + if err != nil { + stderr.PrintE("error starting app", err) + return 1 + } + + return 0 +} diff --git a/rkt/app_status.go b/rkt/app_status.go new file mode 100644 index 0000000000..35369f6a68 --- /dev/null +++ b/rkt/app_status.go @@ -0,0 +1,134 @@ +// Copyright 2016 The rkt Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "encoding/json" + "fmt" + "strings" + "time" + + rkt "github.com/coreos/rkt/lib" + "github.com/spf13/cobra" +) + +var ( + cmdAppStatus = &cobra.Command{ + Use: "status UUID --app=APP_NAME [--format=json]", + Short: "Check the status of an app in the given pod", + Long: "This will print detailed status of an app", + Run: runWrapper(runAppStatus), + } +) + +func init() { + cmdApp.AddCommand(cmdAppStatus) + cmdAppStatus.Flags().StringVar(&flagAppName, "app", "", "choose app within the pod, this flag must be set") + cmdAppStatus.Flags().StringVar(&flagFormat, "format", "", "choose the output format, allowed format includes 'json', 'json-pretty'. 
If empty, then the result is printed as key value pairs") +} + +func printApp(app *rkt.App) { + stdout.Printf("name=%s\n", app.Name) + stdout.Printf("state=%s\n", app.State) + stdout.Printf("image_id=%s\n", app.ImageID) + if app.CreatedAt != nil { + stdout.Printf("created_at=%v\n", time.Unix(0, *(app.CreatedAt))) + } + if app.StartedAt != nil { + stdout.Printf("started_at=%v\n", time.Unix(0, *(app.StartedAt))) + } + if app.FinishedAt != nil { + stdout.Printf("finished_at=%v\n", time.Unix(0, *(app.FinishedAt))) + } + if app.ExitCode != nil { + stdout.Printf("exit_code=%d\n", *(app.ExitCode)) + } + + if len(app.Mounts) > 0 { + stdout.Printf("mounts=") + var mnts []string + for _, mnt := range app.Mounts { + mnts = append(mnts, fmt.Sprintf("%s:%s:(read_only:%v)", mnt.HostPath, mnt.ContainerPath, mnt.ReadOnly)) + } + stdout.Printf(strings.Join(mnts, ",")) + stdout.Println() + } + + if len(app.UserAnnotations) > 0 { + stdout.Printf("user_annotations=") + var annos []string + for key, value := range app.UserAnnotations { + annos = append(annos, fmt.Sprintf("%s:%s", key, value)) + } + stdout.Printf(strings.Join(annos, ",")) + stdout.Println() + } + + if len(app.UserLabels) > 0 { + stdout.Printf("user_labels=") + var labels []string + for key, value := range app.UserLabels { + labels = append(labels, fmt.Sprintf("%s:%s", key, value)) + } + stdout.Printf(strings.Join(labels, ",")) + stdout.Println() + } +} + +func runAppStatus(cmd *cobra.Command, args []string) (exit int) { + if len(args) != 1 || flagAppName == "" { + cmd.Usage() + return 1 + } + + apps, err := rkt.AppsForPod(args[0], getDataDir(), flagAppName) + if err != nil { + stderr.PrintE("error getting app status", err) + return 1 + } + + if len(apps) == 0 { + stderr.Error(fmt.Errorf("cannot find app %q in the pod", flagAppName)) + return 1 + } + + // Must have only 1 app. 
+ if len(apps) != 1 { + stderr.Error(fmt.Errorf("find more than one app with the name %q", flagAppName)) + return 1 + } + + // TODO(yifan): Print yamls. + switch flagFormat { + case "json": + result, err := json.Marshal(apps[0]) + if err != nil { + stderr.PrintE("error marshaling the app status", err) + return 1 + } + stdout.Print(string(result)) + case "json-pretty": + result, err := json.MarshalIndent(apps[0], "", "\t") + if err != nil { + stderr.PrintE("error marshaling the app status", err) + return 1 + } + stdout.Print(string(result)) + default: + printApp(apps[0]) + } + + return 0 +} diff --git a/rkt/app_stop.go b/rkt/app_stop.go new file mode 100644 index 0000000000..2743fc5bb1 --- /dev/null +++ b/rkt/app_stop.go @@ -0,0 +1,99 @@ +// Copyright 2016 The rkt Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package main + +import ( + "fmt" + + pkgPod "github.com/coreos/rkt/pkg/pod" + "github.com/coreos/rkt/stage0" + + "github.com/appc/spec/schema/types" + "github.com/spf13/cobra" +) + +var ( + cmdAppStop = &cobra.Command{ + Use: "stop UUID --app=NAME", + Short: "Stop an app in a pod", + Long: `Stop appz!`, + Run: runWrapper(runAppStop), + } +) + +func init() { + cmdAppStop.Flags().StringVar(&flagAppName, "app", "", "app to stop") + cmdApp.AddCommand(cmdAppStop) +} + +func runAppStop(cmd *cobra.Command, args []string) (exit int) { + if len(args) < 1 { + stderr.Print("must provide the pod UUID") + return 1 + } + + if flagAppName == "" { + stderr.Print("must provide the app to remove") + return 1 + } + + p, err := pkgPod.PodFromUUIDString(getDataDir(), args[0]) + if err != nil { + stderr.PrintE("problem retrieving pod", err) + return 1 + } + defer p.Close() + + if p.AfterRun() { + stdout.Printf("pod %q is already stopped", p.UUID) + return 0 + } + + if p.State() != pkgPod.Running { + stderr.Printf("pod %q isn't currently running", p.UUID) + return 1 + } + + appName, err := types.NewACName(flagAppName) + if err != nil { + stderr.PrintE("invalid app name", err) + } + + podPID, err := p.ContainerPid1() + if err != nil { + stderr.PrintE(fmt.Sprintf("unable to determine the pid for pod %q", p.UUID), err) + return 1 + } + + cfg := stage0.CommonConfig{ + UUID: p.UUID, + Debug: globalFlags.Debug, + } + + scfg := stage0.StopConfig{ + CommonConfig: &cfg, + Dir: p.Path(), + AppName: appName, + PodPID: podPID, + } + + err = stage0.StopApp(scfg) + if err != nil { + stderr.PrintE("error stopping app", err) + return 1 + } + + return 0 +} From d071d3446ba0e16ae59ec4f4ef8dc77450bb6b81 Mon Sep 17 00:00:00 2001 From: Sergiusz Urbaniak Date: Tue, 1 Nov 2016 09:17:40 +0100 Subject: [PATCH 30/32] stage0/app: mark app subcommands as hidden, gate behind app experiment. This also introduces a common.IsExperimentEnabled function for querying experiments in rkt. 
--- common/experiment.go | 47 ++++++++++++++++++++++++++++++++++++++++++++ rkt/app.go | 14 +++++++++---- 2 files changed, 57 insertions(+), 4 deletions(-) create mode 100644 common/experiment.go diff --git a/common/experiment.go b/common/experiment.go new file mode 100644 index 0000000000..48650ea763 --- /dev/null +++ b/common/experiment.go @@ -0,0 +1,47 @@ +// Copyright 2016 The rkt Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package common + +import ( + "os" + "strconv" + "strings" +) + +// a string set of known rkt experiments +var experiments = map[string]struct{}{ + "app": {}, // rkt app subcommands for CRI +} + +// IsExperimentEnabled returns true if the given rkt experiment is enabled. +// The given name is converted to upper case and a bool RKT_EXPERIMENT_{NAME} +// environment variable is retrieved. +// If the experiment name is unknown, false is returned. +// If the environment variable does not contain a valid bool value +// according to strconv.ParseBool, false is returned. 
+func IsExperimentEnabled(name string) bool { + if _, ok := experiments[name]; !ok { + return false + } + + v := os.Getenv("RKT_EXPERIMENT_" + strings.ToUpper(name)) + + enabled, err := strconv.ParseBool(v) + if err != nil { + return false // ignore errors from bool conversion + } + + return enabled +} diff --git a/rkt/app.go b/rkt/app.go index 02c4b0e97e..cca977a7d3 100644 --- a/rkt/app.go +++ b/rkt/app.go @@ -14,15 +14,21 @@ package main -import "github.com/spf13/cobra" +import ( + "github.com/coreos/rkt/common" + "github.com/spf13/cobra" +) var ( cmdApp = &cobra.Command{ - Use: "app [command]", - Short: "Operate on app level operations", + Use: "app [command]", + Short: "Operate on app level operations", + Hidden: true, } ) func init() { - cmdRkt.AddCommand(cmdApp) + if common.IsExperimentEnabled("app") { + cmdRkt.AddCommand(cmdApp) + } } From 2f5de7f341ca626a140ee9a43f06254fb53cb301 Mon Sep 17 00:00:00 2001 From: Sergiusz Urbaniak Date: Thu, 3 Nov 2016 10:05:38 +0100 Subject: [PATCH 31/32] lib: use nanoseconds for app state This reintroduces nanosecond units for reporting the app state. --- lib/app.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/app.go b/lib/app.go index 9b2cfd3f49..ba4114b459 100644 --- a/lib/app.go +++ b/lib/app.go @@ -118,7 +118,7 @@ func appState(app *App, pod *pkgPod.Pod) error { fmt.Fprintf(os.Stderr, "Cannot get GC marked time: %v", err) } if !t.IsZero() { - finishedAt := t.Unix() + finishedAt := t.UnixNano() app.FinishedAt = &finishedAt } } @@ -135,7 +135,7 @@ func appState(app *App, pod *pkgPod.Pod) error { } app.State = AppStateCreated - createdAt := fi.ModTime().Unix() + createdAt := fi.ModTime().UnixNano() app.CreatedAt = &createdAt // Check if the app is started. @@ -148,7 +148,7 @@ func appState(app *App, pod *pkgPod.Pod) error { } app.State = AppStateRunning - startedAt := fi.ModTime().Unix() + startedAt := fi.ModTime().UnixNano() app.StartedAt = &startedAt // Check if the app is exited. 
@@ -162,7 +162,7 @@ func appState(app *App, pod *pkgPod.Pod) error { } app.State = AppStateExited - finishedAt := fi.ModTime().Unix() + finishedAt := fi.ModTime().UnixNano() app.FinishedAt = &finishedAt // Read exit code. From 7dd24cfe16641d5b5b37ac7a1129e2c296f331f1 Mon Sep 17 00:00:00 2001 From: Luca Bruno Date: Sun, 6 Nov 2016 22:00:56 +0000 Subject: [PATCH 32/32] stage1: document experimental interface v5 This commit introduces and unifies "crossing entrypoints" for app and attach commands, defining an experimental interface v5 (yet to be finalized). --- .../devel/stage1-implementors-guide.md | 90 ++++++++++++++++--- common/common.go | 7 ++ stage0/app.go | 89 +++--------------- stage0/common.go | 74 +++++++++++++++ stage1/app-add/app-add.go | 16 ++-- stage1/app-rm/app-rm.go | 22 +++-- stage1/app-start/app-start.go | 14 +-- stage1/app-stop/app-stop.go | 19 ++-- stage1/common/run.go | 18 ++++ 9 files changed, 221 insertions(+), 128 deletions(-) create mode 100644 stage0/common.go diff --git a/Documentation/devel/stage1-implementors-guide.md b/Documentation/devel/stage1-implementors-guide.md index a5cd3241ae..0dc3679d3f 100644 --- a/Documentation/devel/stage1-implementors-guide.md +++ b/Documentation/devel/stage1-implementors-guide.md @@ -90,6 +90,12 @@ Any stage1 that supports and expects machined registration to occur will likely `resolv.conf` is to create /etc/rkt-resolv.conf iff a CNI plugin specifies it, and for `hosts` is to create a fallback if the app does not provide it. +#### Arguments added in interface version 5 (experimental) + +This interface version is not yet finalized, thus marked as experimental. + +* `--mutable` to run a mutable pod + ### rkt enter `coreos.com/rkt/stage1/enter` @@ -139,15 +145,27 @@ In the bundled rkt stage 1, the entrypoint is sending SIGTERM signal to systemd- * `--force` to force the stopping of the pod. E.g. 
in the bundled rkt stage 1, stop sends SIGKILL * UUID of the pod +## Crossing Entrypoints + +Some entrypoints need to perform actions in the context of stage1 or stage2. As such, they need to cross stage boundaries (thus the name) and depend on the existence of the `enter` entrypoint. All crossing entrypoints receive additional options for entering via the following environment variables: + +* `RKT_STAGE1_ENTERCMD` specifies the command to be called to enter a stage1 or a stage2 environment +* `RKT_STAGE1_ENTERPID` specifies the PID of the stage1 to enter +* `RKT_STAGE1_ENTERAPP` optionally specifies the application name of the stage2 to enter + ### rkt app add +(Experimental, to be stabilized in version 5) + `coreos.com/rkt/stage1/app/add` -#### Arguments +This is a crossing entrypoint. -`start $OPTIONS UUID APPNAME ENTERENTRYPOINT PID` +#### Arguments +* `--app` application name * `--debug` to activate debugging +* `--uuid` UUID of the pod * `--disable-capabilities-restriction` gives all capabilities to apps (overrides `retain-set` and `remove-set`) * `--disable-paths` disables inaccessible and read-only paths (such as `/proc/sysrq-trigger`) * `--disable-seccomp` disables seccomp (overrides `retain-set` and `remove-set`) @@ -155,49 +173,69 @@ In the bundled rkt stage 1, the entrypoint is sending SIGTERM signal to systemd- ### rkt app start +(Experimental, to be stabilized in version 5) + `coreos.com/rkt/stage1/app/start` -#### Arguments +This is a crossing entrypoint. -`start $OPTIONS UUID APPNAME ENTERENTRYPOINT PID` +#### Arguments +* `--app` application name * `--debug` to activate debugging ### rkt app stop +(Experimental, to be stabilized in version 5) + `coreos.com/rkt/stage1/app/stop` -#### Arguments +This is a crossing entrypoint.
-`stop $OPTIONS UUID APPNAME ENTERENTRYPOINT PID` +#### Arguments +* `--app` application name * `--debug` to activate debugging ### rkt app rm +(Experimental, to be stabilized in version 5) + `coreos.com/rkt/stage1/app/rm` -#### Arguments +This is a crossing entrypoint. -`rm $OPTIONS UUID APPNAME ENTERENTRYPOINT PID` +#### Arguments +* `--app` application name * `--debug` to activate debugging -## Metadata +### rkt attach -### Mutable pods +(Experimental, to be stabilized in version 5) -Stage1 images can support mutable pod environments, where, once a pod has been started, applications can be added/started/stopped/removed while the actual pod is running. This information is persisted at runtime in the pod manifest using the `coreos.com/rkt/stage1/mutable` annotation. +`coreos.com/rkt/stage1/attach` -If the annotation is not present, `false` is assumed. +This is a crossing entrypoint. + +#### Arguments + +* `--action` action to perform (`auto-attach`, `custom-attach` or `list`) +* `--app` application name +* `--debug` to activate debugging +* `--tty-in` whether to attach TTY input (`true` or `false`) +* `--tty-out` whether to attach TTY output (`true` or `false`) +* `--stdin` whether to attach stdin (`true` or `false`) +* `--stdout` whether to attach stdout (`true` or `false`) +* `--stderr` whether to attach stderr (`true` or `false`) + +## Stage1 Metadata ### Versioning The stage1 command line interface is versioned using an annotation with the name `coreos.com/rkt/stage1/interface-version`. If the annotation is not present, rkt assumes the version is 1. -The current version of the stage1 interface is 3. - ## Examples ### Stage1 ACI manifest @@ -246,6 +284,24 @@ The current version of the stage1 interface is 3. } ``` +## Runtime Metadata + +Pods and applications can be annotated at runtime to signal support for specific features. 
+ +### Mutable pods (experimental v5) + +Stage1 images can support mutable pod environments, where, once a pod has been started, applications can be added/started/stopped/removed while the actual pod is running. This information is persisted at runtime in the pod manifest using the `coreos.com/rkt/stage1/mutable` annotation. + +If the annotation is not present, `false` is assumed. + +### Attachable applications (experimental v5) + +Stage1 images can support attachable applications, where I/O and TTY from each application can be dynamically redirected and attached to. +In that case, this information is persisted at runtime in each application manifest using the following annotations: + - `coreos.com/rkt/stage2/stdin` + - `coreos.com/rkt/stage2/stdout` + - `coreos.com/rkt/stage2/stderr` + ## Filesystem Layout Assumptions The following paths are reserved for the stage1 image, and they will be created during stage0. @@ -275,5 +331,11 @@ Later the exit status can be retrieved and shown by `rkt status $uuid`. This directory path is used for passing environment variables to each app. For example, environment variables for an app named `foo` will be stored in `rkt/env/foo`. +### iottymux (experimental v5) + +`rkt/iottymux` + +This directory path is used by the TTY and streaming attach helper. +When attach mode is enabled, each application will have a `rkt/iottymux/$appname/` directory, used by the I/O and TTY mux sidecar. [rkt-networking]: ../networking/overview.md diff --git a/common/common.go b/common/common.go index 7bb86077a2..143f64358f 100644 --- a/common/common.go +++ b/common/common.go @@ -67,6 +67,13 @@ const ( // Default perm bits for the regular directories // within the stage1 directory. DefaultRegularDirPerm = os.FileMode(0750) + + // Enter command for crossing entrypoints. + CrossingEnterCmd = "RKT_STAGE1_ENTERCMD" + // Stage1 (PID) to enter, used by crossing entrypoints.
+ CrossingEnterPID = "RKT_STAGE1_ENTERPID" + // Stage2 (application name) to enter, optionally used by crossing entrypoints. + CrossingEnterApp = "RKT_STAGE1_ENTERAPP" ) const ( diff --git a/stage0/app.go b/stage0/app.go index ebd5ec31d9..b7b2221935 100644 --- a/stage0/app.go +++ b/stage0/app.go @@ -22,7 +22,6 @@ import ( "fmt" "io/ioutil" "os" - "os/exec" "path/filepath" "strconv" "syscall" @@ -258,16 +257,9 @@ func AddApp(cfg AddConfig) error { return err } - eep, err := getStage1Entrypoint(cfg.PodPath, enterEntrypoint) - if err != nil { - return errwrap.Wrap(errors.New("error determining 'enter' entrypoint"), err) - } - args := []string{ - cfg.UUID.String(), - appName.String(), - filepath.Join(common.Stage1RootfsPath(cfg.PodPath), eep), - strconv.Itoa(cfg.PodPID), + fmt.Sprintf("--uuid=%s", cfg.UUID), + fmt.Sprintf("--app=%s", appName), } if cfg.InsecureCapabilities { @@ -295,7 +287,7 @@ func AddApp(cfg AddConfig) error { return err } - if err := callEntrypoint(cfg.PodPath, appAddEntrypoint, args); err != nil { + if err := RunCrossingEntrypoint(cfg.PodPath, cfg.PodPID, appName.String(), appAddEntrypoint, args); err != nil { return err } @@ -339,43 +331,6 @@ func updateFile(path string, contents []byte) error { return nil } -func callEntrypoint(dir, entrypoint string, args []string) error { - previousDir, err := os.Getwd() - if err != nil { - return err - } - - debug("Pivoting to filesystem %s", dir) - if err := os.Chdir(dir); err != nil { - return errwrap.Wrap(errors.New("failed changing to dir"), err) - } - - ep, err := getStage1Entrypoint(dir, entrypoint) - if err != nil { - return fmt.Errorf("%q not implemented for pod's stage1: %v", entrypoint, err) - } - execArgs := []string{filepath.Join(common.Stage1RootfsPath(dir), ep)} - debug("Execing %s", ep) - execArgs = append(execArgs, args...) 
- - c := exec.Cmd{ - Path: execArgs[0], - Args: execArgs, - Stdout: os.Stdout, - Stderr: os.Stderr, - } - - if err := c.Run(); err != nil { - return fmt.Errorf("error executing stage1's entrypoint %q: %v", entrypoint, err) - } - - if err := os.Chdir(previousDir); err != nil { - return errwrap.Wrap(errors.New("failed changing to dir"), err) - } - - return nil -} - func RmApp(cfg RmConfig) error { debug("locking pod") l, err := lock.ExclusiveLock(common.PodManifestLockPath(cfg.PodPath), lock.RegFile) @@ -414,23 +369,15 @@ func RmApp(cfg RmConfig) error { return err } - eep, err := getStage1Entrypoint(cfg.PodPath, enterEntrypoint) - if err != nil { - return errwrap.Wrap(errors.New("error determining 'enter' entrypoint"), err) - } - if cfg.PodPID > 0 { // Call app-stop and app-rm entrypoint only if the pod is still running. // Otherwise, there's not much we can do about it except unmounting/removing // the file system. args := []string{ - cfg.UUID.String(), - cfg.AppName.String(), - filepath.Join(common.Stage1RootfsPath(cfg.PodPath), eep), - strconv.Itoa(cfg.PodPID), + fmt.Sprintf("--app=%s", cfg.AppName), } - if err := callEntrypoint(cfg.PodPath, appStopEntrypoint, args); err != nil { + if err := RunCrossingEntrypoint(cfg.PodPath, cfg.PodPID, cfg.AppName.String(), appStopEntrypoint, args); err != nil { status, err := common.GetExitStatus(err) // ignore nonexistent units failing to stop. 
Exit status 5 // comes from systemctl and means the unit doesn't exist @@ -441,7 +388,7 @@ func RmApp(cfg RmConfig) error { } } - if err := callEntrypoint(cfg.PodPath, appRmEntrypoint, args); err != nil { + if err := RunCrossingEntrypoint(cfg.PodPath, cfg.PodPID, cfg.AppName.String(), appRmEntrypoint, args); err != nil { return err } } @@ -520,23 +467,15 @@ func StartApp(cfg StartConfig) error { return fmt.Errorf("error: nonexistent app %q", *cfg.AppName) } - eep, err := getStage1Entrypoint(cfg.Dir, enterEntrypoint) - if err != nil { - return errwrap.Wrap(errors.New("error determining 'enter' entrypoint"), err) - } - args := []string{ - cfg.UUID.String(), - cfg.AppName.String(), - filepath.Join(common.Stage1RootfsPath(cfg.Dir), eep), - strconv.Itoa(cfg.PodPID), + fmt.Sprintf("--app=%s", cfg.AppName), } if _, err := os.Create(common.AppStartedPath(p.Root, cfg.AppName.String())); err != nil { log.FatalE(fmt.Sprintf("error creating %s-started file", cfg.AppName.String()), err) } - if err := callEntrypoint(cfg.Dir, appStartEntrypoint, args); err != nil { + if err := RunCrossingEntrypoint(cfg.Dir, cfg.PodPID, cfg.AppName.String(), appStartEntrypoint, args); err != nil { return err } @@ -569,19 +508,11 @@ func StopApp(cfg StopConfig) error { return fmt.Errorf("error: nonexistent app %q", *cfg.AppName) } - eep, err := getStage1Entrypoint(cfg.Dir, enterEntrypoint) - if err != nil { - return errwrap.Wrap(errors.New("error determining 'enter' entrypoint"), err) - } - args := []string{ - cfg.UUID.String(), - cfg.AppName.String(), - filepath.Join(common.Stage1RootfsPath(cfg.Dir), eep), - strconv.Itoa(cfg.PodPID), + fmt.Sprintf("--app=%s", cfg.AppName), } - if err := callEntrypoint(cfg.Dir, appStopEntrypoint, args); err != nil { + if err := RunCrossingEntrypoint(cfg.Dir, cfg.PodPID, cfg.AppName.String(), appStopEntrypoint, args); err != nil { status, err := common.GetExitStatus(err) // exit status 5 comes from systemctl and means the unit doesn't exist if status == 5 { diff 
--git a/stage0/common.go b/stage0/common.go new file mode 100644 index 0000000000..94316dd34c --- /dev/null +++ b/stage0/common.go @@ -0,0 +1,74 @@ +// Copyright 2016 The rkt Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package stage0 + +import ( + "errors" + "fmt" + "os" + "os/exec" + "path/filepath" + + "github.com/coreos/rkt/common" + "github.com/hashicorp/errwrap" +) + +// RunCrossingEntrypoint wraps the execution of a stage1 entrypoint which +// requires crossing the stage0/stage1/stage2 boundary during its execution, +// by setting up proper environment variables for enter. +func RunCrossingEntrypoint(dir string, podPID int, appName string, entrypoint string, entrypointArgs []string) error { + enterCmd, err := getStage1Entrypoint(dir, enterEntrypoint) + if err != nil { + return errwrap.Wrap(errors.New("error determining 'enter' entrypoint"), err) + } + + previousDir, err := os.Getwd() + if err != nil { + return err + } + + if err := os.Chdir(dir); err != nil { + return errwrap.Wrap(errors.New("failed changing to dir"), err) + } + + ep, err := getStage1Entrypoint(dir, entrypoint) + if err != nil { + return fmt.Errorf("%q not implemented for pod's stage1: %v", entrypoint, err) + } + execArgs := []string{filepath.Join(common.Stage1RootfsPath(dir), ep)} + execArgs = append(execArgs, entrypointArgs...) 
+ + c := exec.Cmd{ + Path: execArgs[0], + Args: execArgs, + Stdin: os.Stdin, + Stdout: os.Stdout, + Stderr: os.Stderr, + Env: []string{ + fmt.Sprintf("%s=%s", common.CrossingEnterCmd, filepath.Join(common.Stage1RootfsPath(dir), enterCmd)), + fmt.Sprintf("%s=%d", common.CrossingEnterPID, podPID), + }, + } + + if err := c.Run(); err != nil { + return fmt.Errorf("error executing stage1 entrypoint: %v", err) + } + + if err := os.Chdir(previousDir); err != nil { + return errwrap.Wrap(errors.New("failed changing to dir"), err) + } + + return nil +} diff --git a/stage1/app-add/app-add.go b/stage1/app-add/app-add.go index 537ab8bf20..ddcc6e341b 100644 --- a/stage1/app-add/app-add.go +++ b/stage1/app-add/app-add.go @@ -18,7 +18,6 @@ package main import ( "flag" - "fmt" "io/ioutil" "os" "os/exec" @@ -28,6 +27,7 @@ import ( "github.com/coreos/rkt/common/cgroup" "github.com/coreos/rkt/common/cgroup/v1" rktlog "github.com/coreos/rkt/pkg/log" + stage1common "github.com/coreos/rkt/stage1/common" stage1types "github.com/coreos/rkt/stage1/common/types" stage1initcommon "github.com/coreos/rkt/stage1/init/common" @@ -35,6 +35,8 @@ import ( ) var ( + flagApp string + flagUUID string debug bool disableCapabilities bool disablePaths bool @@ -45,6 +47,8 @@ var ( ) func init() { + flag.StringVar(&flagApp, "app", "", "Application name") + flag.StringVar(&flagUUID, "uuid", "", "Pod UUID") flag.BoolVar(&debug, "debug", false, "Run in debug mode") flag.BoolVar(&disableCapabilities, "disable-capabilities-restriction", false, "Disable capability restrictions") flag.BoolVar(&disablePaths, "disable-paths", false, "Disable paths restrictions") @@ -52,7 +56,6 @@ func init() { flag.StringVar(&privateUsers, "private-users", "", "Run within user namespace. 
Can be set to [=UIDBASE[:NUIDS]]") } -// TODO use named flags instead of positional func main() { flag.Parse() @@ -63,21 +66,20 @@ func main() { diag.SetOutput(ioutil.Discard) } - uuid, err := types.NewUUID(flag.Arg(0)) + enterCmd := stage1common.PrepareEnterCmd(false) + + uuid, err := types.NewUUID(flagUUID) if err != nil { log.PrintE("UUID is missing or malformed", err) os.Exit(254) } - appName, err := types.NewACName(flag.Arg(1)) + appName, err := types.NewACName(flagApp) if err != nil { log.PrintE("invalid app name", err) os.Exit(254) } - enterCmd := []string{flag.Arg(2)} - enterCmd = append(enterCmd, fmt.Sprintf("--pid=%s", flag.Arg(3)), "--") - root := "." p, err := stage1types.LoadPod(root, uuid) if err != nil { diff --git a/stage1/app-rm/app-rm.go b/stage1/app-rm/app-rm.go index adace9b2e1..7cb6568f2a 100644 --- a/stage1/app-rm/app-rm.go +++ b/stage1/app-rm/app-rm.go @@ -18,7 +18,6 @@ package main import ( "flag" - "fmt" "io/ioutil" "os" "os/exec" @@ -26,22 +25,24 @@ import ( "github.com/coreos/rkt/common" rktlog "github.com/coreos/rkt/pkg/log" + stage1common "github.com/coreos/rkt/stage1/common" stage1initcommon "github.com/coreos/rkt/stage1/init/common" "github.com/appc/spec/schema/types" ) var ( - debug bool - log *rktlog.Logger - diag *rktlog.Logger + flagApp string + debug bool + log *rktlog.Logger + diag *rktlog.Logger ) func init() { + flag.StringVar(&flagApp, "app", "", "Application name") flag.BoolVar(&debug, "debug", false, "Run in debug mode") } -// TODO use named flags instead of positional func main() { flag.Parse() @@ -52,17 +53,15 @@ func main() { diag.SetOutput(ioutil.Discard) } - appName, err := types.NewACName(flag.Arg(1)) + appName, err := types.NewACName(flagApp) if err != nil { log.PrintE("invalid app name", err) os.Exit(254) } - enterEP := flag.Arg(2) + enterCmd := stage1common.PrepareEnterCmd(false) - args := []string{enterEP} - - args = append(args, fmt.Sprintf("--pid=%s", flag.Arg(3))) + args := enterCmd args = append(args, 
"/usr/bin/systemctl") args = append(args, "is-active") args = append(args, appName.String()) @@ -94,8 +93,7 @@ func main() { } } - args = []string{enterEP} - args = append(args, fmt.Sprintf("--pid=%s", flag.Arg(3))) + args = enterCmd args = append(args, "/usr/bin/systemctl") args = append(args, "daemon-reload") diff --git a/stage1/app-start/app-start.go b/stage1/app-start/app-start.go index 1248014e93..1535fcd92d 100644 --- a/stage1/app-start/app-start.go +++ b/stage1/app-start/app-start.go @@ -24,18 +24,21 @@ import ( "os/exec" rktlog "github.com/coreos/rkt/pkg/log" + stage1common "github.com/coreos/rkt/stage1/common" stage1initcommon "github.com/coreos/rkt/stage1/init/common" "github.com/appc/spec/schema/types" ) var ( - debug bool - log *rktlog.Logger - diag *rktlog.Logger + flagApp string + debug bool + log *rktlog.Logger + diag *rktlog.Logger ) func init() { + flag.StringVar(&flagApp, "app", "", "Application name") flag.BoolVar(&debug, "debug", false, "Run in debug mode") } @@ -49,14 +52,13 @@ func main() { diag.SetOutput(ioutil.Discard) } - appName, err := types.NewACName(flag.Arg(1)) + appName, err := types.NewACName(flagApp) if err != nil { log.PrintE("invalid app name", err) os.Exit(254) } - enterCmd := []string{flag.Arg(2)} - enterCmd = append(enterCmd, fmt.Sprintf("--pid=%s", flag.Arg(3)), "--") + enterCmd := stage1common.PrepareEnterCmd(false) args := enterCmd args = append(args, "/usr/bin/systemctl") diff --git a/stage1/app-stop/app-stop.go b/stage1/app-stop/app-stop.go index 48b52666e7..7637e4aaa3 100644 --- a/stage1/app-stop/app-stop.go +++ b/stage1/app-stop/app-stop.go @@ -18,29 +18,30 @@ package main import ( "flag" - "fmt" "io/ioutil" "os" "os/exec" "github.com/coreos/rkt/common" rktlog "github.com/coreos/rkt/pkg/log" + stage1common "github.com/coreos/rkt/stage1/common" stage1initcommon "github.com/coreos/rkt/stage1/init/common" "github.com/appc/spec/schema/types" ) var ( - debug bool - log *rktlog.Logger - diag *rktlog.Logger + flagApp string + 
debug bool + log *rktlog.Logger + diag *rktlog.Logger ) func init() { + flag.StringVar(&flagApp, "app", "", "Application name") flag.BoolVar(&debug, "debug", false, "Run in debug mode") } -// TODO use named flags instead of positional func main() { flag.Parse() @@ -51,17 +52,15 @@ func main() { diag.SetOutput(ioutil.Discard) } - appName, err := types.NewACName(flag.Arg(1)) + appName, err := types.NewACName(flagApp) if err != nil { log.PrintE("invalid app name", err) os.Exit(254) } - enterEP := flag.Arg(2) + enterCmd := stage1common.PrepareEnterCmd(false) - args := []string{enterEP} - - args = append(args, fmt.Sprintf("--pid=%s", flag.Arg(3))) + args := enterCmd args = append(args, "/usr/bin/systemctl") args = append(args, "stop") args = append(args, appName.String()) diff --git a/stage1/common/run.go b/stage1/common/run.go index 4b71bd58ef..fdb148d47b 100644 --- a/stage1/common/run.go +++ b/stage1/common/run.go @@ -21,6 +21,7 @@ import ( "os" "path/filepath" + "github.com/coreos/rkt/common" "github.com/coreos/rkt/pkg/sys" "github.com/hashicorp/errwrap" ) @@ -52,3 +53,20 @@ func WritePid(pid int, filename string) error { } return nil } + +// PrepareEnterCmd retrieves enter argument and prepare a command list +// to further run a command in stage1 context +func PrepareEnterCmd(enterStage2 bool) []string { + var args []string + enterCmd := os.Getenv(common.CrossingEnterCmd) + enterPID := os.Getenv(common.CrossingEnterPID) + if enterCmd != "" && enterPID != "" { + args = append(args, []string{enterCmd, fmt.Sprintf("--pid=%s", enterPID)}...) + enterApp := os.Getenv(common.CrossingEnterApp) + if enterApp != "" && enterStage2 { + args = append(args, fmt.Sprintf("--app=%s", enterApp)) + } + args = append(args, "--") + } + return args +}