From 35c1182baa83c8b969341a5a54942d665a89343c Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Mon, 24 Aug 2020 21:12:36 +0200 Subject: [PATCH 1/3] LVM mode: avoid unnecessary restart of container Unconditionally returning an error after a successful namespace creation caused the driver to restart. The restarted driver then finds the existing namespace and continues. Symptom: State: Running Started: Mon, 24 Aug 2020 09:44:08 +0200 Last State: Terminated Reason: Completed Message: failed to run driver: failed to create PMEM namespace with size '33285996544' in region 'region0': Exit Code: 0 Started: Mon, 24 Aug 2020 09:44:03 +0200 Finished: Mon, 24 Aug 2020 09:44:03 +0200 Ready: True Restart Count: 1 To detect this and future issues like it, E2E testing now checks the driver for unexpected restarts. In tests that intentionally restart pods, the pods must be deleted after the test to reset the restart counter. --- pkg/pmem-device-manager/pmd-lvm.go | 4 +- test/e2e/deploy/deploy.go | 73 ++++++++++++++++++++++++++++++ test/e2e/storage/sanity.go | 39 +++++++++++++--- 3 files changed, 108 insertions(+), 8 deletions(-) diff --git a/pkg/pmem-device-manager/pmd-lvm.go b/pkg/pmem-device-manager/pmd-lvm.go index 0c396668b6..c970505b7c 100644 --- a/pkg/pmem-device-manager/pmd-lvm.go +++ b/pkg/pmem-device-manager/pmd-lvm.go @@ -343,7 +343,9 @@ func createNS(r *ndctl.Region, percentage uint) error { Size: canUse, Align: align, }) - return fmt.Errorf("failed to create PMEM namespace with size '%d' in region '%s': %v", canUse, r.DeviceName(), err) + if err != nil { + return fmt.Errorf("failed to create PMEM namespace with size '%d' in region '%s': %v", canUse, r.DeviceName(), err) + } } return nil diff --git a/test/e2e/deploy/deploy.go b/test/e2e/deploy/deploy.go index 8ab72771a3..100dcedecd 100644 --- a/test/e2e/deploy/deploy.go +++ b/test/e2e/deploy/deploy.go @@ -19,13 +19,16 @@ import ( "github.com/prometheus/common/expfmt" v1 "k8s.io/api/core/v1" + apierrors 
"k8s.io/apimachinery/pkg/api/errors" apierrs "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" "k8s.io/kubernetes/test/e2e/framework" api "github.com/intel/pmem-csi/pkg/apis/pmemcsi/v1alpha1" "github.com/onsi/ginkgo" + "github.com/onsi/gomega" ) const ( @@ -181,6 +184,29 @@ func WaitForPMEMDriver(c *Cluster, name, namespace string) (metricsURL string) { } } +// CheckPMEMDriver does some sanity checks for a running deployment. +func CheckPMEMDriver(c *Cluster, deployment *Deployment) { + pods, err := c.cs.CoreV1().Pods(deployment.Namespace).List(context.Background(), + metav1.ListOptions{ + LabelSelector: fmt.Sprintf("%s in (%s)", deploymentLabel, deployment.Name), + }, + ) + framework.ExpectNoError(err, "list PMEM-CSI pods") + gomega.Expect(len(pods.Items)).Should(gomega.BeNumerically(">", 0), "should have PMEM-CSI pods") + for _, pod := range pods.Items { + for _, containerStatus := range pod.Status.ContainerStatuses { + if containerStatus.RestartCount > 0 { + framework.Failf("container %q in pod %q restarted %d times, last state: %+v", + containerStatus.Name, + pod.Name, + containerStatus.RestartCount, + containerStatus.LastTerminationState, + ) + } + } + } +} + // RemoveObjects deletes everything that might have been created for a // PMEM-CSI driver or operator installation (pods, daemonsets, // statefulsets, driver info, storage classes, etc.). @@ -559,6 +585,9 @@ func Parse(deploymentName string) (*Deployment, error) { // a test runs, the desired deployment exists. Deployed drivers are intentionally // kept running to speed up the execution of multiple tests that all want the // same kind of deployment. +// +// The driver should never restart. A restart would indicate some +// (potentially intermittent) issue. 
func EnsureDeployment(deploymentName string) *Deployment { deployment, err := Parse(deploymentName) if err != nil { @@ -588,6 +617,7 @@ func EnsureDeployment(deploymentName string) *Deployment { // Do some sanity checks on the running deployment before the test. if deployment.HasDriver { WaitForPMEMDriver(c, "pmem-csi", deployment.Namespace) + CheckPMEMDriver(c, deployment) } if deployment.HasOperator { WaitForOperator(c, deployment.Namespace) @@ -636,6 +666,7 @@ func EnsureDeployment(deploymentName string) *Deployment { // looking at the driver state. Long-term we want the operator to do that // checking itself. WaitForPMEMDriver(c, "pmem-csi", deployment.Namespace) + CheckPMEMDriver(c, deployment) } for _, h := range installHooks { @@ -695,6 +726,31 @@ func (d *Deployment) GetDriverDeployment() api.Deployment { } } +// DeleteAllPods deletes all currently running pods that belong to the deployment. +func (d Deployment) DeleteAllPods(c *Cluster) error { + listOptions := metav1.ListOptions{ + LabelSelector: fmt.Sprintf("%s in (%s)", deploymentLabel, d.Name), + } + pods, err := c.cs.CoreV1().Pods(d.Namespace).List(context.Background(), listOptions) + if err != nil { + return fmt.Errorf("list all PMEM-CSI pods: %v", err) + } + // Kick off deletion of several pods at once. + if err := c.cs.CoreV1().Pods(d.Namespace).DeleteCollection(context.Background(), + metav1.DeleteOptions{}, + listOptions, + ); err != nil { + return fmt.Errorf("delete all PMEM-CSI pods: %v", err) + } + // But still wait for every single one to be gone... + for _, pod := range pods.Items { + if err := waitForPodDeletion(c, pod); err != nil { + return fmt.Errorf("wait for pod deletion: %v", err) + } + } + return nil +} + // DescribeForAll registers tests like gomega.Describe does, except that // each test will then be invoked for each supported PMEM-CSI deployment // which has a functional PMEM-CSI driver. 
@@ -781,3 +837,20 @@ func DefineTests() { } } } + +// waitForPodDeletion returns an error if it takes too long for the pod to fully terminate. +func waitForPodDeletion(c *Cluster, pod v1.Pod) error { + return wait.PollImmediate(2*time.Second, time.Minute, func() (bool, error) { + existingPod, err := c.cs.CoreV1().Pods(pod.Namespace).Get(context.Background(), pod.Name, metav1.GetOptions{}) + if apierrors.IsNotFound(err) { + return true, nil // done + } + if err != nil { + return true, err // stop wait with error + } + if pod.UID != existingPod.UID { + return true, nil // also done (pod was restarted) + } + return false, nil + }) +} diff --git a/test/e2e/storage/sanity.go b/test/e2e/storage/sanity.go index 366b66def5..29089aeafb 100644 --- a/test/e2e/storage/sanity.go +++ b/test/e2e/storage/sanity.go @@ -204,22 +204,29 @@ var _ = deploy.DescribeForSome("sanity", func(d *deploy.Deployment) bool { // is allocated dynamically and changes when redeploying. Therefore // we register a hook which clears the connection when PMEM-CSI // gets re-deployed. + scFinalize := func() { + sc.Finalize() + // Not sure why this isn't in Finalize - a bug? 
+ sc.Conn = nil + sc.ControllerConn = nil + } deploy.AddUninstallHook(func(deploymentName string) { framework.Logf("sanity: deployment %s is gone, closing test connections to controller %s and node %s.", deploymentName, config.ControllerAddress, config.Address) - sc.Finalize() + scFinalize() }) var _ = Describe("PMEM-CSI", func() { var ( - cl *sanity.Cleanup - nc csi.NodeClient - cc, ncc csi.ControllerClient - nodeID string - v volume - cancel func() + cl *sanity.Cleanup + nc csi.NodeClient + cc, ncc csi.ControllerClient + nodeID string + v volume + cancel func() + rebooted bool ) BeforeEach(func() { @@ -234,6 +241,7 @@ var _ = deploy.DescribeForSome("sanity", func(d *deploy.Deployment) bool { ControllerPublishSupported: true, NodeStageSupported: true, } + rebooted = false nid, err := nc.NodeGetInfo( context.Background(), &csi.NodeGetInfoRequest{}) @@ -256,6 +264,19 @@ var _ = deploy.DescribeForSome("sanity", func(d *deploy.Deployment) bool { cl.DeleteVolumes() cancel() sc.Teardown() + + if rebooted { + // Remove all cached connections, too. + scFinalize() + + // Rebooting a node increases the restart counter of + // the containers. This is normal in that case, but + // for the next test triggers the check that + // containers shouldn't restart. To get around that, + // we delete all PMEM-CSI pods after a reboot test. + By("stopping all PMEM-CSI pods after rebooting some node(s)") + d.DeleteAllPods(cluster) + } }) It("stores state across reboots for single volume", func() { @@ -273,7 +294,9 @@ var _ = deploy.DescribeForSome("sanity", func(d *deploy.Deployment) bool { createdVolumes, err := ncc.ListVolumes(context.Background(), &csi.ListVolumesRequest{}) framework.ExpectNoError(err, "Failed to list volumes after reboot") Expect(createdVolumes.Entries).To(HaveLen(len(initialVolumes.Entries)+1), "one more volume on : %s", nodeID) + // Restart. + rebooted = true restartNode(f.ClientSet, nodeID, sc) // Once we get an answer, it is expected to be the same as before. 
@@ -295,6 +318,7 @@ var _ = deploy.DescribeForSome("sanity", func(d *deploy.Deployment) bool { nodeID := v.publish(name, vol) // Restart. + rebooted = true restartNode(f.ClientSet, nodeID, sc) _, err := ncc.ListVolumes(context.Background(), &csi.ListVolumesRequest{}) @@ -359,6 +383,7 @@ var _ = deploy.DescribeForSome("sanity", func(d *deploy.Deployment) bool { capacity, err := cc.GetCapacity(context.Background(), &csi.GetCapacityRequest{}) framework.ExpectNoError(err, "get capacity before restart") + rebooted = true restartNode(f.ClientSet, controllerNode, sc) _, err = WaitForPodsWithLabelRunningReady(f.ClientSet, d.Namespace, From 53d710b0d874a4de7cb960612ea8e1cbeef15c7c Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Mon, 24 Aug 2020 21:29:07 +0200 Subject: [PATCH 2/3] LVM: ignore unsuitable regions during startup Creating a namespace will only work for regions of the right type. If we were to attempt creating the namespace, the function would fail and the entire driver startup would be aborted, just to fail again when tried again. There's no test for this because we have no way to simulate different PMEM configurations. 
--- pkg/pmem-device-manager/pmd-lvm.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pkg/pmem-device-manager/pmd-lvm.go b/pkg/pmem-device-manager/pmd-lvm.go index c970505b7c..fda81dcba3 100644 --- a/pkg/pmem-device-manager/pmd-lvm.go +++ b/pkg/pmem-device-manager/pmd-lvm.go @@ -52,6 +52,11 @@ func NewPmemDeviceManagerLVM(pmemPercentage uint) (PmemDeviceManager, error) { for _, bus := range ctx.GetBuses() { for _, r := range bus.ActiveRegions() { vgName := pmemcommon.VgName(bus, r) + if r.Type() != ndctl.PmemRegion { + klog.Infof("Region is not suitable for fsdax, skipping it: id = %q, device %q", r.ID(), r.DeviceName()) + continue + } + if err := createNS(r, pmemPercentage); err != nil { return nil, err } From a5ee95393d5e6aceaedbd3b5a3c89f4a3606e942 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Mon, 24 Aug 2020 21:31:48 +0200 Subject: [PATCH 3/3] LVM: code improvements - Avoid the "ctx" variable name - normally used for a standard context, not consistent with other PMEM-CSI code. - Better names and documentation for functions - they do not always create something. - Avoid magic constant "2" when constructing the command line - no need to increase that number when changing parameters, easier to read. - ctx.Free() at the end of the loops would not get called when returning from inside the loops. 
--- pkg/pmem-device-manager/pmd-lvm.go | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/pkg/pmem-device-manager/pmd-lvm.go b/pkg/pmem-device-manager/pmd-lvm.go index fda81dcba3..8c894ffe83 100644 --- a/pkg/pmem-device-manager/pmd-lvm.go +++ b/pkg/pmem-device-manager/pmd-lvm.go @@ -43,13 +43,14 @@ func NewPmemDeviceManagerLVM(pmemPercentage uint) (PmemDeviceManager, error) { lvmMutex.Lock() defer lvmMutex.Unlock() - ctx, err := ndctl.NewContext() + ndctx, err := ndctl.NewContext() if err != nil { return nil, err } + defer ndctx.Free() volumeGroups := []string{} - for _, bus := range ctx.GetBuses() { + for _, bus := range ndctx.GetBuses() { for _, r := range bus.ActiveRegions() { vgName := pmemcommon.VgName(bus, r) if r.Type() != ndctl.PmemRegion { @@ -57,10 +58,10 @@ func NewPmemDeviceManagerLVM(pmemPercentage uint) (PmemDeviceManager, error) { continue } - if err := createNS(r, pmemPercentage); err != nil { + if err := setupNS(r, pmemPercentage); err != nil { return nil, err } - if err := createVolumesForRegion(r, vgName); err != nil { + if err := setupVG(r, vgName); err != nil { return nil, err } if _, err := pmemexec.RunCommand("vgs", vgName); err != nil { @@ -70,7 +71,6 @@ func NewPmemDeviceManagerLVM(pmemPercentage uint) (PmemDeviceManager, error) { } } } - ctx.Free() return NewPmemDeviceManagerLVMForVGs(volumeGroups) } @@ -305,7 +305,8 @@ const ( GB uint64 = 1024 * 1024 * 1024 ) -func createNS(r *ndctl.Region, percentage uint) error { +// setupNS checks if a namespace needs to be created in the region and if so, does that. 
+func setupNS(r *ndctl.Region, percentage uint) error { align := GB realalign := align * r.InterleaveWays() canUse := uint64(percentage) * (r.Size() / 100) @@ -356,14 +357,15 @@ func createNS(r *ndctl.Region, percentage uint) error { return nil } -func createVolumesForRegion(r *ndctl.Region, vgName string) error { - cmd := "" - cmdArgs := []string{"--force", vgName} +// setupVG ensures that all namespaces with name "pmem-csi" in the region +// are part of the volume group. +func setupVG(r *ndctl.Region, vgName string) error { nsArray := r.ActiveNamespaces() if len(nsArray) == 0 { klog.V(3).Infof("No active namespaces in region %s", r.DeviceName()) return nil } + var devNames []string for _, ns := range nsArray { // consider only namespaces having name given by this driver, to exclude foreign ones if ns.Name() == "pmem-csi" { @@ -372,14 +374,16 @@ func createVolumesForRegion(r *ndctl.Region, vgName string) error { if not add to arg list */ output, err := pmemexec.RunCommand("pvs", "--noheadings", "-o", "vg_name", devName) if err != nil || len(strings.TrimSpace(output)) == 0 { - cmdArgs = append(cmdArgs, devName) + devNames = append(devNames, devName) } } } - if len(cmdArgs) == 2 { + if len(devNames) == 0 { klog.V(3).Infof("no new namespace found to add to this group: %s", vgName) return nil } + + cmd := "" if _, err := pmemexec.RunCommand("vgdisplay", vgName); err != nil { klog.V(3).Infof("No volume group with name %v, mark for creation", vgName) cmd = "vgcreate" @@ -388,6 +392,8 @@ func createVolumesForRegion(r *ndctl.Region, vgName string) error { cmd = "vgextend" } + cmdArgs := []string{"--force", vgName} + cmdArgs = append(cmdArgs, devNames...) _, err := pmemexec.RunCommand(cmd, cmdArgs...) //nolint gosec if err != nil { return fmt.Errorf("failed to create/extend volume group '%s': %v", vgName, err)