This repository was archived by the owner on Oct 22, 2024. It is now read-only.

LVM fixes #714

Merged 3 commits on Aug 25, 2020
37 changes: 25 additions & 12 deletions pkg/pmem-device-manager/pmd-lvm.go
@@ -43,19 +43,25 @@ func NewPmemDeviceManagerLVM(pmemPercentage uint) (PmemDeviceManager, error) {
lvmMutex.Lock()
defer lvmMutex.Unlock()

ctx, err := ndctl.NewContext()
ndctx, err := ndctl.NewContext()
if err != nil {
return nil, err
}
defer ndctx.Free()

volumeGroups := []string{}
for _, bus := range ctx.GetBuses() {
for _, bus := range ndctx.GetBuses() {
for _, r := range bus.ActiveRegions() {
vgName := pmemcommon.VgName(bus, r)
if err := createNS(r, pmemPercentage); err != nil {
if r.Type() != ndctl.PmemRegion {
klog.Infof("Region is not suitable for fsdax, skipping it: id = %q, device %q", r.ID(), r.DeviceName())
continue
}

if err := setupNS(r, pmemPercentage); err != nil {
return nil, err
}
if err := createVolumesForRegion(r, vgName); err != nil {
if err := setupVG(r, vgName); err != nil {
return nil, err
}
if _, err := pmemexec.RunCommand("vgs", vgName); err != nil {
@@ -65,7 +71,6 @@ func NewPmemDeviceManagerLVM(pmemPercentage uint) (PmemDeviceManager, error) {
}
}
}
ctx.Free()

return NewPmemDeviceManagerLVMForVGs(volumeGroups)
}
@@ -300,7 +305,8 @@ const (
GB uint64 = 1024 * 1024 * 1024
)

func createNS(r *ndctl.Region, percentage uint) error {
// setupNS checks if a namespace needs to be created in the region and if so, does that.
func setupNS(r *ndctl.Region, percentage uint) error {
align := GB
realalign := align * r.InterleaveWays()
canUse := uint64(percentage) * (r.Size() / 100)
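
As a concrete illustration of the sizing above, here is a small standalone sketch with made-up numbers (a 128 GiB region interleaved across two DIMMs and pmemPercentage = 50); it reproduces only the two computations visible in this hunk, nothing else from setupNS.

package main

import "fmt"

func main() {
	const GB uint64 = 1024 * 1024 * 1024

	// Hypothetical inputs, purely illustrative and not taken from this PR:
	regionSize := 128 * GB      // r.Size()
	interleaveWays := uint64(2) // r.InterleaveWays()
	percentage := uint64(50)    // pmemPercentage

	align := GB
	realalign := align * interleaveWays       // effective alignment grows with the interleave ways: 2 GiB here
	canUse := percentage * (regionSize / 100) // roughly 64 GiB that the driver may consume in this region

	fmt.Printf("realalign = %d bytes, canUse = %d bytes\n", realalign, canUse)
}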
@@ -343,20 +349,23 @@ func createNS(r *ndctl.Region, percentage uint) error {
Size: canUse,
Align: align,
})
return fmt.Errorf("failed to create PMEM namespace with size '%d' in region '%s': %v", canUse, r.DeviceName(), err)
if err != nil {
Contributor:

I am surprised that this state of the code ever worked, as it always returns an error... I am quite sure I wrote that if-statement there when this code was developed. It seems that between commits 5365a34 and 0748a91 the whole function was moved, leaving those (quite important) if-lines behind. Just curious, what was the reasoning for leaving out the if-part? And how did the testing pass then? We don't have tests for that part?

Contributor:

In other words, the current fix is not new code, but actually restores code that was there originally.

Contributor:

You are right @okartau, it was unintentionally introduced by me while moving the code as part of removing the init-container from the driver.

Contributor:

> And how did the testing pass then? We don't have tests for that part?

We do not have tests to cover this part of the code. The current tests cover only create, delete, and list devices on a pre-populated logical volume group.

Contributor Author:

We do go through this code once when installing a driver in LVM mode for the first time. The error return then causes a restart of the container. The next instance of the container then gets past this error because it uses the then-existing namespace instead of entering this faulty branch again.

The new check for "no restarts" failed when this line wasn't fixed and passed once it was.
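
To illustrate what this thread describes, here is a minimal, self-contained reduction of the error handling (a sketch, not the actual driver code): before the fix the wrapped error was returned unconditionally; after the fix it is returned only when namespace creation really failed.

package main

import "fmt"

// createNamespace stands in for r.CreateNamespace(...); a stub that always
// succeeds, used only to keep this sketch self-contained.
func createNamespace() error { return nil }

// setupNSBefore mirrors the moved code: the wrapped error is returned
// unconditionally, so the function fails even when creation succeeded.
func setupNSBefore() error {
	err := createNamespace()
	return fmt.Errorf("failed to create PMEM namespace: %v", err)
}

// setupNSAfter mirrors the restored logic: only a real failure is propagated.
func setupNSAfter() error {
	if err := createNamespace(); err != nil {
		return fmt.Errorf("failed to create PMEM namespace: %v", err)
	}
	return nil
}

func main() {
	fmt.Println("before the fix:", setupNSBefore()) // always non-nil
	fmt.Println("after the fix: ", setupNSAfter())  // nil on success
}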

return fmt.Errorf("failed to create PMEM namespace with size '%d' in region '%s': %v", canUse, r.DeviceName(), err)
}
}

return nil
}

func createVolumesForRegion(r *ndctl.Region, vgName string) error {
cmd := ""
cmdArgs := []string{"--force", vgName}
// setupVG ensures that all namespaces with name "pmem-csi" in the region
// are part of the volume group.
func setupVG(r *ndctl.Region, vgName string) error {
nsArray := r.ActiveNamespaces()
if len(nsArray) == 0 {
klog.V(3).Infof("No active namespaces in region %s", r.DeviceName())
return nil
}
var devNames []string
for _, ns := range nsArray {
// consider only namespaces having name given by this driver, to exclude foreign ones
if ns.Name() == "pmem-csi" {
@@ -365,14 +374,16 @@ func createVolumesForRegion(r *ndctl.Region, vgName string) error {
if not add to arg list */
output, err := pmemexec.RunCommand("pvs", "--noheadings", "-o", "vg_name", devName)
if err != nil || len(strings.TrimSpace(output)) == 0 {
cmdArgs = append(cmdArgs, devName)
devNames = append(devNames, devName)
}
}
}
if len(cmdArgs) == 2 {
if len(devNames) == 0 {
klog.V(3).Infof("no new namespace found to add to this group: %s", vgName)
return nil
}

cmd := ""
if _, err := pmemexec.RunCommand("vgdisplay", vgName); err != nil {
klog.V(3).Infof("No volume group with name %v, mark for creation", vgName)
cmd = "vgcreate"
@@ -381,6 +392,8 @@ func createVolumesForRegion(r *ndctl.Region, vgName string) error {
cmd = "vgextend"
}

cmdArgs := []string{"--force", vgName}
cmdArgs = append(cmdArgs, devNames...)
_, err := pmemexec.RunCommand(cmd, cmdArgs...) //nolint gosec
if err != nil {
return fmt.Errorf("failed to create/extend volume group '%s': %v", vgName, err)
73 changes: 73 additions & 0 deletions test/e2e/deploy/deploy.go
@@ -19,13 +19,16 @@ import (

"github.com/prometheus/common/expfmt"
v1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
apierrs "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/kubernetes/test/e2e/framework"

api "github.com/intel/pmem-csi/pkg/apis/pmemcsi/v1alpha1"

"github.com/onsi/ginkgo"
"github.com/onsi/gomega"
)

const (
@@ -181,6 +184,29 @@ func WaitForPMEMDriver(c *Cluster, name, namespace string) (metricsURL string) {
}
}

// CheckPMEMDriver does some sanity checks for a running deployment.
func CheckPMEMDriver(c *Cluster, deployment *Deployment) {
pods, err := c.cs.CoreV1().Pods(deployment.Namespace).List(context.Background(),
metav1.ListOptions{
LabelSelector: fmt.Sprintf("%s in (%s)", deploymentLabel, deployment.Name),
},
)
framework.ExpectNoError(err, "list PMEM-CSI pods")
gomega.Expect(len(pods.Items)).Should(gomega.BeNumerically(">", 0), "should have PMEM-CSI pods")
for _, pod := range pods.Items {
for _, containerStatus := range pod.Status.ContainerStatuses {
if containerStatus.RestartCount > 0 {
Contributor:

Maybe we should relax this condition and limit it to only our driver container. Test failures also showed that the driver-registrar container restarts; I am not sure if it exits after a timeout?

[2020-08-24T22:19:20.745Z] [BeforeEach] lvm-testing
[2020-08-24T22:19:20.745Z]   /mnt/workspace/pmem-csi_PR-714/test/e2e/deploy/deploy.go:599
[2020-08-24T22:19:20.745Z] STEP: preparing for test "lvm-testing TLS controller is secure" in namespace default
[2020-08-24T22:19:20.745Z] Aug 24 22:19:20.469: INFO: No external IP address on nodes, falling back to internal IPs
[2020-08-24T22:19:21.540Z] Aug 24 22:19:21.361: INFO: reusing existing lvm-testing PMEM-CSI components
[2020-08-24T22:19:21.540Z] Aug 24 22:19:21.361: INFO: Waiting for PMEM-CSI driver.
[2020-08-24T22:19:21.540Z] pmem-csi-controller-0/[email protected]: I0824 22:19:21.379354       1 pmem-csi-driver.go:419] HTTP request: GET "/metrics" from 10.32.0.1:11586 Go-http-client/1.1
[2020-08-24T22:19:21.540Z] Aug 24 22:19:21.390: INFO: Done with waiting, PMEM-CSI driver v0.5.0-rc1-728-g149ec0c1 is ready.
[2020-08-24T22:19:21.540Z] Aug 24 22:19:21.396: FAIL: container "driver-registrar" in pod "pmem-csi-node-bb7nz" restarted 1 times, last state: {Waiting:nil Running:nil Terminated:&ContainerStateTerminated{ExitCode:255,Signal:0,Reason:Unknown,Message:,StartedAt:2020-08-24 21:54:06 +0000 UTC,FinishedAt:2020-08-24 22:17:28 +0000 UTC,ContainerID:containerd://3c29e4b8653ef6d8fcfdf77d122151c28b6f2d22d751a7c0e220891fcf9179f1,}}

Contributor Author:

Or we fix this problem. If it's because of a timeout, then we may be able to increase that timeout.

Contributor Author:

I think it's much simpler: that happened during the reboot tests, so of course all containers were restarted...

I'm not sure yet how to deal with this. The check is useful because it highlights a problem that went unnoticed without it for a while. Perhaps clean up after the reboot tests by killing all pods?

Contributor:

In this case, checking whether the pod's container restart count == node reboot count might work.

Contributor Author (@pohly, Aug 25, 2020):

I've implemented the "delete pods" approach.

Checking for node reboots is fragile and more complex. Is there even a counter for node restarts?

Contributor Author:

@avalluri, okay now? Tests have passed.

Contributor:

Yes, we can merge.
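
For reference, here is a rough sketch of the alternative raised at the top of this thread: failing only when the driver container itself restarts and ignoring sidecars such as driver-registrar. This is not what got merged (the PR instead deletes the pods after the reboot tests), and the helper below does not exist in the repository; the driver container name would have to be supplied by the caller.

package deploy

import (
	v1 "k8s.io/api/core/v1"
	"k8s.io/kubernetes/test/e2e/framework"
)

// checkDriverContainerRestarts fails the test only when the named driver
// container restarted, ignoring sidecars such as driver-registrar. This is a
// sketch of the alternative discussed above, not code from this PR.
func checkDriverContainerRestarts(pods []v1.Pod, driverContainerName string) {
	for _, pod := range pods {
		for _, containerStatus := range pod.Status.ContainerStatuses {
			if containerStatus.Name != driverContainerName {
				continue
			}
			if containerStatus.RestartCount > 0 {
				framework.Failf("driver container %q in pod %q restarted %d times, last state: %+v",
					containerStatus.Name,
					pod.Name,
					containerStatus.RestartCount,
					containerStatus.LastTerminationState,
				)
			}
		}
	}
}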

framework.Failf("container %q in pod %q restarted %d times, last state: %+v",
containerStatus.Name,
pod.Name,
containerStatus.RestartCount,
containerStatus.LastTerminationState,
)
}
}
}
}

// RemoveObjects deletes everything that might have been created for a
// PMEM-CSI driver or operator installation (pods, daemonsets,
// statefulsets, driver info, storage classes, etc.).
@@ -559,6 +585,9 @@ func Parse(deploymentName string) (*Deployment, error) {
// a test runs, the desired deployment exists. Deployed drivers are intentionally
// kept running to speed up the execution of multiple tests that all want the
// same kind of deployment.
//
// The driver should never restart. A restart would indicate some
// (potentially intermittent) issue.
func EnsureDeployment(deploymentName string) *Deployment {
deployment, err := Parse(deploymentName)
if err != nil {
@@ -588,6 +617,7 @@ func EnsureDeployment(deploymentName string) *Deployment {
// Do some sanity checks on the running deployment before the test.
if deployment.HasDriver {
WaitForPMEMDriver(c, "pmem-csi", deployment.Namespace)
CheckPMEMDriver(c, deployment)
}
if deployment.HasOperator {
WaitForOperator(c, deployment.Namespace)
@@ -636,6 +666,7 @@ func EnsureDeployment(deploymentName string) *Deployment {
// looking at the driver state. Long-term we want the operator to do that
// checking itself.
WaitForPMEMDriver(c, "pmem-csi", deployment.Namespace)
CheckPMEMDriver(c, deployment)
}

for _, h := range installHooks {
@@ -695,6 +726,31 @@ func (d *Deployment) GetDriverDeployment() api.Deployment {
}
}

// DeleteAllPods deletes all currently running pods that belong to the deployment.
func (d Deployment) DeleteAllPods(c *Cluster) error {
listOptions := metav1.ListOptions{
LabelSelector: fmt.Sprintf("%s in (%s)", deploymentLabel, d.Name),
}
pods, err := c.cs.CoreV1().Pods(d.Namespace).List(context.Background(), listOptions)
if err != nil {
return fmt.Errorf("list all PMEM-CSI pods: %v", err)
}
// Kick off deletion of several pods at once.
if err := c.cs.CoreV1().Pods(d.Namespace).DeleteCollection(context.Background(),
metav1.DeleteOptions{},
listOptions,
); err != nil {
return fmt.Errorf("delete all PMEM-CSI pods: %v", err)
}
// But still wait for every single one to be gone...
for _, pod := range pods.Items {
if err := waitForPodDeletion(c, pod); err != nil {
return fmt.Errorf("wait for pod deletion: %v", err)
}
}
return nil
}

// DescribeForAll registers tests like gomega.Describe does, except that
// each test will then be invoked for each supported PMEM-CSI deployment
// which has a functional PMEM-CSI driver.
@@ -781,3 +837,20 @@ func DefineTests() {
}
}
}

// waitForPodDeletion returns an error if it takes too long for the pod to fully terminate.
func waitForPodDeletion(c *Cluster, pod v1.Pod) error {
return wait.PollImmediate(2*time.Second, time.Minute, func() (bool, error) {
existingPod, err := c.cs.CoreV1().Pods(pod.Namespace).Get(context.Background(), pod.Name, metav1.GetOptions{})
if apierrors.IsNotFound(err) {
return true, nil // done
}
if err != nil {
return true, err // stop wait with error
}
if pod.UID != existingPod.UID {
return true, nil // also done (pod was restarted)
}
return false, nil
})
}
39 changes: 32 additions & 7 deletions test/e2e/storage/sanity.go
@@ -204,22 +204,29 @@ var _ = deploy.DescribeForSome("sanity", func(d *deploy.Deployment) bool {
// is allocated dynamically and changes when redeploying. Therefore
// we register a hook which clears the connection when PMEM-CSI
// gets re-deployed.
scFinalize := func() {
sc.Finalize()
// Not sure why this isn't in Finalize - a bug?
sc.Conn = nil
sc.ControllerConn = nil
}
deploy.AddUninstallHook(func(deploymentName string) {
framework.Logf("sanity: deployment %s is gone, closing test connections to controller %s and node %s.",
deploymentName,
config.ControllerAddress,
config.Address)
sc.Finalize()
scFinalize()
})

var _ = Describe("PMEM-CSI", func() {
var (
cl *sanity.Cleanup
nc csi.NodeClient
cc, ncc csi.ControllerClient
nodeID string
v volume
cancel func()
cl *sanity.Cleanup
nc csi.NodeClient
cc, ncc csi.ControllerClient
nodeID string
v volume
cancel func()
rebooted bool
)

BeforeEach(func() {
@@ -234,6 +241,7 @@ var _ = deploy.DescribeForSome("sanity", func(d *deploy.Deployment) bool {
ControllerPublishSupported: true,
NodeStageSupported: true,
}
rebooted = false
nid, err := nc.NodeGetInfo(
context.Background(),
&csi.NodeGetInfoRequest{})
@@ -256,6 +264,19 @@ var _ = deploy.DescribeForSome("sanity", func(d *deploy.Deployment) bool {
cl.DeleteVolumes()
cancel()
sc.Teardown()

if rebooted {
// Remove all cached connections, too.
scFinalize()

// Rebooting a node increases the restart counter of
// the containers. This is normal in that case, but
// for the next test triggers the check that
// containers shouldn't restart. To get around that,
// we delete all PMEM-CSI pods after a reboot test.
By("stopping all PMEM-CSI pods after rebooting some node(s)")
d.DeleteAllPods(cluster)
}
})

It("stores state across reboots for single volume", func() {
@@ -273,7 +294,9 @@ var _ = deploy.DescribeForSome("sanity", func(d *deploy.Deployment) bool {
createdVolumes, err := ncc.ListVolumes(context.Background(), &csi.ListVolumesRequest{})
framework.ExpectNoError(err, "Failed to list volumes after reboot")
Expect(createdVolumes.Entries).To(HaveLen(len(initialVolumes.Entries)+1), "one more volume on : %s", nodeID)

// Restart.
rebooted = true
restartNode(f.ClientSet, nodeID, sc)

// Once we get an answer, it is expected to be the same as before.
@@ -295,6 +318,7 @@ var _ = deploy.DescribeForSome("sanity", func(d *deploy.Deployment) bool {
nodeID := v.publish(name, vol)

// Restart.
rebooted = true
restartNode(f.ClientSet, nodeID, sc)

_, err := ncc.ListVolumes(context.Background(), &csi.ListVolumesRequest{})
@@ -359,6 +383,7 @@ var _ = deploy.DescribeForSome("sanity", func(d *deploy.Deployment) bool {
capacity, err := cc.GetCapacity(context.Background(), &csi.GetCapacityRequest{})
framework.ExpectNoError(err, "get capacity before restart")

rebooted = true
restartNode(f.ClientSet, controllerNode, sc)

_, err = WaitForPodsWithLabelRunningReady(f.ClientSet, d.Namespace,