This repository was archived by the owner on Oct 22, 2024. It is now read-only.

LVM fixes #714

Merged 3 commits on Aug 25, 2020
37 changes: 25 additions & 12 deletions pkg/pmem-device-manager/pmd-lvm.go
@@ -43,19 +43,25 @@ func NewPmemDeviceManagerLVM(pmemPercentage uint) (PmemDeviceManager, error) {
lvmMutex.Lock()
defer lvmMutex.Unlock()

ctx, err := ndctl.NewContext()
ndctx, err := ndctl.NewContext()
if err != nil {
return nil, err
}
defer ndctx.Free()

volumeGroups := []string{}
for _, bus := range ctx.GetBuses() {
for _, bus := range ndctx.GetBuses() {
for _, r := range bus.ActiveRegions() {
vgName := pmemcommon.VgName(bus, r)
if err := createNS(r, pmemPercentage); err != nil {
if r.Type() != ndctl.PmemRegion {
klog.Infof("Region is not suitable for fsdax, skipping it: id = %q, device %q", r.ID(), r.DeviceName())
continue
}

if err := setupNS(r, pmemPercentage); err != nil {
return nil, err
}
if err := createVolumesForRegion(r, vgName); err != nil {
if err := setupVG(r, vgName); err != nil {
return nil, err
}
if _, err := pmemexec.RunCommand("vgs", vgName); err != nil {
@@ -65,7 +71,6 @@ func NewPmemDeviceManagerLVM(pmemPercentage uint) (PmemDeviceManager, error) {
}
}
}
ctx.Free()

return NewPmemDeviceManagerLVMForVGs(volumeGroups)
}
@@ -300,7 +305,8 @@ const (
GB uint64 = 1024 * 1024 * 1024
)

func createNS(r *ndctl.Region, percentage uint) error {
// setupNS checks if a namespace needs to be created in the region and if so, does that.
func setupNS(r *ndctl.Region, percentage uint) error {
align := GB
realalign := align * r.InterleaveWays()
canUse := uint64(percentage) * (r.Size() / 100)
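
As a concrete illustration of the sizing above, here is a small standalone sketch with made-up numbers (a 128 GiB region interleaved across two DIMMs and pmemPercentage = 50); it reproduces only the two computations visible in this hunk, nothing else from setupNS.

package main

import "fmt"

func main() {
	const GB uint64 = 1024 * 1024 * 1024

	// Hypothetical inputs, purely illustrative and not taken from this PR:
	regionSize := 128 * GB      // r.Size()
	interleaveWays := uint64(2) // r.InterleaveWays()
	percentage := uint64(50)    // pmemPercentage

	align := GB
	realalign := align * interleaveWays       // effective alignment grows with the interleave ways: 2 GiB here
	canUse := percentage * (regionSize / 100) // roughly 64 GiB that the driver may consume in this region

	fmt.Printf("realalign = %d bytes, canUse = %d bytes\n", realalign, canUse)
}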
@@ -343,20 +349,23 @@ func createNS(r *ndctl.Region, percentage uint) error {
Size: canUse,
Align: align,
})
return fmt.Errorf("failed to create PMEM namespace with size '%d' in region '%s': %v", canUse, r.DeviceName(), err)
if err != nil {
Contributor:

I am surprised that this state of the code ever worked, as it always returns an error... I am quite sure I wrote that if-statement there when this code was developed. It seems that between commits 5365a34 and 0748a91 the whole function was moved, leaving those (quite important) if-lines behind. Just curious, what was the reasoning for leaving out the if-part? And how did the testing pass then? We don't have tests for that part?

Contributor:

In other words, the current fix is not new code, but actually restores code that was there originally.

Contributor:

You are right @okartau, it was unintentionally introduced by me while moving the code as part of removing the init-container from the driver.

Contributor:

> And how did the testing pass then? We don't have tests for that part?

We do not have tests to cover this part of the code. The current tests cover only create, delete, and list devices on a pre-populated logical volume group.

Contributor Author:

We do go through this code once when installing a driver in LVM mode for the first time. The error return then causes a restart of the container. The next instance of the container then gets past this error because it uses the then-existing namespace instead of entering this faulty branch again.

The new check for "no restarts" failed when this line wasn't fixed and passed once it was.
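
To illustrate what this thread describes, here is a minimal, self-contained reduction of the error handling (a sketch, not the actual driver code): before the fix the wrapped error was returned unconditionally; after the fix it is returned only when namespace creation really failed.

package main

import "fmt"

// createNamespace stands in for r.CreateNamespace(...); a stub that always
// succeeds, used only to keep this sketch self-contained.
func createNamespace() error { return nil }

// setupNSBefore mirrors the moved code: the wrapped error is returned
// unconditionally, so the function fails even when creation succeeded.
func setupNSBefore() error {
	err := createNamespace()
	return fmt.Errorf("failed to create PMEM namespace: %v", err)
}

// setupNSAfter mirrors the restored logic: only a real failure is propagated.
func setupNSAfter() error {
	if err := createNamespace(); err != nil {
		return fmt.Errorf("failed to create PMEM namespace: %v", err)
	}
	return nil
}

func main() {
	fmt.Println("before the fix:", setupNSBefore()) // always non-nil
	fmt.Println("after the fix: ", setupNSAfter())  // nil on success
}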

return fmt.Errorf("failed to create PMEM namespace with size '%d' in region '%s': %v", canUse, r.DeviceName(), err)
}
}

return nil
}

func createVolumesForRegion(r *ndctl.Region, vgName string) error {
cmd := ""
cmdArgs := []string{"--force", vgName}
// setupVG ensures that all namespaces with name "pmem-csi" in the region
// are part of the volume group.
func setupVG(r *ndctl.Region, vgName string) error {
nsArray := r.ActiveNamespaces()
if len(nsArray) == 0 {
klog.V(3).Infof("No active namespaces in region %s", r.DeviceName())
return nil
}
var devNames []string
for _, ns := range nsArray {
// consider only namespaces having name given by this driver, to exclude foreign ones
if ns.Name() == "pmem-csi" {
@@ -365,14 +374,16 @@ func createVolumesForRegion(r *ndctl.Region, vgName string) error {
if not add to arg list */
output, err := pmemexec.RunCommand("pvs", "--noheadings", "-o", "vg_name", devName)
if err != nil || len(strings.TrimSpace(output)) == 0 {
cmdArgs = append(cmdArgs, devName)
devNames = append(devNames, devName)
}
}
}
if len(cmdArgs) == 2 {
if len(devNames) == 0 {
klog.V(3).Infof("no new namespace found to add to this group: %s", vgName)
return nil
}

cmd := ""
if _, err := pmemexec.RunCommand("vgdisplay", vgName); err != nil {
klog.V(3).Infof("No volume group with name %v, mark for creation", vgName)
cmd = "vgcreate"
@@ -381,6 +392,8 @@ func createVolumesForRegion(r *ndctl.Region, vgName string) error {
cmd = "vgextend"
}

cmdArgs := []string{"--force", vgName}
cmdArgs = append(cmdArgs, devNames...)
_, err := pmemexec.RunCommand(cmd, cmdArgs...) //nolint gosec
if err != nil {
return fmt.Errorf("failed to create/extend volume group '%s': %v", vgName, err)
73 changes: 73 additions & 0 deletions test/e2e/deploy/deploy.go
@@ -19,13 +19,16 @@ import (

"github.com/prometheus/common/expfmt"
v1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
apierrs "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/kubernetes/test/e2e/framework"

api "github.com/intel/pmem-csi/pkg/apis/pmemcsi/v1alpha1"

"github.com/onsi/ginkgo"
"github.com/onsi/gomega"
)

const (
@@ -181,6 +184,29 @@ func WaitForPMEMDriver(c *Cluster, name, namespace string) (metricsURL string) {
}
}

// CheckPMEMDriver does some sanity checks for a running deployment.
func CheckPMEMDriver(c *Cluster, deployment *Deployment) {
pods, err := c.cs.CoreV1().Pods(deployment.Namespace).List(context.Background(),
metav1.ListOptions{
LabelSelector: fmt.Sprintf("%s in (%s)", deploymentLabel, deployment.Name),
},
)
framework.ExpectNoError(err, "list PMEM-CSI pods")
gomega.Expect(len(pods.Items)).Should(gomega.BeNumerically(">", 0), "should have PMEM-CSI pods")
for _, pod := range pods.Items {
for _, containerStatus := range pod.Status.ContainerStatuses {
if containerStatus.RestartCount > 0 {
Contributor:

Maybe we should relax this condition and limit it to only our driver container. Test failures also showed that the driver-registrar container restarts; I am not sure if it exits after a timeout?

[2020-08-24T22:19:20.745Z] [BeforeEach] lvm-testing
[2020-08-24T22:19:20.745Z]   /mnt/workspace/pmem-csi_PR-714/test/e2e/deploy/deploy.go:599
[2020-08-24T22:19:20.745Z] STEP: preparing for test "lvm-testing TLS controller is secure" in namespace default
[2020-08-24T22:19:20.745Z] Aug 24 22:19:20.469: INFO: No external IP address on nodes, falling back to internal IPs
[2020-08-24T22:19:21.540Z] Aug 24 22:19:21.361: INFO: reusing existing lvm-testing PMEM-CSI components
[2020-08-24T22:19:21.540Z] Aug 24 22:19:21.361: INFO: Waiting for PMEM-CSI driver.
[2020-08-24T22:19:21.540Z] pmem-csi-controller-0/[email protected]: I0824 22:19:21.379354       1 pmem-csi-driver.go:419] HTTP request: GET "/metrics" from 10.32.0.1:11586 Go-http-client/1.1
[2020-08-24T22:19:21.540Z] Aug 24 22:19:21.390: INFO: Done with waiting, PMEM-CSI driver v0.5.0-rc1-728-g149ec0c1 is ready.
[2020-08-24T22:19:21.540Z] Aug 24 22:19:21.396: FAIL: container "driver-registrar" in pod "pmem-csi-node-bb7nz" restarted 1 times, last state: {Waiting:nil Running:nil Terminated:&ContainerStateTerminated{ExitCode:255,Signal:0,Reason:Unknown,Message:,StartedAt:2020-08-24 21:54:06 +0000 UTC,FinishedAt:2020-08-24 22:17:28 +0000 UTC,ContainerID:containerd://3c29e4b8653ef6d8fcfdf77d122151c28b6f2d22d751a7c0e220891fcf9179f1,}}

Contributor Author:

Or we fix this problem. If it's because of a timeout, then we may be able to increase that timeout.

Contributor Author:

I think it's much simpler: that happened during the reboot tests, so of course all containers were restarted...

I'm not sure yet how to deal with this. The check is useful because it highlights a problem that went unnoticed without it for a while. Perhaps clean up after the reboot tests by killing all pods?

Contributor:

In this case, checking whether the pod's container restart count == node reboot count might work.

Contributor Author (@pohly, Aug 25, 2020):

I've implemented the "delete pods" approach.

Checking for node reboots is fragile and more complex. Is there even a counter for node restarts?

Contributor Author:

@avalluri, okay now? Tests have passed.

Contributor:

Yes, we can merge.
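
For reference, here is a rough sketch of the alternative raised at the top of this thread: failing only when the driver container itself restarts and ignoring sidecars such as driver-registrar. This is not what got merged (the PR instead deletes the pods after the reboot tests), and the helper below does not exist in the repository; the driver container name would have to be supplied by the caller.

package deploy

import (
	v1 "k8s.io/api/core/v1"
	"k8s.io/kubernetes/test/e2e/framework"
)

// checkDriverContainerRestarts fails the test only when the named driver
// container restarted, ignoring sidecars such as driver-registrar. This is a
// sketch of the alternative discussed above, not code from this PR.
func checkDriverContainerRestarts(pods []v1.Pod, driverContainerName string) {
	for _, pod := range pods {
		for _, containerStatus := range pod.Status.ContainerStatuses {
			if containerStatus.Name != driverContainerName {
				continue
			}
			if containerStatus.RestartCount > 0 {
				framework.Failf("driver container %q in pod %q restarted %d times, last state: %+v",
					containerStatus.Name,
					pod.Name,
					containerStatus.RestartCount,
					containerStatus.LastTerminationState,
				)
			}
		}
	}
}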

framework.Failf("container %q in pod %q restarted %d times, last state: %+v",
containerStatus.Name,
pod.Name,
containerStatus.RestartCount,
containerStatus.LastTerminationState,
)
}
}
}
}

// RemoveObjects deletes everything that might have been created for a
// PMEM-CSI driver or operator installation (pods, daemonsets,
// statefulsets, driver info, storage classes, etc.).
@@ -559,6 +585,9 @@ func Parse(deploymentName string) (*Deployment, error) {
// a test runs, the desired deployment exists. Deployed drivers are intentionally
// kept running to speed up the execution of multiple tests that all want the
// same kind of deployment.
//
// The driver should never restart. A restart would indicate some
// (potentially intermittent) issue.
func EnsureDeployment(deploymentName string) *Deployment {
deployment, err := Parse(deploymentName)
if err != nil {
@@ -588,6 +617,7 @@ func EnsureDeployment(deploymentName string) *Deployment {
// Do some sanity checks on the running deployment before the test.
if deployment.HasDriver {
WaitForPMEMDriver(c, "pmem-csi", deployment.Namespace)
CheckPMEMDriver(c, deployment)
}
if deployment.HasOperator {
WaitForOperator(c, deployment.Namespace)
@@ -636,6 +666,7 @@ func EnsureDeployment(deploymentName string) *Deployment {
// looking at the driver state. Long-term we want the operator to do that
// checking itself.
WaitForPMEMDriver(c, "pmem-csi", deployment.Namespace)
CheckPMEMDriver(c, deployment)
}

for _, h := range installHooks {
@@ -695,6 +726,31 @@ func (d *Deployment) GetDriverDeployment() api.Deployment {
}
}

// DeleteAllPods deletes all currently running pods that belong to the deployment.
func (d Deployment) DeleteAllPods(c *Cluster) error {
listOptions := metav1.ListOptions{
LabelSelector: fmt.Sprintf("%s in (%s)", deploymentLabel, d.Name),
}
pods, err := c.cs.CoreV1().Pods(d.Namespace).List(context.Background(), listOptions)
if err != nil {
return fmt.Errorf("list all PMEM-CSI pods: %v", err)
}
// Kick off deletion of several pods at once.
if err := c.cs.CoreV1().Pods(d.Namespace).DeleteCollection(context.Background(),
metav1.DeleteOptions{},
listOptions,
); err != nil {
return fmt.Errorf("delete all PMEM-CSI pods: %v", err)
}
// But still wait for every single one to be gone...
for _, pod := range pods.Items {
if err := waitForPodDeletion(c, pod); err != nil {
return fmt.Errorf("wait for pod deletion: %v", err)
}
}
return nil
}

// DescribeForAll registers tests like gomega.Describe does, except that
// each test will then be invoked for each supported PMEM-CSI deployment
// which has a functional PMEM-CSI driver.
@@ -781,3 +837,20 @@ func DefineTests() {
}
}
}

// waitForPodDeletion returns an error if it takes too long for the pod to fully terminate.
func waitForPodDeletion(c *Cluster, pod v1.Pod) error {
return wait.PollImmediate(2*time.Second, time.Minute, func() (bool, error) {
existingPod, err := c.cs.CoreV1().Pods(pod.Namespace).Get(context.Background(), pod.Name, metav1.GetOptions{})
if apierrors.IsNotFound(err) {
return true, nil // done
}
if err != nil {
return true, err // stop wait with error
}
if pod.UID != existingPod.UID {
return true, nil // also done (pod was restarted)
}
return false, nil
})
}
39 changes: 32 additions & 7 deletions test/e2e/storage/sanity.go
@@ -204,22 +204,29 @@ var _ = deploy.DescribeForSome("sanity", func(d *deploy.Deployment) bool {
// is allocated dynamically and changes when redeploying. Therefore
// we register a hook which clears the connection when PMEM-CSI
// gets re-deployed.
scFinalize := func() {
sc.Finalize()
// Not sure why this isn't in Finalize - a bug?
sc.Conn = nil
sc.ControllerConn = nil
}
deploy.AddUninstallHook(func(deploymentName string) {
framework.Logf("sanity: deployment %s is gone, closing test connections to controller %s and node %s.",
deploymentName,
config.ControllerAddress,
config.Address)
sc.Finalize()
scFinalize()
})

var _ = Describe("PMEM-CSI", func() {
var (
cl *sanity.Cleanup
nc csi.NodeClient
cc, ncc csi.ControllerClient
nodeID string
v volume
cancel func()
cl *sanity.Cleanup
nc csi.NodeClient
cc, ncc csi.ControllerClient
nodeID string
v volume
cancel func()
rebooted bool
)

BeforeEach(func() {
@@ -234,6 +241,7 @@ var _ = deploy.DescribeForSome("sanity", func(d *deploy.Deployment) bool {
ControllerPublishSupported: true,
NodeStageSupported: true,
}
rebooted = false
nid, err := nc.NodeGetInfo(
context.Background(),
&csi.NodeGetInfoRequest{})
@@ -256,6 +264,19 @@ var _ = deploy.DescribeForSome("sanity", func(d *deploy.Deployment) bool {
cl.DeleteVolumes()
cancel()
sc.Teardown()

if rebooted {
// Remove all cached connections, too.
scFinalize()

// Rebooting a node increases the restart counter of
// the containers. This is normal in that case, but
// for the next test triggers the check that
// containers shouldn't restart. To get around that,
// we delete all PMEM-CSI pods after a reboot test.
By("stopping all PMEM-CSI pods after rebooting some node(s)")
d.DeleteAllPods(cluster)
}
})

It("stores state across reboots for single volume", func() {
@@ -273,7 +294,9 @@ var _ = deploy.DescribeForSome("sanity", func(d *deploy.Deployment) bool {
createdVolumes, err := ncc.ListVolumes(context.Background(), &csi.ListVolumesRequest{})
framework.ExpectNoError(err, "Failed to list volumes after reboot")
Expect(createdVolumes.Entries).To(HaveLen(len(initialVolumes.Entries)+1), "one more volume on : %s", nodeID)

// Restart.
rebooted = true
restartNode(f.ClientSet, nodeID, sc)

// Once we get an answer, it is expected to be the same as before.
@@ -295,6 +318,7 @@ var _ = deploy.DescribeForSome("sanity", func(d *deploy.Deployment) bool {
nodeID := v.publish(name, vol)

// Restart.
rebooted = true
restartNode(f.ClientSet, nodeID, sc)

_, err := ncc.ListVolumes(context.Background(), &csi.ListVolumesRequest{})
@@ -359,6 +383,7 @@ var _ = deploy.DescribeForSome("sanity", func(d *deploy.Deployment) bool {
capacity, err := cc.GetCapacity(context.Background(), &csi.GetCapacityRequest{})
framework.ExpectNoError(err, "get capacity before restart")

rebooted = true
restartNode(f.ClientSet, controllerNode, sc)

_, err = WaitForPodsWithLabelRunningReady(f.ClientSet, d.Namespace,