From 2bfad4b0dc634c1fda5f4b7be80ddd1f6ef6ecd1 Mon Sep 17 00:00:00 2001 From: Erik Sipsma Date: Wed, 17 Nov 2021 12:00:17 -0800 Subject: [PATCH 1/3] Change integration.Test from a func to a interface Using an interface instead of a func is more flexible while achieving the same effect. It allows you to succintly define a large number of test cases as structs, as is common in table-driven testing. A helper func is added that converts the existing test funcs into the interface, so the change is fairly seamless. Signed-off-by: Erik Sipsma --- client/build_test.go | 12 +++---- client/client_test.go | 12 +++---- cmd/buildctl/buildctl_test.go | 4 +-- .../dockerfile/dockerfile_heredoc_test.go | 4 +-- frontend/dockerfile/dockerfile_mount_test.go | 6 ++-- .../dockerfile/dockerfile_runnetwork_test.go | 4 +-- .../dockerfile/dockerfile_runsecurity_test.go | 4 +-- .../dockerfile/dockerfile_secrets_test.go | 4 +-- frontend/dockerfile/dockerfile_ssh_test.go | 4 +-- frontend/dockerfile/dockerfile_test.go | 8 ++--- frontend/frontend_test.go | 4 +-- solver/jobs_test.go | 4 +-- util/testutil/integration/run.go | 31 +++++++++++++++++-- worker/containerd/containerd_test.go | 4 +-- 14 files changed, 65 insertions(+), 40 deletions(-) diff --git a/client/build_test.go b/client/build_test.go index 4e26b6e3d100..ed704357c053 100644 --- a/client/build_test.go +++ b/client/build_test.go @@ -33,7 +33,7 @@ import ( ) func TestClientGatewayIntegration(t *testing.T) { - integration.Run(t, []integration.Test{ + integration.Run(t, integration.TestFuncs( testClientGatewaySolve, testClientGatewayFailedSolve, testClientGatewayEmptySolve, @@ -53,20 +53,20 @@ func TestClientGatewayIntegration(t *testing.T) { testClientGatewayExecFileActionError, testClientGatewayContainerExtraHosts, testWarnings, - }, integration.WithMirroredImages(integration.OfficialImages("busybox:latest"))) + ), integration.WithMirroredImages(integration.OfficialImages("busybox:latest"))) - integration.Run(t, []integration.Test{ + integration.Run(t, integration.TestFuncs( testClientGatewayContainerSecurityMode, - }, integration.WithMirroredImages(integration.OfficialImages("busybox:latest")), + ), integration.WithMirroredImages(integration.OfficialImages("busybox:latest")), integration.WithMatrix("secmode", map[string]interface{}{ "sandbox": securitySandbox, "insecure": securityInsecure, }), ) - integration.Run(t, []integration.Test{ + integration.Run(t, integration.TestFuncs( testClientGatewayContainerHostNetworking, - }, + ), integration.WithMirroredImages(integration.OfficialImages("busybox:latest")), integration.WithMatrix("netmode", map[string]interface{}{ "default": defaultNetwork, diff --git a/client/client_test.go b/client/client_test.go index 31a1966a158d..092437e00311 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -72,7 +72,7 @@ func TestIntegration(t *testing.T) { mirroredImages["tonistiigi/test:nolayers"] = "docker.io/tonistiigi/test:nolayers" mirrors := integration.WithMirroredImages(mirroredImages) - integration.Run(t, []integration.Test{ + integration.Run(t, integration.TestFuncs( testCacheExportCacheKeyLoop, testRelativeWorkDir, testFileOpMkdirMkfile, @@ -140,13 +140,13 @@ func TestIntegration(t *testing.T) { testMergeOp, testMergeOpCache, testRmSymlink, - }, mirrors) + ), mirrors) - integration.Run(t, []integration.Test{ + integration.Run(t, integration.TestFuncs( testSecurityMode, testSecurityModeSysfs, testSecurityModeErrors, - }, + ), mirrors, integration.WithMatrix("secmode", map[string]interface{}{ "sandbox": securitySandbox, @@ -154,9 +154,9 @@ func TestIntegration(t *testing.T) { }), ) - integration.Run(t, []integration.Test{ + integration.Run(t, integration.TestFuncs( testHostNetworking, - }, + ), mirrors, integration.WithMatrix("netmode", map[string]interface{}{ "default": defaultNetwork, diff --git a/cmd/buildctl/buildctl_test.go b/cmd/buildctl/buildctl_test.go index 71bcad7bc48b..e3c7697bbb13 100644 --- a/cmd/buildctl/buildctl_test.go +++ b/cmd/buildctl/buildctl_test.go @@ -13,7 +13,7 @@ func init() { } func TestCLIIntegration(t *testing.T) { - integration.Run(t, []integration.Test{ + integration.Run(t, integration.TestFuncs( testDiskUsage, testBuildWithLocalFiles, testBuildLocalExporter, @@ -21,7 +21,7 @@ func TestCLIIntegration(t *testing.T) { testBuildMetadataFile, testPrune, testUsage, - }, + ), integration.WithMirroredImages(integration.OfficialImages("busybox:latest")), ) } diff --git a/frontend/dockerfile/dockerfile_heredoc_test.go b/frontend/dockerfile/dockerfile_heredoc_test.go index 102b0dffcf7b..6fbad6d953a0 100644 --- a/frontend/dockerfile/dockerfile_heredoc_test.go +++ b/frontend/dockerfile/dockerfile_heredoc_test.go @@ -18,7 +18,7 @@ import ( "github.com/stretchr/testify/require" ) -var hdTests = []integration.Test{ +var hdTests = integration.TestFuncs( testCopyHeredoc, testRunBasicHeredoc, testRunFakeHeredoc, @@ -27,7 +27,7 @@ var hdTests = []integration.Test{ testHeredocIndent, testHeredocVarSubstitution, testOnBuildHeredoc, -} +) func init() { heredocTests = append(heredocTests, hdTests...) diff --git a/frontend/dockerfile/dockerfile_mount_test.go b/frontend/dockerfile/dockerfile_mount_test.go index 93666f9ad2f4..731d5f2d78dc 100644 --- a/frontend/dockerfile/dockerfile_mount_test.go +++ b/frontend/dockerfile/dockerfile_mount_test.go @@ -14,7 +14,7 @@ import ( "github.com/stretchr/testify/require" ) -var mountTests = []integration.Test{ +var mountTests = integration.TestFuncs( testMountContext, testMountTmpfs, testMountRWCache, @@ -26,12 +26,12 @@ var mountTests = []integration.Test{ testMountFromError, testMountInvalid, testMountTmpfsSize, -} +) func init() { allTests = append(allTests, mountTests...) - fileOpTests = append(fileOpTests, testCacheMountUser) + fileOpTests = append(fileOpTests, integration.TestFuncs(testCacheMountUser)...) } func testMountContext(t *testing.T, sb integration.Sandbox) { diff --git a/frontend/dockerfile/dockerfile_runnetwork_test.go b/frontend/dockerfile/dockerfile_runnetwork_test.go index a8fccac1705c..8244d48f5716 100644 --- a/frontend/dockerfile/dockerfile_runnetwork_test.go +++ b/frontend/dockerfile/dockerfile_runnetwork_test.go @@ -15,12 +15,12 @@ import ( "github.com/stretchr/testify/require" ) -var runNetworkTests = []integration.Test{ +var runNetworkTests = integration.TestFuncs( testRunDefaultNetwork, testRunNoNetwork, testRunHostNetwork, testRunGlobalNetwork, -} +) func init() { networkTests = append(networkTests, runNetworkTests...) diff --git a/frontend/dockerfile/dockerfile_runsecurity_test.go b/frontend/dockerfile/dockerfile_runsecurity_test.go index 358db6733554..1b25fbcf4ce5 100644 --- a/frontend/dockerfile/dockerfile_runsecurity_test.go +++ b/frontend/dockerfile/dockerfile_runsecurity_test.go @@ -15,12 +15,12 @@ import ( "github.com/stretchr/testify/require" ) -var runSecurityTests = []integration.Test{ +var runSecurityTests = integration.TestFuncs( testRunSecurityInsecure, testRunSecuritySandbox, testRunSecurityDefault, testInsecureDevicesWhitelist, -} +) func init() { securityOpts = []integration.TestOpt{ diff --git a/frontend/dockerfile/dockerfile_secrets_test.go b/frontend/dockerfile/dockerfile_secrets_test.go index 519d7bd91e8f..ae00fac07df6 100644 --- a/frontend/dockerfile/dockerfile_secrets_test.go +++ b/frontend/dockerfile/dockerfile_secrets_test.go @@ -13,10 +13,10 @@ import ( "github.com/stretchr/testify/require" ) -var secretsTests = []integration.Test{ +var secretsTests = integration.TestFuncs( testSecretFileParams, testSecretRequiredWithoutValue, -} +) func init() { allTests = append(allTests, secretsTests...) diff --git a/frontend/dockerfile/dockerfile_ssh_test.go b/frontend/dockerfile/dockerfile_ssh_test.go index 535114a2b499..9515aad12882 100644 --- a/frontend/dockerfile/dockerfile_ssh_test.go +++ b/frontend/dockerfile/dockerfile_ssh_test.go @@ -19,9 +19,9 @@ import ( "github.com/stretchr/testify/require" ) -var sshTests = []integration.Test{ +var sshTests = integration.TestFuncs( testSSHSocketParams, -} +) func init() { allTests = append(allTests, sshTests...) diff --git a/frontend/dockerfile/dockerfile_test.go b/frontend/dockerfile/dockerfile_test.go index 2f73e523d7de..66856008f1c2 100644 --- a/frontend/dockerfile/dockerfile_test.go +++ b/frontend/dockerfile/dockerfile_test.go @@ -58,7 +58,7 @@ func init() { } } -var allTests = []integration.Test{ +var allTests = integration.TestFuncs( testCmdShell, testGlobalArg, testDockerfileDirs, @@ -118,9 +118,9 @@ var allTests = []integration.Test{ testShmSize, testUlimit, testCgroupParent, -} +) -var fileOpTests = []integration.Test{ +var fileOpTests = integration.TestFuncs( testEmptyDestDir, testCopyChownCreateDest, testCopyThroughSymlinkContext, @@ -143,7 +143,7 @@ var fileOpTests = []integration.Test{ testWorkdirCopyIgnoreRelative, testCopyFollowAllSymlinks, testDockerfileAddChownExpand, -} +) // Tests that depend on the `security.*` entitlements var securityTests = []integration.Test{} diff --git a/frontend/frontend_test.go b/frontend/frontend_test.go index 42a4bd6905d7..b1d061a265eb 100644 --- a/frontend/frontend_test.go +++ b/frontend/frontend_test.go @@ -28,12 +28,12 @@ func init() { } func TestFrontendIntegration(t *testing.T) { - integration.Run(t, []integration.Test{ + integration.Run(t, integration.TestFuncs( testRefReadFile, testRefReadDir, testRefStatFile, testReturnNil, - }) + )) } func testReturnNil(t *testing.T, sb integration.Sandbox) { diff --git a/solver/jobs_test.go b/solver/jobs_test.go index 6f5d4fb30294..091bd2f8e927 100644 --- a/solver/jobs_test.go +++ b/solver/jobs_test.go @@ -20,9 +20,9 @@ func init() { func TestJobsIntegration(t *testing.T) { mirrors := integration.WithMirroredImages(integration.OfficialImages("busybox:latest")) - integration.Run(t, []integration.Test{ + integration.Run(t, integration.TestFuncs( testParallelism, - }, + ), mirrors, integration.WithMatrix("max-parallelism", map[string]interface{}{ "single": maxParallelismSingle, diff --git a/util/testutil/integration/run.go b/util/testutil/integration/run.go index 66d64efa3b6f..1e9cce49a20d 100644 --- a/util/testutil/integration/run.go +++ b/util/testutil/integration/run.go @@ -68,7 +68,32 @@ type ConfigUpdater interface { UpdateConfigFile(string) string } -type Test func(*testing.T, Sandbox) +type Test interface { + Name() string + Run(t *testing.T, sb Sandbox) +} + +type testFunc struct { + name string + run func(t *testing.T, sb Sandbox) +} + +func (f testFunc) Name() string { + return f.name +} + +func (f testFunc) Run(t *testing.T, sb Sandbox) { + t.Helper() + f.run(t, sb) +} + +func TestFuncs(funcs ...func(t *testing.T, sb Sandbox)) []Test { + var tests []Test + for _, f := range funcs { + tests = append(tests, testFunc{name: getFunctionName(f), run: f}) + } + return tests +} var defaultWorkers []Worker @@ -152,7 +177,7 @@ func Run(t *testing.T, testCases []Test, opt ...TestOpt) { for _, br := range list { for _, tc := range testCases { for _, mv := range matrix { - fn := getFunctionName(tc) + fn := tc.Name() name := fn + "/worker=" + br.Name() + mv.functionSuffix() func(fn, testName string, br Worker, tc Test, mv matrixValue) { ok := t.Run(testName, func(t *testing.T) { @@ -173,7 +198,7 @@ func Run(t *testing.T, testCases []Test, opt ...TestOpt) { sb.PrintLogs(t) } }() - tc(t, sb) + tc.Run(t, sb) }) require.True(t, ok) }(fn, name, br, tc, mv) diff --git a/worker/containerd/containerd_test.go b/worker/containerd/containerd_test.go index 7226b6ae0faf..b93f07d744b7 100644 --- a/worker/containerd/containerd_test.go +++ b/worker/containerd/containerd_test.go @@ -22,10 +22,10 @@ func init() { func TestContainerdWorkerIntegration(t *testing.T) { checkRequirement(t) - integration.Run(t, []integration.Test{ + integration.Run(t, integration.TestFuncs( testContainerdWorkerExec, testContainerdWorkerExecFailures, - }) + )) } func newWorkerOpt(t *testing.T, addr string) (base.WorkerOpt, func()) { From abf373a3b604c487d2ec225c6d4607bfd0f34b94 Mon Sep 17 00:00:00 2001 From: Erik Sipsma Date: Wed, 24 Nov 2021 16:53:26 -0800 Subject: [PATCH 2/3] cache: Disable overlay diff for native snapshotter Before this change, test cases were running with an env var that forces the overlay differ to be on even when the native snapshotter was being used, which resulted in failures. Now, that env var is skipped when using the native snapshotter. Additionally, this includes a related change to skip even trying to use the overlay differ when the native snapshotter is in use. Previously, the blob creation code first tried to use the overlay differ and then failed and fell back to the double-walking differ. Now, it just jumps right to the double-walking differ when the native snapshotter is in use. Signed-off-by: Erik Sipsma --- cache/blobs.go | 2 +- util/testutil/integration/containerd.go | 4 ++++ util/testutil/integration/oci.go | 7 ++++++- util/testutil/integration/sandbox.go | 3 --- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/cache/blobs.go b/cache/blobs.go index 79ea3c711c6f..ddb47f51772a 100644 --- a/cache/blobs.go +++ b/cache/blobs.go @@ -137,7 +137,7 @@ func computeBlobChain(ctx context.Context, sr *immutableRef, createIfNeeded bool case "overlayfs", "stargz": // overlayfs-based snapshotters should support overlay diff. so print warn log on failure. logWarnOnErr = true - case "fuse-overlayfs": + case "fuse-overlayfs", "native": // not supported with fuse-overlayfs snapshotter which doesn't provide overlayfs mounts. // TODO: add support for fuse-overlayfs enableOverlay = false diff --git a/util/testutil/integration/containerd.go b/util/testutil/integration/containerd.go index dc65932e24e9..50c03c2ca2f6 100644 --- a/util/testutil/integration/containerd.go +++ b/util/testutil/integration/containerd.go @@ -9,6 +9,7 @@ import ( "os" "os/exec" "path/filepath" + "runtime" "strings" "time" @@ -148,6 +149,9 @@ disabled_plugins = ["cri"] "--containerd-worker-labels=org.mobyproject.buildkit.worker.sandbox=true", // Include use of --containerd-worker-labels to trigger https://github.com/moby/buildkit/pull/603 }, snBuildkitdArgs...) + if runtime.GOOS != "windows" && c.snapshotter != "native" { + c.extraEnv = append(c.extraEnv, "BUILDKIT_DEBUG_FORCE_OVERLAY_DIFF=true") + } buildkitdSock, stop, err := runBuildkitd(ctx, cfg, buildkitdArgs, cfg.Logs, 0, 0, c.extraEnv) if err != nil { printLogs(cfg.Logs, log.Println) diff --git a/util/testutil/integration/oci.go b/util/testutil/integration/oci.go index 7de99a95697e..ce71643c7041 100644 --- a/util/testutil/integration/oci.go +++ b/util/testutil/integration/oci.go @@ -5,6 +5,7 @@ import ( "fmt" "log" "os" + "runtime" "github.com/pkg/errors" "github.com/sirupsen/logrus" @@ -68,7 +69,11 @@ func (s *oci) New(ctx context.Context, cfg *BackendConfig) (Backend, func() erro buildkitdArgs = append([]string{"sudo", "-u", fmt.Sprintf("#%d", s.uid), "-i", "--", "exec", "rootlesskit"}, buildkitdArgs...) } - buildkitdSock, stop, err := runBuildkitd(ctx, cfg, buildkitdArgs, cfg.Logs, s.uid, s.gid, nil) + var extraEnv []string + if runtime.GOOS != "windows" && s.snapshotter != "native" { + extraEnv = append(extraEnv, "BUILDKIT_DEBUG_FORCE_OVERLAY_DIFF=true") + } + buildkitdSock, stop, err := runBuildkitd(ctx, cfg, buildkitdArgs, cfg.Logs, s.uid, s.gid, extraEnv) if err != nil { printLogs(cfg.Logs, log.Println) return nil, nil, err diff --git a/util/testutil/integration/sandbox.go b/util/testutil/integration/sandbox.go index df23f35768b8..8367f92889a4 100644 --- a/util/testutil/integration/sandbox.go +++ b/util/testutil/integration/sandbox.go @@ -182,9 +182,6 @@ func runBuildkitd(ctx context.Context, conf *BackendConfig, args []string, logs args = append(args, "--root", tmpdir, "--addr", address, "--debug") cmd := exec.Command(args[0], args[1:]...) cmd.Env = append(os.Environ(), "BUILDKIT_DEBUG_EXEC_OUTPUT=1", "BUILDKIT_DEBUG_PANIC_ON_ERROR=1", "TMPDIR="+filepath.Join(tmpdir, "tmp")) - if runtime.GOOS != "windows" { - cmd.Env = append(cmd.Env, "BUILDKIT_DEBUG_FORCE_OVERLAY_DIFF=true") - } cmd.Env = append(cmd.Env, extraEnv...) cmd.SysProcAttr = getSysProcAttr() From 0ddfb544b5143540ae9582c3e9c86b3ec4185b74 Mon Sep 17 00:00:00 2001 From: Erik Sipsma Date: Tue, 23 Nov 2021 17:07:24 -0800 Subject: [PATCH 3/3] snapshot: cleanup diffApply and prepare for DiffOp This breaks the giant blob that was the diffApply function into two separate parts, a differ and an applier, which results in more modular code that should be easier to follow and easier to make any future updates to. For example, if we want to optimize by allowing differ and applier to run in parallel in the future, that's straightforward now. There are also some fixes that weren't needed for MergeOp, but will be for DiffOp, such as correctly handling the case where a deletion is applied that is under parent directories which don't exist yet (the correct behavior is, surprisingly, to create the parent directories as that is what the image import/export code ends up doing). Signed-off-by: Erik Sipsma --- snapshot/diffapply_unix.go | 898 ++++++++++++++++++++++++---------- snapshot/diffapply_windows.go | 8 +- snapshot/localmounter.go | 32 -- snapshot/merge.go | 61 +-- snapshot/snapshotter_test.go | 3 +- util/overlay/overlay_linux.go | 8 +- 6 files changed, 676 insertions(+), 334 deletions(-) diff --git a/snapshot/diffapply_unix.go b/snapshot/diffapply_unix.go index b030e09d6a5e..947f9a8dedab 100644 --- a/snapshot/diffapply_unix.go +++ b/snapshot/diffapply_unix.go @@ -8,6 +8,7 @@ import ( gofs "io/fs" "os" "path/filepath" + "strings" "syscall" "github.com/containerd/containerd/leases" @@ -16,6 +17,7 @@ import ( "github.com/containerd/continuity/fs" "github.com/containerd/continuity/sysx" "github.com/containerd/stargz-snapshotter/snapshot/overlayutils" + "github.com/hashicorp/go-multierror" "github.com/moby/buildkit/identity" "github.com/moby/buildkit/util/bklog" "github.com/moby/buildkit/util/leaseutil" @@ -24,240 +26,353 @@ import ( "golang.org/x/sys/unix" ) -// diffApply calculates the diff between two directories and directly applies the changes to a separate mount (without using -// the content store as an intermediary). If useHardlink is set to true, it will hardlink non-directories instead of copying -// them when applying. This obviously requires that each of the mounts provided are for immutable, committed snapshots. -// externalHardlinks tracks any such hardlinks, which is needed for doing correct disk usage calculations elsewhere. -func diffApply(ctx context.Context, lowerMountable, upperMountable, applyMountable Mountable, useHardlink bool, externalHardlinks map[uint64]struct{}, createWhiteoutDeletes bool) error { - if applyMountable == nil { - return errors.New("invalid nil apply mounts") +// diffApply applies the provided diffs to the dest Mountable and returns the correctly calculated disk usage +// that accounts for any hardlinks made from existing snapshots. ctx is expected to have a temporary lease +// associated with it. +func (sn *mergeSnapshotter) diffApply(ctx context.Context, dest Mountable, diffs ...Diff) (_ snapshots.Usage, rerr error) { + a, err := applierFor(dest, sn.tryCrossSnapshotLink) + if err != nil { + return snapshots.Usage{}, errors.Wrapf(err, "failed to create applier") } + defer func() { + releaseErr := a.Release() + if releaseErr != nil { + rerr = multierror.Append(rerr, errors.Wrapf(releaseErr, "failed to release applier")).ErrorOrNil() + } + }() + + // TODO:(sipsma) optimization: parallelize differ and applier in separate goroutines, connected with a buffered channel - var lowerMounts []mount.Mount - if lowerMountable != nil { - mounts, unmountLower, err := lowerMountable.Mount() + for _, diff := range diffs { + var lowerMntable Mountable + if diff.Lower != "" { + if info, err := sn.Stat(ctx, diff.Lower); err != nil { + return snapshots.Usage{}, errors.Wrapf(err, "failed to stat lower snapshot %s", diff.Lower) + } else if info.Kind == snapshots.KindCommitted { + lowerMntable, err = sn.View(ctx, identity.NewID(), diff.Lower) + if err != nil { + return snapshots.Usage{}, errors.Wrapf(err, "failed to mount lower snapshot view %s", diff.Lower) + } + } else { + lowerMntable, err = sn.Mounts(ctx, diff.Lower) + if err != nil { + return snapshots.Usage{}, errors.Wrapf(err, "failed to mount lower snapshot %s", diff.Lower) + } + } + } + var upperMntable Mountable + if diff.Upper != "" { + if info, err := sn.Stat(ctx, diff.Upper); err != nil { + return snapshots.Usage{}, errors.Wrapf(err, "failed to stat upper snapshot %s", diff.Upper) + } else if info.Kind == snapshots.KindCommitted { + upperMntable, err = sn.View(ctx, identity.NewID(), diff.Upper) + if err != nil { + return snapshots.Usage{}, errors.Wrapf(err, "failed to mount upper snapshot view %s", diff.Upper) + } + } else { + upperMntable, err = sn.Mounts(ctx, diff.Upper) + if err != nil { + return snapshots.Usage{}, errors.Wrapf(err, "failed to mount upper snapshot %s", diff.Upper) + } + } + } else { + // create an empty view + upperMntable, err = sn.View(ctx, identity.NewID(), "") + if err != nil { + return snapshots.Usage{}, errors.Wrapf(err, "failed to mount empty upper snapshot view %s", diff.Upper) + } + } + d, err := differFor(lowerMntable, upperMntable) if err != nil { - return err + return snapshots.Usage{}, errors.Wrapf(err, "failed to create differ") + } + defer func() { + rerr = multierror.Append(rerr, d.Release()).ErrorOrNil() + }() + if err := d.HandleChanges(ctx, a.Apply); err != nil { + return snapshots.Usage{}, errors.Wrapf(err, "failed to handle changes") } - lowerMounts = mounts - defer unmountLower() } - var upperMounts []mount.Mount - if upperMountable != nil { - mounts, unmountUpper, err := upperMountable.Mount() - if err != nil { - return err - } - upperMounts = mounts - defer unmountUpper() + if err := a.Flush(); err != nil { + return snapshots.Usage{}, errors.Wrapf(err, "failed to flush changes") } + return a.Usage() +} - lowerMnter := LocalMounterWithMounts(lowerMounts) - lowerView, err := lowerMnter.Mount() - if err != nil { - return err +type change struct { + kind fs.ChangeKind + subPath string + srcPath string + srcStat *syscall.Stat_t + // linkSubPath is set to a subPath of a previous change from the same + // differ instance that is a hardlink to this one, if any. + linkSubPath string +} + +type changeApply struct { + *change + dstPath string + dstStat *syscall.Stat_t +} + +type inode struct { + ino uint64 + dev uint64 +} + +func statInode(stat *syscall.Stat_t) inode { + if stat == nil { + return inode{} + } + return inode{ + ino: stat.Ino, + dev: stat.Dev, } - defer lowerMnter.Unmount() +} - upperMnter := LocalMounterWithMounts(upperMounts) - upperView, err := upperMnter.Mount() - if err != nil { - return err +type applier struct { + root string + release func() error + lowerdirs []string // ordered highest -> lowest, the order we want to check them in + crossSnapshotLinks map[inode]struct{} + createWhiteoutDelete bool + dirModTimes map[string]unix.Timespec // map of dstPath -> mtime that should be set on that subPath +} + +func applierFor(dest Mountable, tryCrossSnapshotLink bool) (_ *applier, rerr error) { + a := &applier{ + dirModTimes: make(map[string]unix.Timespec), + } + defer func() { + if rerr != nil { + rerr = multierror.Append(rerr, a.Release()).ErrorOrNil() + } + }() + if tryCrossSnapshotLink { + a.crossSnapshotLinks = make(map[inode]struct{}) } - defer upperMnter.Unmount() - applyMounts, unmountApply, err := applyMountable.Mount() + mnts, release, err := dest.Mount() if err != nil { - return err + return nil, nil } - defer unmountApply() + a.release = release - var applyRoot string - if useHardlink { - applyRoot, err = getRWDir(applyMounts) - if err != nil { - return err + if len(mnts) != 1 { + return nil, errors.Errorf("expected exactly one mount, got %d", len(mnts)) + } + mnt := mnts[0] + + switch mnt.Type { + case "overlay": + for _, opt := range mnt.Options { + if strings.HasPrefix(opt, "upperdir=") { + a.root = strings.TrimPrefix(opt, "upperdir=") + } else if strings.HasPrefix(opt, "lowerdir=") { + a.lowerdirs = strings.Split(strings.TrimPrefix(opt, "lowerdir="), ":") + } } - } else { - applyMnter := LocalMounterWithMounts(applyMounts) - applyRoot, err = applyMnter.Mount() + if a.root == "" { + return nil, errors.Errorf("could not find upperdir in mount options %v", mnt.Options) + } + if len(a.lowerdirs) == 0 { + return nil, errors.Errorf("could not find lowerdir in mount options %v", mnt.Options) + } + a.createWhiteoutDelete = true + case "bind", "rbind": + a.root = mnt.Source + default: + mnter := LocalMounter(dest) + root, err := mnter.Mount() if err != nil { - return err + return nil, err + } + a.root = root + prevRelease := a.release + a.release = func() error { + err := mnter.Unmount() + return multierror.Append(err, prevRelease()).ErrorOrNil() } - defer applyMnter.Unmount() - } - type pathTime struct { - applyPath string - atime unix.Timespec - mtime unix.Timespec } - // times holds the paths+times we visited and need to set - var times []pathTime + a.root, err = filepath.EvalSymlinks(a.root) + if err != nil { + return nil, errors.Wrapf(err, "failed to resolve symlinks in %s", a.root) + } + return a, nil +} - visited := make(map[string]struct{}) - inodes := make(map[uint64]string) +func (a *applier) Apply(ctx context.Context, c *change) error { + if c == nil { + return errors.New("nil change") + } - diffCalculator := func(cf fs.ChangeFunc) error { - return fs.Changes(ctx, lowerView, upperView, cf) + if c.kind == fs.ChangeKindUnmodified { + return nil } - upperRoot := upperView - // If using hardlinks, set upperRoot to the underlying filesystem so hardlinks can avoid EXDEV - if useHardlink && len(upperMounts) == 1 { - switch upperMounts[0].Type { - case "bind", "rbind": - upperRoot = upperMounts[0].Source - case "overlay": - if upperdir, err := overlay.GetUpperdir(lowerMounts, upperMounts); err == nil { - upperRoot = upperdir - diffCalculator = func(cf fs.ChangeFunc) error { - return overlay.Changes(ctx, cf, upperRoot, upperView, lowerView) - } - } else { - useHardlink = false - } + dstPath, err := safeJoin(a.root, c.subPath) + if err != nil { + return errors.Wrapf(err, "failed to join paths %q and %q", a.root, c.subPath) + } + var dstStat *syscall.Stat_t + if dstfi, err := os.Lstat(dstPath); err == nil { + stat, ok := dstfi.Sys().(*syscall.Stat_t) + if !ok { + return errors.Errorf("failed to get stat_t for %T", dstStat) } + dstStat = stat + } else if !os.IsNotExist(err) { + return errors.Wrap(err, "failed to stat during copy apply") } - var changeFunc fs.ChangeFunc - changeFunc = func(kind fs.ChangeKind, changePath string, upperFi os.FileInfo, prevErr error) error { - if prevErr != nil { - return prevErr - } + ca := &changeApply{ + change: c, + dstPath: dstPath, + dstStat: dstStat, + } - applyPath, err := safeJoin(applyRoot, changePath) - if err != nil { - return errors.Wrapf(err, "failed to get apply path for root %q, change %q", applyRoot, changePath) - } - upperPath, err := safeJoin(upperRoot, changePath) - if err != nil { - return errors.Wrapf(err, "failed to get upper path for root %q, change %q", upperPath, changePath) - } + if done, err := a.applyDelete(ctx, ca); err != nil { + return errors.Wrap(err, "failed to delete during apply") + } else if done { + return nil + } - visited[upperPath] = struct{}{} + if done, err := a.applyHardlink(ctx, ca); err != nil { + return errors.Wrapf(err, "failed to hardlink during apply") + } else if done { + return nil + } - if kind == fs.ChangeKindUnmodified { - return nil - } + if err := a.applyCopy(ctx, ca); err != nil { + return errors.Wrapf(err, "failed to copy during apply") + } + return nil +} - // When using overlay, a delete is represented with a whiteout device, which we actually want to - // hardlink or recreate here. If upperFi is non-nil, that means we have a whiteout device we can - // hardlink in. If upperFi is nil though, we need to first remove what's at the path and then, in - // the overlay case, create a whiteout device to represent the delete. This can happen when the - // overlay differ encounters an opaque dir, in which case it switches to the walking differ in order - // to convert from the opaque representation to the "explicit whiteout" format. - if kind == fs.ChangeKindDelete && upperFi == nil { - if err := os.RemoveAll(applyPath); err != nil { - return errors.Wrapf(err, "failed to remove path %s during apply", applyPath) - } - // Create the whiteout device only if enabled and if we are directly applying to an upperdir (as - // opposed to applying to an overlay mount itself). - if createWhiteoutDeletes && upperRoot != upperView { - if err := unix.Mknod(applyPath, unix.S_IFCHR, int(unix.Mkdev(0, 0))); err != nil { - return errors.Wrap(err, "failed to create whiteout delete") - } - } - return nil - } +func (a *applier) applyDelete(ctx context.Context, ca *changeApply) (bool, error) { + // Even when not deleting, we may be overwriting a file, in which case we should + // delete the existing file at the path, if any. Don't delete when both are dirs + // in this case though because they should get merged, not overwritten. + deleteOnly := ca.kind == fs.ChangeKindDelete + overwrite := !deleteOnly && ca.dstStat != nil && ca.srcStat.Mode&ca.dstStat.Mode&unix.S_IFMT != unix.S_IFDIR - upperType := upperFi.Mode().Type() - if upperType == os.ModeIrregular { - return errors.Errorf("unhandled irregular mode file during merge at path %q", changePath) - } + if !deleteOnly && !overwrite { + // nothing to delete, continue on + return false, nil + } - upperStat, ok := upperFi.Sys().(*syscall.Stat_t) - if !ok { - return errors.Errorf("unhandled stat type for %+v", upperFi) - } + if err := os.RemoveAll(ca.dstPath); err != nil { + return false, errors.Wrap(err, "failed to remove during apply") + } + ca.dstStat = nil - // Check to see if the parent dir needs to be created. This is needed when we are visiting a dirent - // that changes but never visit its parent dir because the parent did not change in the diff - changeParent := filepath.Dir(changePath) - if changeParent != "/" { - upperParent := filepath.Dir(upperPath) - if _, ok := visited[upperParent]; !ok { - parentInfo, err := os.Lstat(upperParent) - if err != nil { - return errors.Wrap(err, "failed to stat parent path during apply") - } - if err := changeFunc(fs.ChangeKindAdd, changeParent, parentInfo, nil); err != nil { - return err + if deleteOnly && a.createWhiteoutDelete { + // only create a whiteout device if there is something to delete + var foundLower bool + for _, lowerdir := range a.lowerdirs { + lowerPath, err := safeJoin(lowerdir, ca.subPath) + if err != nil { + return false, errors.Wrapf(err, "failed to join lowerdir %q and subPath %q", lowerdir, ca.subPath) + } + if _, err := os.Lstat(lowerPath); err == nil { + foundLower = true + break + } else if !errors.Is(err, unix.ENOENT) && !errors.Is(err, unix.ENOTDIR) { + return false, errors.Wrapf(err, "failed to stat lowerPath %q", lowerPath) + } + } + if foundLower { + ca.kind = fs.ChangeKindAdd + if ca.srcStat == nil { + ca.srcStat = &syscall.Stat_t{ + Mode: syscall.S_IFCHR, + Rdev: unix.Mkdev(0, 0), } + ca.srcPath = "" } + return false, nil } + } - applyFi, err := os.Lstat(applyPath) - if err != nil && !os.IsNotExist(err) { - return errors.Wrapf(err, "failed to stat path during apply") - } + return deleteOnly, nil +} - // if there is an existing file/dir at the applyPath, delete it unless both it and upper are dirs (in which case they get merged) - if applyFi != nil && !(applyFi.IsDir() && upperFi.IsDir()) { - if err := os.RemoveAll(applyPath); err != nil { - return errors.Wrapf(err, "failed to remove path %s during apply", applyPath) +func (a *applier) applyHardlink(ctx context.Context, ca *changeApply) (bool, error) { + switch ca.srcStat.Mode & unix.S_IFMT { + case unix.S_IFDIR, unix.S_IFIFO, unix.S_IFSOCK: + // Directories can't be hard-linked, so they just have to be recreated. + // Named pipes and sockets can be hard-linked but is best to avoid as it could enable IPC in weird cases. + return false, nil + + default: + var linkSrcPath string + if ca.linkSubPath != "" { + // there's an already applied path that we should link from + path, err := safeJoin(a.root, ca.linkSubPath) + if err != nil { + return false, errors.Errorf("failed to get hardlink source path: %v", err) } - applyFi = nil + linkSrcPath = path + } else if a.crossSnapshotLinks != nil { + // we can try to link across snapshots from the source file + linkSrcPath = ca.srcPath + a.crossSnapshotLinks[statInode(ca.srcStat)] = struct{}{} + } + if linkSrcPath == "" { + // nothing to hardlink from, will have to copy the file + return false, nil } - // hardlink fast-path - if useHardlink { - switch upperType { - case os.ModeDir, os.ModeNamedPipe, os.ModeSocket: - // Directories can't be hard-linked, so they just have to be recreated. - // Named pipes and sockets can be hard-linked but is best to avoid as it could enable IPC by sharing merge inputs. - break - default: - // TODO:(sipsma) consider handling EMLINK by falling back to copy - if err := os.Link(upperPath, applyPath); err != nil { - return errors.Wrapf(err, "failed to hardlink %q to %q during apply", upperPath, applyPath) - } - // mark this inode as one coming from a separate snapshot, needed for disk usage calculations elsewhere - externalHardlinks[upperStat.Ino] = struct{}{} - return nil + if err := os.Link(linkSrcPath, ca.dstPath); errors.Is(err, unix.EXDEV) || errors.Is(err, unix.EMLINK) { + // These errors are expected when the hardlink would cross devices or would exceed the maximum number of links for the inode. + // Just fallback to a copy. + bklog.G(ctx).WithError(err).WithField("srcPath", linkSrcPath).WithField("dstPath", ca.dstPath).Debug("hardlink failed") + if a.crossSnapshotLinks != nil { + delete(a.crossSnapshotLinks, statInode(ca.srcStat)) } + return false, nil + } else if err != nil { + return false, errors.Wrap(err, "failed to hardlink during apply") } - switch upperType { - case 0: // regular file - if upperStat.Nlink > 1 { - if linkedPath, ok := inodes[upperStat.Ino]; ok { - if err := os.Link(linkedPath, applyPath); err != nil { - return errors.Wrap(err, "failed to create hardlink during apply") - } - return nil // no other metadata updates needed when hardlinking - } - inodes[upperStat.Ino] = applyPath - } - if err := fs.CopyFile(applyPath, upperPath); err != nil { - return errors.Wrapf(err, "failed to copy from %s to %s during apply", upperPath, applyPath) - } - case os.ModeDir: - if applyFi == nil { - // applyPath doesn't exist, make it a dir - if err := os.Mkdir(applyPath, upperFi.Mode()); err != nil { - return errors.Wrap(err, "failed to create applied dir") - } - } - case os.ModeSymlink: - if target, err := os.Readlink(upperPath); err != nil { - return errors.Wrap(err, "failed to read symlink during apply") - } else if err := os.Symlink(target, applyPath); err != nil { - return errors.Wrap(err, "failed to create symlink during apply") - } - case os.ModeCharDevice, os.ModeDevice, os.ModeNamedPipe, os.ModeSocket: - if err := unix.Mknod(applyPath, uint32(upperFi.Mode()), int(upperStat.Rdev)); err != nil { - return errors.Wrap(err, "failed to create device during apply") + return true, nil + } +} + +func (a *applier) applyCopy(ctx context.Context, ca *changeApply) error { + switch ca.srcStat.Mode & unix.S_IFMT { + case unix.S_IFREG: + if err := fs.CopyFile(ca.dstPath, ca.srcPath); err != nil { + return errors.Wrapf(err, "failed to copy from %s to %s during apply", ca.srcPath, ca.dstPath) + } + case unix.S_IFDIR: + if ca.dstStat == nil { + // dstPath doesn't exist, make it a dir + if err := unix.Mkdir(ca.dstPath, ca.srcStat.Mode); err != nil { + return errors.Wrapf(err, "failed to create applied dir at %q from %q", ca.dstPath, ca.srcPath) } - default: - // should never be here, all types should be handled - return errors.Errorf("unhandled file type %q during merge at path %q", upperType.String(), changePath) } + case unix.S_IFLNK: + if target, err := os.Readlink(ca.srcPath); err != nil { + return errors.Wrap(err, "failed to read symlink during apply") + } else if err := os.Symlink(target, ca.dstPath); err != nil { + return errors.Wrap(err, "failed to create symlink during apply") + } + case unix.S_IFBLK, unix.S_IFCHR, unix.S_IFIFO, unix.S_IFSOCK: + if err := unix.Mknod(ca.dstPath, ca.srcStat.Mode, int(ca.srcStat.Rdev)); err != nil { + return errors.Wrap(err, "failed to mknod during apply") + } + default: + // should never be here, all types should be handled + return errors.Errorf("unhandled file type %d during merge at path %q", ca.srcStat.Mode&unix.S_IFMT, ca.srcPath) + } - xattrs, err := sysx.LListxattr(upperPath) + if ca.srcPath != "" { + xattrs, err := sysx.LListxattr(ca.srcPath) if err != nil { - return errors.Wrapf(err, "failed to list xattrs of upper path %s", upperPath) + return errors.Wrapf(err, "failed to list xattrs of src path %s", ca.srcPath) } for _, xattr := range xattrs { if isOpaqueXattr(xattr) { @@ -266,82 +381,81 @@ func diffApply(ctx context.Context, lowerMountable, upperMountable, applyMountab // the merge (as taken care of by the overlay differ). continue } - xattrVal, err := sysx.LGetxattr(upperPath, xattr) + xattrVal, err := sysx.LGetxattr(ca.srcPath, xattr) if err != nil { - return errors.Wrapf(err, "failed to get xattr %s of upper path %s", xattr, upperPath) + return errors.Wrapf(err, "failed to get xattr %s of src path %s", xattr, ca.srcPath) } - if err := sysx.LSetxattr(applyPath, xattr, xattrVal, 0); err != nil { + if err := sysx.LSetxattr(ca.dstPath, xattr, xattrVal, 0); err != nil { // This can often fail, so just log it: https://github.com/moby/buildkit/issues/1189 - bklog.G(ctx).Debugf("failed to set xattr %s of path %s during apply", xattr, applyPath) - } - } - - if err := os.Lchown(applyPath, int(upperStat.Uid), int(upperStat.Gid)); err != nil { - return errors.Wrap(err, "failed to chown applied dir") - } - - if upperType != os.ModeSymlink { - if err := os.Chmod(applyPath, upperFi.Mode()); err != nil { - return errors.Wrap(err, "failed to chmod applied dir") + bklog.G(ctx).Debugf("failed to set xattr %s of path %s during apply", xattr, ca.dstPath) } } + } - // save the times we should set on this path, to be applied at the end. - times = append(times, pathTime{ - applyPath: applyPath, - atime: unix.Timespec{ - Sec: upperStat.Atim.Sec, - Nsec: upperStat.Atim.Nsec, - }, - mtime: unix.Timespec{ - Sec: upperStat.Mtim.Sec, - Nsec: upperStat.Mtim.Nsec, - }, - }) - return nil + if err := os.Lchown(ca.dstPath, int(ca.srcStat.Uid), int(ca.srcStat.Gid)); err != nil { + return errors.Wrap(err, "failed to chown during apply") } - if err := diffCalculator(changeFunc); err != nil { - return err + if ca.srcStat.Mode&unix.S_IFMT != unix.S_IFLNK { + if err := unix.Chmod(ca.dstPath, ca.srcStat.Mode); err != nil { + return errors.Wrapf(err, "failed to chmod path %q during apply", ca.dstPath) + } } - // Set times now that everything has been modified. - for i := range times { - ts := times[len(times)-1-i] - if err := unix.UtimesNanoAt(unix.AT_FDCWD, ts.applyPath, []unix.Timespec{ts.atime, ts.mtime}, unix.AT_SYMLINK_NOFOLLOW); err != nil { - return errors.Wrapf(err, "failed to update times of path %q", ts.applyPath) + atimeSpec := unix.Timespec{Sec: ca.srcStat.Atim.Sec, Nsec: ca.srcStat.Atim.Nsec} + mtimeSpec := unix.Timespec{Sec: ca.srcStat.Mtim.Sec, Nsec: ca.srcStat.Mtim.Nsec} + if ca.srcStat.Mode&unix.S_IFMT != unix.S_IFDIR { + // apply times immediately for non-dirs + if err := unix.UtimesNanoAt(unix.AT_FDCWD, ca.dstPath, []unix.Timespec{atimeSpec, mtimeSpec}, unix.AT_SYMLINK_NOFOLLOW); err != nil { + return err } + } else { + // save the times we should set on this dir, to be applied after subfiles have been set + a.dirModTimes[ca.dstPath] = mtimeSpec } return nil } -func safeJoin(root, path string) (string, error) { - dir, base := filepath.Split(path) - parent, err := fs.RootPath(root, dir) - if err != nil { - return "", err - } - return filepath.Join(parent, base), nil +func (a *applier) Flush() error { + // Set dir times now that everything has been modified. Walk the filesystem tree to ensure + // that we never try to apply to a path that has been deleted or modified since times for it + // were stored. This is needed for corner cases such as where a parent dir is removed and + // replaced with a symlink. + return filepath.WalkDir(a.root, func(path string, d gofs.DirEntry, prevErr error) error { + if prevErr != nil { + return prevErr + } + if !d.IsDir() { + return nil + } + if mtime, ok := a.dirModTimes[path]; ok { + if err := unix.UtimesNanoAt(unix.AT_FDCWD, path, []unix.Timespec{{Nsec: unix.UTIME_OMIT}, mtime}, unix.AT_SYMLINK_NOFOLLOW); err != nil { + return err + } + } + return nil + }) } -// diskUsage calculates the disk space used by the provided mounts, similar to the normal containerd snapshotter disk usage -// calculations but with the extra ability to take into account hardlinks that were created between snapshots, ensuring that -// they don't get double counted. -func diskUsage(ctx context.Context, mountable Mountable, externalHardlinks map[uint64]struct{}) (snapshots.Usage, error) { - mounts, unmount, err := mountable.Mount() - if err != nil { - return snapshots.Usage{}, err +func (a *applier) Release() error { + if a.release != nil { + err := a.release() + if err != nil { + return err + } } - defer unmount() + a.release = nil + return nil +} - inodes := make(map[uint64]struct{}) +func (a *applier) Usage() (snapshots.Usage, error) { + // Calculate the disk space used under the apply root, similar to the normal containerd snapshotter disk usage + // calculations but with the extra ability to take into account hardlinks that were created between snapshots, ensuring that + // they don't get double counted. + inodes := make(map[inode]struct{}) var usage snapshots.Usage - root, err := getRWDir(mounts) - if err != nil { - return snapshots.Usage{}, err - } - if err := filepath.WalkDir(root, func(path string, dirent gofs.DirEntry, err error) error { + if err := filepath.WalkDir(a.root, func(path string, dirent gofs.DirEntry, err error) error { if err != nil { return err } @@ -350,14 +464,19 @@ func diskUsage(ctx context.Context, mountable Mountable, externalHardlinks map[u return err } stat := info.Sys().(*syscall.Stat_t) - if _, ok := inodes[stat.Ino]; ok { + inode := statInode(stat) + if _, ok := inodes[inode]; ok { return nil } - inodes[stat.Ino] = struct{}{} - if _, ok := externalHardlinks[stat.Ino]; !ok { - usage.Inodes++ - usage.Size += stat.Blocks * 512 // 512 is always block size, see "man 2 stat" + inodes[inode] = struct{}{} + if a.crossSnapshotLinks != nil { + if _, ok := a.crossSnapshotLinks[statInode(stat)]; ok { + // don't count cross-snapshot hardlinks + return nil + } } + usage.Inodes++ + usage.Size += stat.Blocks * 512 // 512 is always block size, see "man 2 stat" return nil }); err != nil { return snapshots.Usage{}, err @@ -365,6 +484,295 @@ func diskUsage(ctx context.Context, mountable Mountable, externalHardlinks map[u return usage, nil } +type differ struct { + lowerRoot string + releaseLower func() error + + upperRoot string + releaseUpper func() error + + upperBindSource string + upperOverlayDirs []string // ordered lowest -> highest + + upperdir string + + visited map[string]struct{} // set of parent subPaths that have been visited + inodes map[inode]string // map of inode -> subPath +} + +func differFor(lowerMntable, upperMntable Mountable) (_ *differ, rerr error) { + d := &differ{ + visited: make(map[string]struct{}), + inodes: make(map[inode]string), + } + defer func() { + if rerr != nil { + rerr = multierror.Append(rerr, d.Release()).ErrorOrNil() + } + }() + + var lowerMnts []mount.Mount + if lowerMntable != nil { + mnts, release, err := lowerMntable.Mount() + if err != nil { + return nil, err + } + mounter := LocalMounterWithMounts(mnts) + root, err := mounter.Mount() + if err != nil { + return nil, err + } + d.lowerRoot = root + lowerMnts = mnts + d.releaseLower = func() error { + err := mounter.Unmount() + return multierror.Append(err, release()).ErrorOrNil() + } + } + + var upperMnts []mount.Mount + if upperMntable != nil { + mnts, release, err := upperMntable.Mount() + if err != nil { + return nil, err + } + mounter := LocalMounterWithMounts(mnts) + root, err := mounter.Mount() + if err != nil { + return nil, err + } + d.upperRoot = root + upperMnts = mnts + d.releaseUpper = func() error { + err := mounter.Unmount() + return multierror.Append(err, release()).ErrorOrNil() + } + } + + if len(upperMnts) == 1 { + switch upperMnts[0].Type { + case "bind", "rbind": + d.upperBindSource = upperMnts[0].Source + case "overlay": + overlayDirs, err := overlay.GetOverlayLayers(upperMnts[0]) + if err != nil { + return nil, errors.Wrapf(err, "failed to get overlay layers from mount %+v", upperMnts[0]) + } + d.upperOverlayDirs = overlayDirs + } + } + if len(lowerMnts) > 0 { + if upperdir, err := overlay.GetUpperdir(lowerMnts, upperMnts); err == nil { + d.upperdir = upperdir + } + } + + return d, nil +} + +func (d *differ) HandleChanges(ctx context.Context, handle func(context.Context, *change) error) error { + if d.upperdir != "" { + return d.overlayChanges(ctx, handle) + } + return d.doubleWalkingChanges(ctx, handle) +} + +func (d *differ) doubleWalkingChanges(ctx context.Context, handle func(context.Context, *change) error) error { + return fs.Changes(ctx, d.lowerRoot, d.upperRoot, func(kind fs.ChangeKind, subPath string, srcfi os.FileInfo, prevErr error) error { + if prevErr != nil { + return prevErr + } + if ctx.Err() != nil { + return ctx.Err() + } + + if kind == fs.ChangeKindUnmodified { + return nil + } + + // NOTE: it's tempting to skip creating parent dirs when change kind is Delete, but + // that would make us incompatible with the image exporter code: + // https://github.com/containerd/containerd/pull/2095 + if err := d.checkParent(ctx, subPath, handle); err != nil { + return errors.Wrapf(err, "failed to check parent for %s", subPath) + } + + c := &change{ + kind: kind, + subPath: subPath, + } + + if srcfi != nil { + // Try to ensure that srcPath and srcStat are set to a file from the underlying filesystem + // rather than the actual mount when possible. This allows hardlinking without getting EXDEV. + switch { + case !srcfi.IsDir() && d.upperBindSource != "": + srcPath, err := safeJoin(d.upperBindSource, c.subPath) + if err != nil { + return errors.Wrapf(err, "failed to join %s and %s", d.upperBindSource, c.subPath) + } + c.srcPath = srcPath + if fi, err := os.Lstat(c.srcPath); err == nil { + srcfi = fi + } else { + return errors.Wrap(err, "failed to stat underlying file from bind mount") + } + case !srcfi.IsDir() && len(d.upperOverlayDirs) > 0: + for i := range d.upperOverlayDirs { + dir := d.upperOverlayDirs[len(d.upperOverlayDirs)-1-i] + path, err := safeJoin(dir, c.subPath) + if err != nil { + return errors.Wrapf(err, "failed to join %s and %s", dir, c.subPath) + } + if stat, err := os.Lstat(path); err == nil { + c.srcPath = path + srcfi = stat + break + } else if errors.Is(err, unix.ENOENT) { + continue + } else { + return errors.Wrap(err, "failed to lstat when finding direct path of overlay file") + } + } + default: + srcPath, err := safeJoin(d.upperRoot, subPath) + if err != nil { + return errors.Wrapf(err, "failed to join %s and %s", d.upperRoot, subPath) + } + c.srcPath = srcPath + if fi, err := os.Lstat(c.srcPath); err == nil { + srcfi = fi + } else { + return errors.Wrap(err, "failed to stat srcPath from differ") + } + } + + var ok bool + c.srcStat, ok = srcfi.Sys().(*syscall.Stat_t) + if !ok { + return errors.Errorf("unhandled stat type for %+v", srcfi) + } + + if !srcfi.IsDir() && c.srcStat.Nlink > 1 { + if linkSubPath, ok := d.inodes[statInode(c.srcStat)]; ok { + c.linkSubPath = linkSubPath + } else { + d.inodes[statInode(c.srcStat)] = c.subPath + } + } + } + + return handle(ctx, c) + }) +} + +func (d *differ) overlayChanges(ctx context.Context, handle func(context.Context, *change) error) error { + return overlay.Changes(ctx, func(kind fs.ChangeKind, subPath string, srcfi os.FileInfo, prevErr error) error { + if prevErr != nil { + return prevErr + } + if ctx.Err() != nil { + return ctx.Err() + } + + if kind == fs.ChangeKindUnmodified { + return nil + } + + if err := d.checkParent(ctx, subPath, handle); err != nil { + return errors.Wrapf(err, "failed to check parent for %s", subPath) + } + + srcPath, err := safeJoin(d.upperdir, subPath) + if err != nil { + return errors.Wrapf(err, "failed to join %s and %s", d.upperdir, subPath) + } + + c := &change{ + kind: kind, + subPath: subPath, + srcPath: srcPath, + } + + if srcfi != nil { + var ok bool + c.srcStat, ok = srcfi.Sys().(*syscall.Stat_t) + if !ok { + return errors.Errorf("unhandled stat type for %+v", srcfi) + } + + if !srcfi.IsDir() && c.srcStat.Nlink > 1 { + if linkSubPath, ok := d.inodes[statInode(c.srcStat)]; ok { + c.linkSubPath = linkSubPath + } else { + d.inodes[statInode(c.srcStat)] = c.subPath + } + } + } + + return handle(ctx, c) + }, d.upperdir, d.upperRoot, d.lowerRoot) +} + +func (d *differ) checkParent(ctx context.Context, subPath string, handle func(context.Context, *change) error) error { + parentSubPath := filepath.Dir(subPath) + if parentSubPath == "/" { + return nil + } + if _, ok := d.visited[parentSubPath]; ok { + return nil + } + d.visited[parentSubPath] = struct{}{} + + if err := d.checkParent(ctx, parentSubPath, handle); err != nil { + return err + } + parentSrcPath, err := safeJoin(d.upperRoot, parentSubPath) + if err != nil { + return err + } + srcfi, err := os.Lstat(parentSrcPath) + if err != nil { + return err + } + parentSrcStat, ok := srcfi.Sys().(*syscall.Stat_t) + if !ok { + return errors.Errorf("unexpected type %T", srcfi) + } + return handle(ctx, &change{ + kind: fs.ChangeKindModify, + subPath: parentSubPath, + srcPath: parentSrcPath, + srcStat: parentSrcStat, + }) +} + +func (d *differ) Release() error { + var err error + if d.releaseLower != nil { + err = d.releaseLower() + if err == nil { + d.releaseLower = nil + } + } + if d.releaseUpper != nil { + err = multierror.Append(err, d.releaseUpper()).ErrorOrNil() + if err == nil { + d.releaseUpper = nil + } + } + return err +} + +func safeJoin(root, path string) (string, error) { + dir, base := filepath.Split(path) + parent, err := fs.RootPath(root, dir) + if err != nil { + return "", err + } + return filepath.Join(parent, base), nil +} + func isOpaqueXattr(s string) bool { for _, k := range []string{"trusted.overlay.opaque", "user.overlay.opaque"} { if s == k { diff --git a/snapshot/diffapply_windows.go b/snapshot/diffapply_windows.go index 64eef8e06660..2ec9fc3d789c 100644 --- a/snapshot/diffapply_windows.go +++ b/snapshot/diffapply_windows.go @@ -11,12 +11,8 @@ import ( "github.com/pkg/errors" ) -func diffApply(ctx context.Context, lowerMountable, upperMountable, applyMountable Mountable, useHardlink bool, externalHardlinks map[uint64]struct{}, createWhiteoutDeletes bool) error { - return errors.New("diff apply not supported on windows") -} - -func diskUsage(ctx context.Context, mountable Mountable, externalHardlinks map[uint64]struct{}) (snapshots.Usage, error) { - return snapshots.Usage{}, errors.New("disk usage not supported on windows") +func (sn *mergeSnapshotter) diffApply(ctx context.Context, dest Mountable, diffs ...Diff) (_ snapshots.Usage, rerr error) { + return snapshots.Usage{}, errors.New("diffApply not yet supported on windows") } func needsUserXAttr(ctx context.Context, sn Snapshotter, lm leases.Manager) (bool, error) { diff --git a/snapshot/localmounter.go b/snapshot/localmounter.go index 6f210ea09456..9ddb7c1af642 100644 --- a/snapshot/localmounter.go +++ b/snapshot/localmounter.go @@ -1,11 +1,9 @@ package snapshot import ( - "strings" "sync" "github.com/containerd/containerd/mount" - "github.com/pkg/errors" ) type Mounter interface { @@ -32,33 +30,3 @@ type localMounter struct { target string release func() error } - -// RWDirMount extracts out just a writable directory from the provided mounts and returns it. -// It's intended to supply the directory to which changes being made to the mount can be -// written directly. A writable directory includes an upperdir if provided an overlay or a rw -// bind mount source. If the mount doesn't have a writable directory, an error is returned. -func getRWDir(mounts []mount.Mount) (string, error) { - if len(mounts) != 1 { - return "", errors.New("cannot extract writable directory from zero or multiple mounts") - } - mnt := mounts[0] - switch mnt.Type { - case "overlay": - for _, opt := range mnt.Options { - if strings.HasPrefix(opt, "upperdir=") { - upperdir := strings.SplitN(opt, "=", 2)[1] - return upperdir, nil - } - } - return "", errors.New("cannot extract writable directory from overlay mount without upperdir") - case "bind", "rbind": - for _, opt := range mnt.Options { - if opt == "ro" { - return "", errors.New("cannot extract writable directory from read-only bind mount") - } - } - return mnt.Source, nil - default: - return "", errors.Errorf("cannot extract writable directory from unhandled mount type %q", mnt.Type) - } -} diff --git a/snapshot/merge.go b/snapshot/merge.go index 1d48c263d6ca..75687cdcb4a2 100644 --- a/snapshot/merge.go +++ b/snapshot/merge.go @@ -56,40 +56,36 @@ type mergeSnapshotter struct { lm leases.Manager // Whether we should try to implement merges by hardlinking between underlying directories - useHardlinks bool + tryCrossSnapshotLink bool - // Whether the snapshotter is overlay-based, which enables some required behavior like - // creation of whiteout devices to represent deletes in addition to some optimizations - // like using the first merge input as the parent snapshot. + // Whether the snapshotter is overlay-based, which enables some some optimizations like + // using the first merge input as the parent snapshot. overlayBased bool } func NewMergeSnapshotter(ctx context.Context, sn Snapshotter, lm leases.Manager) MergeSnapshotter { name := sn.Name() - _, useHardlinks := hardlinkMergeSnapshotters[name] + _, tryCrossSnapshotLink := hardlinkMergeSnapshotters[name] _, overlayBased := overlayBasedSnapshotters[name] if overlayBased && userns.RunningInUserNS() { // When using an overlay-based snapshotter, if we are running rootless on a pre-5.11 // kernel, we will not have userxattr. This results in opaque xattrs not being visible - // to us and thus breaking the overlay-optimized differ. This also means that there are - // cases where in order to apply a deletion, we'd need to create a whiteout device but - // may not have access to one to hardlink, so we just fall back to not using hardlinks - // at all in this case. + // to us and thus breaking the overlay-optimized differ. userxattr, err := needsUserXAttr(ctx, sn, lm) if err != nil { bklog.G(ctx).Debugf("failed to check user xattr: %v", err) - useHardlinks = false + tryCrossSnapshotLink = false } else { - useHardlinks = userxattr + tryCrossSnapshotLink = userxattr } } return &mergeSnapshotter{ - Snapshotter: sn, - lm: lm, - useHardlinks: useHardlinks, - overlayBased: overlayBased, + Snapshotter: sn, + lm: lm, + tryCrossSnapshotLink: tryCrossSnapshotLink, + overlayBased: overlayBased, } } @@ -99,7 +95,8 @@ func (sn *mergeSnapshotter) Merge(ctx context.Context, key string, diffs []Diff, // Overlay-based snapshotters can skip the base snapshot of the merge (if one exists) and just use it as the // parent of the merge snapshot. Other snapshotters will start empty (with baseKey set to ""). // Find the baseKey by following the chain of diffs for as long as it follows the pattern of the current lower - // being the parent of the current upper and equal to the previous upper. + // being the parent of the current upper and equal to the previous upper, i.e.: + // Diff("", A) -> Diff(A, B) -> Diff(B, C), etc. var baseIndex int for i, diff := range diffs { info, err := sn.Stat(ctx, diff.Upper) @@ -134,37 +131,9 @@ func (sn *mergeSnapshotter) Merge(ctx context.Context, key string, diffs []Diff, return errors.Wrapf(err, "failed to get mounts of %q", key) } - // externalHardlinks keeps track of which inodes have been hard-linked between snapshots (which is - // enabled when sn.useHardlinks is set to true) - externalHardlinks := make(map[uint64]struct{}) - - for _, diff := range diffs { - var lowerMounts Mountable - if diff.Lower != "" { - viewID := identity.NewID() - var err error - lowerMounts, err = sn.View(tempLeaseCtx, viewID, diff.Lower) - if err != nil { - return errors.Wrapf(err, "failed to get mounts of lower %q", diff.Lower) - } - } - - viewID := identity.NewID() - upperMounts, err := sn.View(tempLeaseCtx, viewID, diff.Upper) - if err != nil { - return errors.Wrapf(err, "failed to get mounts of upper %q", diff.Upper) - } - - err = diffApply(tempLeaseCtx, lowerMounts, upperMounts, applyMounts, sn.useHardlinks, externalHardlinks, sn.overlayBased) - if err != nil { - return err - } - } - - // save the correctly calculated usage as a label on the committed key - usage, err := diskUsage(ctx, applyMounts, externalHardlinks) + usage, err := sn.diffApply(ctx, applyMounts, diffs...) if err != nil { - return errors.Wrap(err, "failed to get disk usage of diff apply merge") + return errors.Wrap(err, "failed to apply diffs") } if err := sn.Commit(ctx, key, prepareKey, withMergeUsage(usage)); err != nil { return errors.Wrapf(err, "failed to commit %q", key) diff --git a/snapshot/snapshotter_test.go b/snapshot/snapshotter_test.go index b65fdf811297..c2eb0577fabf 100644 --- a/snapshot/snapshotter_test.go +++ b/snapshot/snapshotter_test.go @@ -99,7 +99,7 @@ func newSnapshotter(ctx context.Context, snapshotterName string) (_ context.Cont lm := leaseutil.WithNamespace(ctdmetadata.NewLeaseManager(mdb), ns) snapshotter := NewMergeSnapshotter(ctx, FromContainerdSnapshotter(snapshotterName, mdb.Snapshotter(snapshotterName), nil), lm).(*mergeSnapshotter) if noHardlink { - snapshotter.useHardlinks = false + snapshotter.tryCrossSnapshotLink = false } leaseID := identity.NewID() @@ -198,6 +198,7 @@ func TestMerge(t *testing.T) { requireMtime(t, filepath.Join(root, "c"), ts.Add(6*time.Second)) requireMtime(t, filepath.Join(root, "foo"), ts.Add(4*time.Second)) requireMtime(t, filepath.Join(root, "bar"), ts.Add(12*time.Second)) + requireMtime(t, filepath.Join(root, "hardlink"), ts.Add(12*time.Second)) requireMtime(t, filepath.Join(root, "bar/A"), ts.Add(12*time.Second)) requireMtime(t, filepath.Join(root, "bar/B"), ts.Add(9*time.Second)) requireMtime(t, filepath.Join(root, "symlink"), ts.Add(3*time.Second)) diff --git a/util/overlay/overlay_linux.go b/util/overlay/overlay_linux.go index 26415d0d0231..63f1674e8719 100644 --- a/util/overlay/overlay_linux.go +++ b/util/overlay/overlay_linux.go @@ -46,7 +46,7 @@ func GetUpperdir(lower, upper []mount.Mount) (string, error) { case "overlay": // lower snapshot is an overlay mount of multiple layers var err error - lowerlayers, err = getOverlayLayers(lowerM) + lowerlayers, err = GetOverlayLayers(lowerM) if err != nil { return "", err } @@ -59,7 +59,7 @@ func GetUpperdir(lower, upper []mount.Mount) (string, error) { if upperM.Type != "overlay" { return "", errors.Errorf("upper snapshot isn't overlay mounted (type = %q)", upperM.Type) } - upperlayers, err := getOverlayLayers(upperM) + upperlayers, err := GetOverlayLayers(upperM) if err != nil { return "", err } @@ -83,8 +83,8 @@ func GetUpperdir(lower, upper []mount.Mount) (string, error) { return upperdir, nil } -// getOverlayLayers returns all layer directories of an overlayfs mount. -func getOverlayLayers(m mount.Mount) ([]string, error) { +// GetOverlayLayers returns all layer directories of an overlayfs mount. +func GetOverlayLayers(m mount.Mount) ([]string, error) { var u string var uFound bool var l []string // l[0] = bottommost