Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for fake, and speed up reproducible builds #12

Merged
merged 7 commits into from
Jun 10, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion pkg/commands/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@ limitations under the License.

package commands

import v1 "github.com/google/go-containerregistry/pkg/v1"
import (
"github.com/GoogleContainerTools/kaniko/pkg/dockerfile"
v1 "github.com/google/go-containerregistry/pkg/v1"
)

type Cached interface {
Layer() v1.Layer
Expand All @@ -29,3 +32,7 @@ type caching struct {
// Layer returns the v1.Layer stored in the caching value's layer field.
func (c caching) Layer() v1.Layer {
return c.layer
}

// FakeExecuteCommand is implemented by commands that offer a "fake" variant
// of ExecuteCommand taking the same image config and build args.
type FakeExecuteCommand interface {
	// FakeExecuteCommand runs the fake variant of the command and reports
	// any failure as an error.
	FakeExecuteCommand(config *v1.Config, buildArgs *dockerfile.BuildArgs) error
}
23 changes: 23 additions & 0 deletions pkg/commands/copy.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,29 @@ func (cr *CachingCopyCommand) ExecuteCommand(config *v1.Config, buildArgs *docke
return nil
}

// FakeExecuteCommand records the single layer of the cached image on the
// command without extracting anything: it sets cr.layer to that layer and
// cr.extractedFiles to an empty list.
//
// It returns an error when cr.img is nil, when the image layers cannot be
// retrieved, or when the image does not contain exactly one layer. The config
// and buildArgs parameters are not used.
func (cr *CachingCopyCommand) FakeExecuteCommand(config *v1.Config, buildArgs *dockerfile.BuildArgs) error {
	logrus.Infof("Found cached layer, faking extraction to filesystem")

	if cr.img == nil {
		return errors.Errorf("cached command image is nil %v", cr.String())
	}

	layers, err := cr.img.Layers()
	if err != nil {
		return errors.Wrap(err, "retrieve image layers")
	}

	if len(layers) != 1 {
		return errors.Errorf("expected %d layers but got %d", 1, len(layers))
	}

	cr.layer = layers[0]
	// Deliberately a non-nil empty slice rather than nil.
	// NOTE(review): snapshot callers appear to treat nil and empty file lists
	// differently — confirm before changing this to nil.
	cr.extractedFiles = []string{}

	return nil
}

// FilesUsedFromContext delegates to copyCmdFilesUsedFromContext, passing the
// given config and build args together with the command's cmd and fileContext,
// and returns its result unchanged.
func (cr *CachingCopyCommand) FilesUsedFromContext(config *v1.Config, buildArgs *dockerfile.BuildArgs) ([]string, error) {
return copyCmdFilesUsedFromContext(config, buildArgs, cr.cmd, cr.fileContext)
}
Expand Down
23 changes: 23 additions & 0 deletions pkg/commands/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,29 @@ func (cr *CachingRunCommand) ExecuteCommand(config *v1.Config, buildArgs *docker
return nil
}

// FakeExecuteCommand records the single layer of the cached image on the
// command without extracting anything: it sets cr.layer to that layer and
// cr.extractedFiles to an empty list.
//
// It returns an error when cr.img is nil, when the image layers cannot be
// retrieved, or when the image does not contain exactly one layer. The config
// and buildArgs parameters are not used.
func (cr *CachingRunCommand) FakeExecuteCommand(config *v1.Config, buildArgs *dockerfile.BuildArgs) error {
	logrus.Infof("Found cached layer, faking extraction to filesystem")

	if cr.img == nil {
		return errors.Errorf("command image is nil %v", cr.String())
	}

	layers, err := cr.img.Layers()
	if err != nil {
		return errors.Wrap(err, "retrieving image layers")
	}

	if len(layers) != 1 {
		return errors.Errorf("expected %d layers but got %d", 1, len(layers))
	}

	cr.layer = layers[0]
	// Deliberately a non-nil empty slice rather than nil.
	// NOTE(review): snapshot callers appear to treat nil and empty file lists
	// differently — confirm before changing this to nil.
	cr.extractedFiles = []string{}

	return nil
}

func (cr *CachingRunCommand) FilesToSnapshot() []string {
f := cr.extractedFiles
logrus.Debugf("%d files extracted by caching run command", len(f))
Expand Down
232 changes: 230 additions & 2 deletions pkg/executor/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,12 @@ func newStageBuilder(args *dockerfile.BuildArgs, opts *config.KanikoOptions, sta
return nil, err
}
l := snapshot.NewLayeredMap(hasher)
snapshotter := snapshot.NewSnapshotter(l, config.RootDir)
var snapshotter snapShotter
if !opts.Reproducible {
snapshotter = snapshot.NewSnapshotter(l, config.RootDir)
} else {
snapshotter = snapshot.NewCanonicalSnapshotter(l, config.RootDir)
}

digest, err := sourceImage.Digest()
if err != nil {
Expand Down Expand Up @@ -444,6 +449,92 @@ func (s *stageBuilder) build() error {
return nil
}

// fakeBuild is like build(), but does not actually execute the commands or
// extract files.
//
// For each non-nil command it updates the composite cache key (when caching is
// enabled), then calls the command's FakeExecuteCommand method if it
// implements commands.FakeExecuteCommand. Of the remaining commands only
// *commands.UserCommand is executed for real; any other type aborts the fake
// build with an error. When a snapshot is due, a commands.Cached command
// contributes its Layer() directly, otherwise a snapshot is taken and saved.
func (s *stageBuilder) fakeBuild() error {
Copy link

@dannykopping dannykopping May 31, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAICS this aligns with the implementation of build(). I'm concerned that this will drift; how do you plan to keep these two in lock-step?

As an aside: I think the term "fake" is a problematic one.

This PR adds support for fake builds, essentially a way to detect if a build is cached and what the final image hash should be.

Perhaps renaming it to something (admittedly less pithy) like "cacheProbeBuild" or "preemptiveBuild" might be more clear?

The term "fake" is quite overloaded and I don't think it expresses the intent well here.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a fair concern, one I have as well. I don't really have a plan for ensuring they're kept in sync other than adding tests to verify a build and "fakeBuild" produce the same (or not) hash in the end.

I also agree with you on "fake", and "cacheProbe" is actually a pretty good one, thanks for the suggestions.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What would you think about making a separate fakeBuilder that embeds *stageBuilder but overrides the build() method?

e.g. https://go.dev/play/p/5ZxGr4ozeOV

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you also thinking we'd merge the logic in Build and FakeBuild into one, just swap out the stageBuilder? (My main reason to keep them separate was to remove anything that could accidentally cause changes to the fs.)

I'll think about it, definitely doable.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

non-blocking, just a suggestion

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think what could solve both my concern (about lock-step with build()) and Cian's is an adapter that can be passed in which would effect the changes. For your "fake" case, this adapter would not interact with the FS even though it satisfies the implementation.

I haven't looked too closely at the implementation so maybe this is im{practical,possible}, but thought I'd mention it.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

non-blocking, just a suggestion

Ditto 👍

// Set the initial cache key to be the base image digest, the build args and the SrcContext.
// A cache key already mapped to the base image digest takes precedence over
// the raw digest.
var compositeKey *CompositeCache
if cacheKey, ok := s.digestToCacheKey[s.baseImageDigest]; ok {
compositeKey = NewCompositeCache(cacheKey)
} else {
compositeKey = NewCompositeCache(s.baseImageDigest)
}

// Apply optimizations to the instructions.
if err := s.optimize(*compositeKey, s.cf.Config); err != nil {
return errors.Wrap(err, "failed to optimize instructions")
}

for index, command := range s.cmds {
// Nil entries in s.cmds are skipped.
if command == nil {
continue
}

// If the command uses files from the context, add them.
files, err := command.FilesUsedFromContext(&s.cf.Config, s.args)
if err != nil {
return errors.Wrap(err, "failed to get files used from context")
}

if s.opts.Cache {
*compositeKey, err = s.populateCompositeKey(command, files, *compositeKey, s.args, s.cf.Config.Env)
// NOTE(review): the "&& s.opts.Cache" test is redundant here, this branch
// is only reached when s.opts.Cache is already true.
if err != nil && s.opts.Cache {
return err
}
}

logrus.Info(command.String())

// isCacheCommand records whether the command implements commands.Cached,
// i.e. whether it can hand over a layer via Layer() further below.
isCacheCommand := func() bool {
switch command.(type) {
case commands.Cached:
return true
default:
return false
}
}()

// Prefer the command's fake variant when it provides one.
if c, ok := command.(commands.FakeExecuteCommand); ok {
if err := c.FakeExecuteCommand(&s.cf.Config, s.args); err != nil {
return errors.Wrap(err, "failed to execute fake command")
}
} else {
// Without a fake variant, only *commands.UserCommand is allowed to run
// its real ExecuteCommand; every other command type is rejected.
switch command.(type) {
case *commands.UserCommand:
default:
return errors.Errorf("uncached command %T is not supported in fake build", command)
}
if err := command.ExecuteCommand(&s.cf.Config, s.args); err != nil {
return errors.Wrap(err, "failed to execute command")
}
}
// From here on files holds the command's snapshot list, replacing the
// context files gathered above.
files = command.FilesToSnapshot()

if !s.shouldTakeSnapshot(index, command.MetadataOnly()) && !s.opts.ForceBuildMetadata {
logrus.Debugf("fakeBuild: skipping snapshot for [%v]", command.String())
continue
}
if isCacheCommand {
// Cached commands contribute their layer directly, no snapshot is taken.
v := command.(commands.Cached)
layer := v.Layer()
if err := s.saveLayerToImage(layer, command.String()); err != nil {
return errors.Wrap(err, "failed to save layer")
}
} else {
tarPath, err := s.takeSnapshot(files, command.ShouldDetectDeletedFiles())
if err != nil {
return errors.Wrap(err, "failed to take snapshot")
}

if err := s.saveSnapshotToImage(command.String(), tarPath); err != nil {
return errors.Wrap(err, "failed to save snapshot to image")
}
}
}

return nil
}

func (s *stageBuilder) takeSnapshot(files []string, shdDelete bool) (string, error) {
var snapshot string
var err error
Expand Down Expand Up @@ -787,7 +878,9 @@ func DoBuild(opts *config.KanikoOptions) (v1.Image, error) {
return nil, err
}
if opts.Reproducible {
sourceImage, err = mutate.Canonical(sourceImage)
// If this option is enabled, we will use the canonical
// snapshotter to avoid having to modify the layers here.
sourceImage, err = mutateCanonicalWithoutLayerEdit(sourceImage)
if err != nil {
return nil, err
}
Expand All @@ -797,6 +890,7 @@ func DoBuild(opts *config.KanikoOptions) (v1.Image, error) {
return nil, err
}
}

timing.DefaultRun.Stop(t)
return sourceImage, nil
}
Expand Down Expand Up @@ -833,6 +927,140 @@ func DoBuild(opts *config.KanikoOptions) (v1.Image, error) {
return nil, err
}

// DoFakeBuild executes building the Dockerfile without modifying the
// filesystem, returns an error if build cache is not available.
//
// Every stage is run through stageBuilder.fakeBuild. After each stage the
// resulting image config is updated with the target OS/architecture (taken
// from opts.CustomPlatform when set, otherwise from the running binary), and
// the stage's digest and final cache key are recorded for later stages. The
// image of the first stage marked Final is returned after its config has been
// canonicalized via mutateCanonicalWithoutLayerEdit.
//
// An error is returned when any step fails, when opts.CustomPlatform is set
// but is not of the form "os/arch[/...]", or when no stage is marked Final.
func DoFakeBuild(opts *config.KanikoOptions) (v1.Image, error) {
	digestToCacheKey := make(map[string]string)
	stageIdxToDigest := make(map[string]string)

	stages, metaArgs, err := dockerfile.ParseStages(opts)
	if err != nil {
		return nil, err
	}

	kanikoStages, err := dockerfile.MakeKanikoStages(opts, stages, metaArgs)
	if err != nil {
		return nil, err
	}
	stageNameToIdx := ResolveCrossStageInstructions(kanikoStages)

	fileContext, err := util.NewFileContextFromDockerfile(opts.DockerfilePath, opts.SrcContext)
	if err != nil {
		return nil, err
	}

	// Some stages may refer to other random images, not previous stages
	if err := fetchExtraStages(kanikoStages, opts); err != nil {
		return nil, err
	}
	crossStageDependencies, err := CalculateDependencies(kanikoStages, opts, stageNameToIdx)
	if err != nil {
		return nil, err
	}
	logrus.Infof("Built cross stage deps: %v", crossStageDependencies)

	// args carries the build args of the previous stage into the next one.
	var args *dockerfile.BuildArgs

	for _, stage := range kanikoStages {
		sb, err := newStageBuilder(
			args, opts, stage,
			crossStageDependencies,
			digestToCacheKey,
			stageIdxToDigest,
			stageNameToIdx,
			fileContext)
		if err != nil {
			return nil, err
		}

		args = sb.args
		if err := sb.fakeBuild(); err != nil {
			return nil, errors.Wrap(err, "error fake building stage")
		}

		reviewConfig(stage, &sb.cf.Config)

		sourceImage, err := mutate.Config(sb.image, sb.cf.Config)
		if err != nil {
			return nil, err
		}

		configFile, err := sourceImage.ConfigFile()
		if err != nil {
			return nil, err
		}
		if opts.CustomPlatform == "" {
			configFile.OS = runtime.GOOS
			configFile.Architecture = runtime.GOARCH
		} else {
			// Split once and validate, so a platform without "/" yields an
			// error instead of an index-out-of-range panic.
			platform := strings.Split(opts.CustomPlatform, "/")
			if len(platform) < 2 {
				return nil, errors.Errorf("invalid custom platform %q, expected os/arch", opts.CustomPlatform)
			}
			configFile.OS = platform[0]
			configFile.Architecture = platform[1]
		}
		sourceImage, err = mutate.ConfigFile(sourceImage, configFile)
		if err != nil {
			return nil, err
		}

		d, err := sourceImage.Digest()
		if err != nil {
			return nil, err
		}
		stageIdxToDigest[fmt.Sprintf("%d", sb.stage.Index)] = d.String()
		logrus.Infof("Mapping stage idx %v to digest %v", sb.stage.Index, d.String())

		digestToCacheKey[d.String()] = sb.finalCacheKey
		logrus.Infof("Mapping digest %v to cachekey %v", d.String(), sb.finalCacheKey)

		if stage.Final {
			sourceImage, err = mutateCanonicalWithoutLayerEdit(sourceImage)
			if err != nil {
				return nil, err
			}

			return sourceImage, nil
		}
	}

	// The function-scope err is always nil at this point (the loop shadows
	// it), so returning it would hand callers a nil image with a nil error.
	return nil, errors.New("no final stage found while fake building")
}

// From mutate.Canonical with layer de/compress stripped out.
//
// mutateCanonicalWithoutLayerEdit returns image with a canonicalized config
// file: the Created timestamps of the config and of every history entry are
// reset to the zero time, history authors are cleared, and the Container,
// Config.Hostname and DockerVersion fields are emptied. Layers are not
// touched. The source config file is not modified; all edits are applied to a
// deep copy.
func mutateCanonicalWithoutLayerEdit(image v1.Image) (v1.Image, error) {
	ocf, err := image.ConfigFile()
	if err != nil {
		return nil, fmt.Errorf("getting config file: %w", err)
	}

	// DeepCopy already carries over Architecture, OS, OSVersion, Config and
	// the History fields, so only the fields being canonicalized are touched.
	cfg := ocf.DeepCopy()

	// Strip away timestamps from the config file
	zero := v1.Time{Time: time.Time{}}
	cfg.Created = zero

	for i := range cfg.History {
		cfg.History[i].Created = zero
		// Explicitly ignore Author field; which hinders reproducibility
		cfg.History[i].Author = ""
	}

	cfg.Container = ""
	cfg.Config.Hostname = ""
	cfg.DockerVersion = ""

	return mutate.ConfigFile(image, cfg)
}

// filesToSave returns all the files matching the given pattern in deps.
// If a file is a symlink, it also returns the target file.
func filesToSave(deps []string) ([]string, error) {
Expand Down
21 changes: 19 additions & 2 deletions pkg/snapshot/snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,20 @@ type Snapshotter struct {
l *LayeredMap
directory string
ignorelist []util.IgnoreListEntry
canonical bool
}

// NewSnapshotter builds a Snapshotter rooted at directory d that tracks state
// in l and uses the ignore list returned by util.IgnoreList.
func NewSnapshotter(l *LayeredMap, d string) *Snapshotter {
	s := &Snapshotter{
		l:          l,
		directory:  d,
		ignorelist: util.IgnoreList(),
	}
	return s
}

// NewCanonicalSnapshotter builds a Snapshotter rooted at directory d with the
// canonical flag set, so that it produces reproducible snapshots.
func NewCanonicalSnapshotter(l *LayeredMap, d string) *Snapshotter {
	s := &Snapshotter{
		l:          l,
		directory:  d,
		ignorelist: util.IgnoreList(),
		canonical:  true,
	}
	return s
}

// Init initializes a new snapshotter
func (s *Snapshotter) Init() error {
logrus.Info("Initializing snapshotter ...")
Expand Down Expand Up @@ -112,7 +119,12 @@ func (s *Snapshotter) TakeSnapshot(files []string, shdCheckDelete bool, forceBui
sort.Strings(filesToWhiteout)
}

t := util.NewTar(f)
var t util.Tar
if !s.canonical {
t = util.NewTar(f)
} else {
t = util.NewCanonicalTar(f)
}
defer t.Close()
if err := writeToTar(t, filesToAdd, filesToWhiteout); err != nil {
return "", err
Expand All @@ -128,7 +140,12 @@ func (s *Snapshotter) TakeSnapshotFS() (string, error) {
return "", err
}
defer f.Close()
t := util.NewTar(f)
var t util.Tar
if !s.canonical {
t = util.NewTar(f)
} else {
t = util.NewCanonicalTar(f)
}
defer t.Close()

filesToAdd, filesToWhiteOut, err := s.scanFullFilesystem()
Expand Down
Loading
Loading