Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add retries to auto.ipxe kernel/initrd downloads: #432

Merged
merged 5 commits into from
May 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cmd/smee/flag.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ func ipxeHTTPScriptFlags(c *config, fs *flag.FlagSet) {
fs.StringVar(&c.ipxeHTTPScript.hookURL, "osie-url", "", "[http] URL where OSIE (HookOS) images are located")
fs.StringVar(&c.ipxeHTTPScript.tinkServer, "tink-server", "", "[http] IP:Port for the Tink server")
fs.BoolVar(&c.ipxeHTTPScript.tinkServerUseTLS, "tink-server-tls", false, "[http] use TLS for Tink server")
fs.IntVar(&c.ipxeHTTPScript.retries, "ipxe-script-retries", 0, "[http] number of retries to attempt when fetching kernel and initrd files in the iPXE script")
}

func dhcpFlags(c *config, fs *flag.FlagSet) {
Expand Down
1 change: 1 addition & 0 deletions cmd/smee/flag_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ FLAGS
-http-addr [http] local IP:Port to listen on for iPXE HTTP script requests (default "%[1]v:80")
-http-ipxe-binary-enabled [http] enable iPXE HTTP binary server (default "true")
-http-ipxe-script-enabled [http] enable iPXE HTTP script server (default "true")
-ipxe-script-retries [http] number of retries to attempt when fetching kernel and initrd files in the iPXE script (default "0")
-osie-url [http] URL where OSIE (HookOS) images are located
-tink-server [http] IP:Port for the Tink server
-tink-server-tls [http] use TLS for Tink server (default "false")
Expand Down
2 changes: 2 additions & 0 deletions cmd/smee/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@
tinkServerUseTLS bool
trustedProxies string
disableDiscoverTrustedProxies bool
retries int
}

type dhcpConfig struct {
Expand Down Expand Up @@ -219,6 +220,7 @@
PublicSyslogFQDN: cfg.dhcp.syslogIP,
TinkServerTLS: cfg.ipxeHTTPScript.tinkServerUseTLS,
TinkServerGRPCAddr: cfg.ipxeHTTPScript.tinkServer,
IPXEScriptRetries: cfg.ipxeHTTPScript.retries,

Check warning on line 223 in cmd/smee/main.go

View check run for this annotation

Codecov / codecov/patch

cmd/smee/main.go#L223

Added line #L223 was not covered by tests
}
// serve ipxe script from the "/" URI.
handlers["/"] = jh.HandlerFunc()
Expand Down
52 changes: 46 additions & 6 deletions internal/ipxe/script/auto_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ func TestGenerateTemplate(t *testing.T) {
Facility: "onprem",
ExtraKernelParams: []string{"tink_worker_image=quay.io/tinkerbell/tink-worker:v0.8.0", "tinkerbell=packet"},
HWAddr: "3c:ec:ef:4c:4f:54",
Retries: 10,
},
script: HookScript,
want: `#!ipxe
Expand All @@ -32,14 +33,33 @@ echo Loading the Tinkerbell Hook iPXE script...

set arch x86_64
set download-url http://location:8080/to/kernel/and/initrd
set retries:int32 10

set idx:int32 0
:retry_kernel
kernel ${download-url}/vmlinuz-${arch} tink_worker_image=quay.io/tinkerbell/tink-worker:v0.8.0 tinkerbell=packet \
facility=onprem syslog_host=1.2.3.4 grpc_authority=1.2.3.4:42113 tinkerbell_tls=false worker_id=3c:ec:ef:4c:4f:54 hw_addr=3c:ec:ef:4c:4f:54 \
modules=loop,squashfs,sd-mod,usb-storage intel_iommu=on iommu=pt initrd=initramfs-${arch} console=tty0 console=ttyS1,115200
modules=loop,squashfs,sd-mod,usb-storage intel_iommu=on iommu=pt initrd=initramfs-${arch} console=tty0 console=ttyS1,115200 || iseq ${idx} ${retries} && goto kernel-error || inc idx && goto retry_kernel

initrd ${download-url}/initramfs-${arch}
set idx:int32 0
:retry_initrd
initrd ${download-url}/initramfs-${arch} || iseq ${idx} ${retries} && goto initrd-error || inc idx && goto retry_initrd

boot
set idx:int32 0
:retry_boot
boot || iseq ${idx} ${retries} && goto boot-error || inc idx && goto retry_boot

:kernel-error
echo Failed to load kernel
exit

:initrd-error
echo Failed to load initrd
exit

:boot-error
echo Failed to boot
exit
`,
},
"with vlan": {
Expand All @@ -54,6 +74,7 @@ boot
ExtraKernelParams: []string{"tink_worker_image=quay.io/tinkerbell/tink-worker:v0.8.0", "tinkerbell=packet"},
HWAddr: "3c:ec:ef:4c:4f:54",
VLANID: "16",
Retries: 10,
},
script: HookScript,
want: `#!ipxe
Expand All @@ -62,14 +83,33 @@ echo Loading the Tinkerbell Hook iPXE script...

set arch x86_64
set download-url http://location:8080/to/kernel/and/initrd
set retries:int32 10

set idx:int32 0
:retry_kernel
kernel ${download-url}/vmlinuz-${arch} vlan_id=16 tink_worker_image=quay.io/tinkerbell/tink-worker:v0.8.0 tinkerbell=packet \
facility=onprem syslog_host=1.2.3.4 grpc_authority=1.2.3.4:42113 tinkerbell_tls=false worker_id=3c:ec:ef:4c:4f:54 hw_addr=3c:ec:ef:4c:4f:54 \
modules=loop,squashfs,sd-mod,usb-storage intel_iommu=on iommu=pt initrd=initramfs-${arch} console=tty0 console=ttyS1,115200
modules=loop,squashfs,sd-mod,usb-storage intel_iommu=on iommu=pt initrd=initramfs-${arch} console=tty0 console=ttyS1,115200 || iseq ${idx} ${retries} && goto kernel-error || inc idx && goto retry_kernel

set idx:int32 0
:retry_initrd
initrd ${download-url}/initramfs-${arch} || iseq ${idx} ${retries} && goto initrd-error || inc idx && goto retry_initrd

set idx:int32 0
:retry_boot
boot || iseq ${idx} ${retries} && goto boot-error || inc idx && goto retry_boot

:kernel-error
echo Failed to load kernel
exit

initrd ${download-url}/initramfs-${arch}
:initrd-error
echo Failed to load initrd
exit

boot
:boot-error
echo Failed to boot
exit
`,
},
"parse error": {
Expand Down
26 changes: 23 additions & 3 deletions internal/ipxe/script/hook.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,33 @@ echo Debug TraceID: {{ .TraceID }}

set arch {{ .Arch }}
set download-url {{ .DownloadURL }}
set retries:int32 {{ .Retries }}

set idx:int32 0
:retry_kernel
kernel ${download-url}/vmlinuz-${arch} {{- if ne .VLANID "" }} vlan_id={{ .VLANID }} {{- end }} {{- range .ExtraKernelParams}} {{.}} {{- end}} \
facility={{ .Facility }} syslog_host={{ .SyslogHost }} grpc_authority={{ .TinkGRPCAuthority }} tinkerbell_tls={{ .TinkerbellTLS }} worker_id={{ .WorkerID }} hw_addr={{ .HWAddr }} \
modules=loop,squashfs,sd-mod,usb-storage intel_iommu=on iommu=pt initrd=initramfs-${arch} console=tty0 console=ttyS1,115200
modules=loop,squashfs,sd-mod,usb-storage intel_iommu=on iommu=pt initrd=initramfs-${arch} console=tty0 console=ttyS1,115200 || iseq ${idx} ${retries} && goto kernel-error || inc idx && goto retry_kernel

initrd ${download-url}/initramfs-${arch}
set idx:int32 0
:retry_initrd
initrd ${download-url}/initramfs-${arch} || iseq ${idx} ${retries} && goto initrd-error || inc idx && goto retry_initrd

boot
set idx:int32 0
:retry_boot
boot || iseq ${idx} ${retries} && goto boot-error || inc idx && goto retry_boot

:kernel-error
echo Failed to load kernel
exit

:initrd-error
echo Failed to load initrd
exit

:boot-error
echo Failed to boot
exit
`

// Hook holds the values used to generate the iPXE script that loads the Hook OS.
Expand All @@ -34,4 +53,5 @@ type Hook struct {
TraceID string
VLANID string // string number between 1-4095
WorkerID string // example 3c:ec:ef:4c:4f:54 or worker1
Retries int // number of retries to attempt when loading the kernel and initrd files and when booting
}
2 changes: 2 additions & 0 deletions internal/ipxe/script/ipxe.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ type Handler struct {
PublicSyslogFQDN string
TinkServerTLS bool
TinkServerGRPCAddr string
IPXEScriptRetries int
}

type data struct {
Expand Down Expand Up @@ -227,6 +228,7 @@ func (h *Handler) defaultScript(span trace.Span, hw data) (string, error) {
TinkGRPCAuthority: h.TinkServerGRPCAddr,
VLANID: hw.VLANID,
WorkerID: wID,
Retries: h.IPXEScriptRetries,
}
if sc := span.SpanContext(); sc.IsSampled() {
auto.TraceID = sc.TraceID().String()
Expand Down
28 changes: 24 additions & 4 deletions internal/ipxe/script/ipxe_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,33 @@ echo Loading the Tinkerbell Hook iPXE script...

set arch x86_64
set download-url http://127.1.1.1
set retries:int32 10

set idx:int32 0
:retry_kernel
kernel ${download-url}/vmlinuz-${arch} vlan_id=1234 \
facility=onprem syslog_host= grpc_authority= tinkerbell_tls=false worker_id=00:01:02:03:04:05 hw_addr=00:01:02:03:04:05 \
modules=loop,squashfs,sd-mod,usb-storage intel_iommu=on iommu=pt initrd=initramfs-${arch} console=tty0 console=ttyS1,115200
modules=loop,squashfs,sd-mod,usb-storage intel_iommu=on iommu=pt initrd=initramfs-${arch} console=tty0 console=ttyS1,115200 || iseq ${idx} ${retries} && goto kernel-error || inc idx && goto retry_kernel

initrd ${download-url}/initramfs-${arch}
set idx:int32 0
:retry_initrd
initrd ${download-url}/initramfs-${arch} || iseq ${idx} ${retries} && goto initrd-error || inc idx && goto retry_initrd

boot
set idx:int32 0
:retry_boot
boot || iseq ${idx} ${retries} && goto boot-error || inc idx && goto retry_boot

:kernel-error
echo Failed to load kernel
exit

:initrd-error
echo Failed to load initrd
exit

:boot-error
echo Failed to boot
exit
`
tests := map[string]struct {
want string
Expand All @@ -67,7 +86,8 @@ boot
for name, tt := range tests {
t.Run(name, func(t *testing.T) {
h := &Handler{
OSIEURL: "http://127.1.1.1",
OSIEURL: "http://127.1.1.1",
IPXEScriptRetries: 10,
}
d := data{MACAddress: net.HardwareAddr{0x00, 0x01, 0x02, 0x03, 0x04, 0x05}, VLANID: "1234", Facility: "onprem", Arch: "x86_64"}
sp := trace.SpanFromContext(context.Background())
Expand Down
Loading