diff --git a/cmd/smee/flag.go b/cmd/smee/flag.go index aad0adce..5632ece8 100644 --- a/cmd/smee/flag.go +++ b/cmd/smee/flag.go @@ -106,6 +106,7 @@ func ipxeHTTPScriptFlags(c *config, fs *flag.FlagSet) { fs.StringVar(&c.ipxeHTTPScript.hookURL, "osie-url", "", "[http] URL where OSIE (HookOS) images are located") fs.StringVar(&c.ipxeHTTPScript.tinkServer, "tink-server", "", "[http] IP:Port for the Tink server") fs.BoolVar(&c.ipxeHTTPScript.tinkServerUseTLS, "tink-server-tls", false, "[http] use TLS for Tink server") + fs.IntVar(&c.ipxeHTTPScript.retries, "ipxe-script-retries", 0, "[http] number of retries to attempt when fetching kernel and initrd files in the iPXE script") } func dhcpFlags(c *config, fs *flag.FlagSet) { diff --git a/cmd/smee/flag_test.go b/cmd/smee/flag_test.go index 52c85226..a7394e2e 100644 --- a/cmd/smee/flag_test.go +++ b/cmd/smee/flag_test.go @@ -111,6 +111,7 @@ FLAGS -http-addr [http] local IP:Port to listen on for iPXE HTTP script requests (default "%[1]v:80") -http-ipxe-binary-enabled [http] enable iPXE HTTP binary server (default "true") -http-ipxe-script-enabled [http] enable iPXE HTTP script server (default "true") + -ipxe-script-retries [http] number of retries to attempt when fetching kernel and initrd files in the iPXE script (default "0") -osie-url [http] URL where OSIE (HookOS) images are located -tink-server [http] IP:Port for the Tink server -tink-server-tls [http] use TLS for Tink server (default "false") diff --git a/cmd/smee/main.go b/cmd/smee/main.go index a96e192e..d50c5ceb 100644 --- a/cmd/smee/main.go +++ b/cmd/smee/main.go @@ -83,6 +83,7 @@ type ipxeHTTPScript struct { tinkServerUseTLS bool trustedProxies string disableDiscoverTrustedProxies bool + retries int } type dhcpConfig struct { @@ -219,6 +220,7 @@ func main() { PublicSyslogFQDN: cfg.dhcp.syslogIP, TinkServerTLS: cfg.ipxeHTTPScript.tinkServerUseTLS, TinkServerGRPCAddr: cfg.ipxeHTTPScript.tinkServer, + IPXEScriptRetries: cfg.ipxeHTTPScript.retries, } // serve ipxe 
script from the "/" URI. handlers["/"] = jh.HandlerFunc() diff --git a/internal/ipxe/script/auto_test.go b/internal/ipxe/script/auto_test.go index 82a87a58..e0363f03 100644 --- a/internal/ipxe/script/auto_test.go +++ b/internal/ipxe/script/auto_test.go @@ -24,6 +24,7 @@ func TestGenerateTemplate(t *testing.T) { Facility: "onprem", ExtraKernelParams: []string{"tink_worker_image=quay.io/tinkerbell/tink-worker:v0.8.0", "tinkerbell=packet"}, HWAddr: "3c:ec:ef:4c:4f:54", + Retries: 10, }, script: HookScript, want: `#!ipxe @@ -32,14 +33,35 @@ echo Loading the Tinkerbell Hook iPXE script... set arch x86_64 set download-url http://location:8080/to/kernel/and/initrd +set retries:int32 10 +set idx:int32 0 +:retry_kernel kernel ${download-url}/vmlinuz-${arch} tink_worker_image=quay.io/tinkerbell/tink-worker:v0.8.0 tinkerbell=packet \ facility=onprem syslog_host=1.2.3.4 grpc_authority=1.2.3.4:42113 tinkerbell_tls=false worker_id=3c:ec:ef:4c:4f:54 hw_addr=3c:ec:ef:4c:4f:54 \ -modules=loop,squashfs,sd-mod,usb-storage intel_iommu=on iommu=pt initrd=initramfs-${arch} console=tty0 console=ttyS1,115200 +modules=loop,squashfs,sd-mod,usb-storage intel_iommu=on iommu=pt initrd=initramfs-${arch} console=tty0 console=ttyS1,115200 && goto download_initrd || iseq ${idx} ${retries} && goto kernel-error || inc idx && goto retry_kernel -initrd ${download-url}/initramfs-${arch} +:download_initrd +set idx:int32 0 +:retry_initrd +initrd ${download-url}/initramfs-${arch} && goto start_boot || iseq ${idx} ${retries} && goto initrd-error || inc idx && goto retry_initrd -boot +:start_boot +set idx:int32 0 +:retry_boot +boot || iseq ${idx} ${retries} && goto boot-error || inc idx && goto retry_boot + +:kernel-error +echo Failed to load kernel +exit + +:initrd-error +echo Failed to load initrd +exit + +:boot-error +echo Failed to boot +exit `, }, "with vlan": { @@ -54,6 +76,7 @@ boot ExtraKernelParams: []string{"tink_worker_image=quay.io/tinkerbell/tink-worker:v0.8.0", "tinkerbell=packet"}, HWAddr: "3c:ec:ef:4c:4f:54", VLANID: "16", + Retries: 10, }, script: HookScript, 
want: `#!ipxe @@ -62,14 +85,35 @@ echo Loading the Tinkerbell Hook iPXE script... set arch x86_64 set download-url http://location:8080/to/kernel/and/initrd +set retries:int32 10 +set idx:int32 0 +:retry_kernel kernel ${download-url}/vmlinuz-${arch} vlan_id=16 tink_worker_image=quay.io/tinkerbell/tink-worker:v0.8.0 tinkerbell=packet \ facility=onprem syslog_host=1.2.3.4 grpc_authority=1.2.3.4:42113 tinkerbell_tls=false worker_id=3c:ec:ef:4c:4f:54 hw_addr=3c:ec:ef:4c:4f:54 \ -modules=loop,squashfs,sd-mod,usb-storage intel_iommu=on iommu=pt initrd=initramfs-${arch} console=tty0 console=ttyS1,115200 +modules=loop,squashfs,sd-mod,usb-storage intel_iommu=on iommu=pt initrd=initramfs-${arch} console=tty0 console=ttyS1,115200 && goto download_initrd || iseq ${idx} ${retries} && goto kernel-error || inc idx && goto retry_kernel + +:download_initrd +set idx:int32 0 +:retry_initrd +initrd ${download-url}/initramfs-${arch} && goto start_boot || iseq ${idx} ${retries} && goto initrd-error || inc idx && goto retry_initrd + +:start_boot +set idx:int32 0 +:retry_boot +boot || iseq ${idx} ${retries} && goto boot-error || inc idx && goto retry_boot + +:kernel-error +echo Failed to load kernel +exit -initrd ${download-url}/initramfs-${arch} +:initrd-error +echo Failed to load initrd +exit -boot +:boot-error +echo Failed to boot +exit `, }, "parse error": { diff --git a/internal/ipxe/script/hook.go b/internal/ipxe/script/hook.go index a9ba4ed1..71d3b7be 100644 --- a/internal/ipxe/script/hook.go +++ b/internal/ipxe/script/hook.go @@ -10,14 +10,35 @@ echo Debug TraceID: {{ .TraceID }} set arch {{ .Arch }} set download-url {{ .DownloadURL }} +set retries:int32 {{ .Retries }} +set idx:int32 0 +:retry_kernel kernel ${download-url}/vmlinuz-${arch} {{- if ne .VLANID "" }} vlan_id={{ .VLANID }} {{- end }} {{- range .ExtraKernelParams}} {{.}} {{- end}} \ facility={{ .Facility }} syslog_host={{ .SyslogHost }} grpc_authority={{ .TinkGRPCAuthority }} tinkerbell_tls={{ .TinkerbellTLS }} worker_id={{ .WorkerID }} hw_addr={{ .HWAddr }} \ 
-modules=loop,squashfs,sd-mod,usb-storage intel_iommu=on iommu=pt initrd=initramfs-${arch} console=tty0 console=ttyS1,115200 +modules=loop,squashfs,sd-mod,usb-storage intel_iommu=on iommu=pt initrd=initramfs-${arch} console=tty0 console=ttyS1,115200 && goto download_initrd || iseq ${idx} ${retries} && goto kernel-error || inc idx && goto retry_kernel -initrd ${download-url}/initramfs-${arch} +:download_initrd +set idx:int32 0 +:retry_initrd +initrd ${download-url}/initramfs-${arch} && goto start_boot || iseq ${idx} ${retries} && goto initrd-error || inc idx && goto retry_initrd -boot +:start_boot +set idx:int32 0 +:retry_boot +boot || iseq ${idx} ${retries} && goto boot-error || inc idx && goto retry_boot + +:kernel-error +echo Failed to load kernel +exit + +:initrd-error +echo Failed to load initrd +exit + +:boot-error +echo Failed to boot +exit ` // Hook holds the values used to generate the iPXE script that loads the Hook OS. @@ -34,4 +55,5 @@ type Hook struct { TraceID string VLANID string // string number between 1-4095 WorkerID string // example 3c:ec:ef:4c:4f:54 or worker1 + Retries int // number of times the iPXE script retries each of the kernel, initrd and boot commands before jumping to its error label; 0 means a single attempt } diff --git a/internal/ipxe/script/ipxe.go b/internal/ipxe/script/ipxe.go index 2e989c9a..dfd22c6c 100644 --- a/internal/ipxe/script/ipxe.go +++ b/internal/ipxe/script/ipxe.go @@ -26,6 +26,7 @@ type Handler struct { PublicSyslogFQDN string TinkServerTLS bool TinkServerGRPCAddr string + IPXEScriptRetries int } type data struct { @@ -227,6 +228,7 @@ func (h *Handler) defaultScript(span trace.Span, hw data) (string, error) { TinkGRPCAuthority: h.TinkServerGRPCAddr, VLANID: hw.VLANID, WorkerID: wID, + Retries: h.IPXEScriptRetries, } if sc := span.SpanContext(); sc.IsSampled() { auto.TraceID = sc.TraceID().String() diff --git a/internal/ipxe/script/ipxe_test.go b/internal/ipxe/script/ipxe_test.go index a3ceb169..28d12868 100644 --- a/internal/ipxe/script/ipxe_test.go +++ b/internal/ipxe/script/ipxe_test.go @@ -50,14 +50,35 @@ echo Loading the Tinkerbell Hook iPXE script... 
set arch x86_64 set download-url http://127.1.1.1 +set retries:int32 10 +set idx:int32 0 +:retry_kernel kernel ${download-url}/vmlinuz-${arch} vlan_id=1234 \ facility=onprem syslog_host= grpc_authority= tinkerbell_tls=false worker_id=00:01:02:03:04:05 hw_addr=00:01:02:03:04:05 \ -modules=loop,squashfs,sd-mod,usb-storage intel_iommu=on iommu=pt initrd=initramfs-${arch} console=tty0 console=ttyS1,115200 +modules=loop,squashfs,sd-mod,usb-storage intel_iommu=on iommu=pt initrd=initramfs-${arch} console=tty0 console=ttyS1,115200 && goto download_initrd || iseq ${idx} ${retries} && goto kernel-error || inc idx && goto retry_kernel -initrd ${download-url}/initramfs-${arch} +:download_initrd +set idx:int32 0 +:retry_initrd +initrd ${download-url}/initramfs-${arch} && goto start_boot || iseq ${idx} ${retries} && goto initrd-error || inc idx && goto retry_initrd -boot +:start_boot +set idx:int32 0 +:retry_boot +boot || iseq ${idx} ${retries} && goto boot-error || inc idx && goto retry_boot + +:kernel-error +echo Failed to load kernel +exit + +:initrd-error +echo Failed to load initrd +exit + +:boot-error +echo Failed to boot +exit ` tests := map[string]struct { want string @@ -67,7 +88,8 @@ boot for name, tt := range tests { t.Run(name, func(t *testing.T) { h := &Handler{ - OSIEURL: "http://127.1.1.1", + OSIEURL: "http://127.1.1.1", + IPXEScriptRetries: 10, } d := data{MACAddress: net.HardwareAddr{0x00, 0x01, 0x02, 0x03, 0x04, 0x05}, VLANID: "1234", Facility: "onprem", Arch: "x86_64"} sp := trace.SpanFromContext(context.Background())