Skip to content

Commit db7ef1a

Browse files
committed
feat: Move Pdb checks to State server
In certain networking scenarios, if the webhook is running in the host network, it might not be able to connect to the other servers. For this reason, we're isolating all network calls between servers so that they no longer originate from the webhook pod. Signed-off-by: Samuel Torres <[email protected]>
1 parent 94a1e1d commit db7ef1a

File tree

16 files changed

+951
-257
lines changed

16 files changed

+951
-257
lines changed

charts/x-pdb/templates/_helpers.tpl

+8
Original file line numberDiff line numberDiff line change
@@ -98,3 +98,11 @@ Create the name of the service account to use
9898
{{- default "default" .Values.serviceAccount.name }}
9999
{{- end }}
100100
{{- end }}
101+
102+
103+
{{/*
104+
Name of the service for the state service
105+
*/}}
106+
{{- define "x-pdb.stateServiceName" -}}
107+
{{ default (printf "%s-state" (include "x-pdb.fullname" .)) .Values.state.service.nameOverride }}
108+
{{- end }}

charts/x-pdb/templates/certificates.yaml

+7-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,13 @@ metadata:
3838
{{- include "x-pdb.stateLabels" . | nindent 4 }}
3939
spec:
4040
dnsNames:
41-
{{- toYaml .Values.certificates.state.certManager.dnsNames | nindent 4 }}
41+
- {{ include "x-pdb.stateServiceName" . }}
42+
- {{ include "x-pdb.stateServiceName" . }}.{{ include "x-pdb.namespace" . }}
43+
- {{ include "x-pdb.stateServiceName" . }}.{{ include "x-pdb.namespace" . }}.svc
44+
- {{ include "x-pdb.stateServiceName" . }}.{{ include "x-pdb.namespace" . }}.svc.cluster.local
45+
{{- range $name:= .Values.certificates.state.certManager.dnsNames }}
46+
- {{ $name }}
47+
{{- end }}
4248
ipAddresses:
4349
{{- toYaml .Values.certificates.state.certManager.ipAddresses | nindent 4 }}
4450
issuerRef:

charts/x-pdb/templates/state/deployment.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ spec:
4747
- "--zap-log-level={{ .Values.state.log.level }}"
4848
- "--state-certs-dir=/tmp/state-cert"
4949
- "--state-port={{ .Values.state.port }}"
50+
- "--remote-state-endpoints={{- join "," .Values.state.remoteStateEndpoints }}"
5051
- "--metrics-bind-address=:{{ .Values.state.metricsPort }}"
5152
- "--health-probe-bind-address=:{{ .Values.state.healthProbePort }}"
5253
{{- range $value := .Values.state.extraArgs }}

charts/x-pdb/templates/state/service.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
apiVersion: v1
22
kind: Service
33
metadata:
4-
name: {{ default (printf "%s-state" (include "x-pdb.fullname" .)) .Values.state.service.nameOverride }}
4+
name: {{ include "x-pdb.stateServiceName" . }}
55
namespace: {{ include "x-pdb.namespace" . }}
66
labels:
77
{{- include "x-pdb.stateLabels" . | nindent 4 }}

charts/x-pdb/templates/webhook/deployment.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ spec:
4545
args:
4646
- "--cluster-id={{ .Values.clusterID }}"
4747
- "--zap-log-level={{ .Values.webhook.log.level }}"
48-
- "--remote-endpoints={{- join "," .Values.webhook.remoteEndpoints }}"
48+
- "--local-state-endpoint={{ include "x-pdb.stateServiceName" . }}.{{ include "x-pdb.namespace" . }}.svc.cluster.local:443"
4949
- "--webhook-certs-dir=/tmp/webhook-cert"
5050
- "--state-certs-dir=/tmp/state-cert"
5151
- "--webhook-port={{ .Values.webhook.port }}"

charts/x-pdb/values.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ state:
1717
metricsPort: 8080
1818
log:
1919
level: info
20+
remoteStateEndpoints: []
2021
extraArgs: []
2122
# - "--dry-run=true"
2223
hostNetwork: false

cmd/state/main.go

+45-13
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,11 @@ limitations under the License.
1717
package main
1818

1919
import (
20+
"errors"
2021
"flag"
22+
"fmt"
2123
"os"
24+
"strings"
2225

2326
"github.com/form3tech-oss/x-pdb/internal/lock"
2427
"github.com/form3tech-oss/x-pdb/internal/pdb"
@@ -56,19 +59,21 @@ func init() {
5659
}
5760

5861
func main() {
62+
var clusterID string
63+
var dryRun bool
64+
var kubeContext string
65+
var leaseNamespace string
5966
var metricsAddr string
67+
var podID string
6068
var probeAddr string
69+
var remoteStateEndpoints string
6170
var stateCertsDir string
6271
var statePort int
63-
var leaseNamespace string
64-
var podID string
65-
var kubeContext string
66-
var clusterID string
67-
var dryRun bool
6872
flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
6973
flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
7074
flag.StringVar(&stateCertsDir, "state-certs-dir", "", "The directory that contains state server certificates")
7175
flag.IntVar(&statePort, "state-port", 9643, "The state server binding port")
76+
flag.StringVar(&remoteStateEndpoints, "remote-state-endpoints", "", "The list of endpoints of the remote pdb controllers")
7277
flag.StringVar(&leaseNamespace, "namespace", "kube-system", "the namespace in which the controller runs in")
7378
flag.StringVar(&podID, "pod-id", os.Getenv("HOSTNAME"),
7479
"The ID of the pod x-pdb pod. Used as prefix for the lease-holder-identity to obtain locks across clusters.",
@@ -113,6 +118,12 @@ func main() {
113118
os.Exit(1)
114119
}
115120

121+
remoteEndpointsList, err := parseEndpoints(remoteStateEndpoints)
122+
if err != nil {
123+
setupLog.Error(err, "unable to parse remote endpoints")
124+
os.Exit(1)
125+
}
126+
116127
stateClientPool := stateclient.NewClientPool(signalHandler, &logger, stateCertsDir)
117128

118129
lockService := lock.NewService(
@@ -121,7 +132,7 @@ func main() {
121132
mgr.GetAPIReader(),
122133
stateClientPool,
123134
leaseNamespace,
124-
[]string{},
135+
remoteEndpointsList,
125136
)
126137

127138
scaleFinder := pdb.NewScaleFinder(mgr.GetClient(), cli.DiscoveryClient)
@@ -131,14 +142,12 @@ func main() {
131142
scaleFinder,
132143
stateClientPool,
133144
leaseNamespace,
134-
[]string{})
145+
remoteEndpointsList)
135146

136-
{
137-
stateServer := stateserver.NewServer(pdbService, lockService, &logger, statePort, stateCertsDir)
138-
if err := mgr.Add(stateServer); err != nil {
139-
setupLog.Error(err, "unable to create state server")
140-
os.Exit(1)
141-
}
147+
stateServer := stateserver.NewServer(mgr.GetClient(), pdbService, lockService, &logger, statePort, stateCertsDir)
148+
if err := mgr.Add(stateServer); err != nil {
149+
setupLog.Error(err, "unable to create state server")
150+
os.Exit(1)
142151
}
143152

144153
// +kubebuilder:scaffold:builder
@@ -157,3 +166,26 @@ func main() {
157166
os.Exit(1)
158167
}
159168
}
169+
170+
func parseEndpoints(endpointString string) ([]string, error) {
171+
//nolint:prealloc
172+
var endpoints []string
173+
var errs []error
174+
splitEndpoints := strings.Split(endpointString, ",")
175+
176+
if len(splitEndpoints) == 1 && splitEndpoints[0] == "" {
177+
return endpoints, nil
178+
}
179+
180+
for _, ep := range splitEndpoints {
181+
sanitizedEndpoint := strings.TrimSpace(ep)
182+
183+
if sanitizedEndpoint == "" {
184+
errs = append(errs, fmt.Errorf("endpoint cannot be empty"))
185+
continue
186+
}
187+
188+
endpoints = append(endpoints, sanitizedEndpoint)
189+
}
190+
return endpoints, errors.Join(errs...)
191+
}

cmd/webhook/main.go

+30-69
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,10 @@ limitations under the License.
1717
package main
1818

1919
import (
20-
"errors"
2120
"flag"
22-
"fmt"
2321
"os"
24-
"strings"
2522

2623
"github.com/form3tech-oss/x-pdb/internal/disruptionprobe"
27-
"github.com/form3tech-oss/x-pdb/internal/lock"
2824
"github.com/form3tech-oss/x-pdb/internal/pdb"
2925
"github.com/form3tech-oss/x-pdb/internal/preactivities"
3026
stateclient "github.com/form3tech-oss/x-pdb/internal/state/client"
@@ -68,7 +64,7 @@ func main() {
6864
var webhookCertsDir string
6965
var webhookPort int
7066
var stateCertsDir string
71-
var remoteEndpoints string
67+
var localStateEndpoint string
7268
var leaseNamespace string
7369
var podID string
7470
var kubeContext string
@@ -79,7 +75,7 @@ func main() {
7975
flag.StringVar(&webhookCertsDir, "webhook-certs-dir", "", "The directory that contains webhook certificates")
8076
flag.IntVar(&webhookPort, "webhook-port", 9443, "The webhook binding port")
8177
flag.StringVar(&stateCertsDir, "state-certs-dir", "", "The directory that contains state server certificates")
82-
flag.StringVar(&remoteEndpoints, "remote-endpoints", "", "The list of endpoints of the remote pdb controllers")
78+
flag.StringVar(&localStateEndpoint, "local-state-endpoint", "x-pdb", "The address the probe endpoint binds to.")
8379
flag.StringVar(&leaseNamespace, "namespace", "kube-system", "the namespace in which the controller runs in")
8480
flag.StringVar(&podID, "pod-id", os.Getenv("HOSTNAME"),
8581
"The ID of the pod x-pdb pod. Used as prefix for the lease-holder-identity to obtain locks across clusters.",
@@ -124,23 +120,13 @@ func main() {
124120
os.Exit(1)
125121
}
126122

127-
remoteEndpointsList, err := parseEndpoints(remoteEndpoints)
123+
stateClientPool := stateclient.NewClientPool(signalHandler, &logger, stateCertsDir)
124+
stateClient, err := stateClientPool.Get(localStateEndpoint)
128125
if err != nil {
129-
setupLog.Error(err, "unable to parse remote endpoints")
126+
setupLog.Error(err, "unable to get a state client")
130127
os.Exit(1)
131128
}
132129

133-
stateClientPool := stateclient.NewClientPool(signalHandler, &logger, stateCertsDir)
134-
135-
lockService := lock.NewService(
136-
&logger,
137-
mgr.GetClient(),
138-
mgr.GetAPIReader(),
139-
stateClientPool,
140-
leaseNamespace,
141-
remoteEndpointsList,
142-
)
143-
144130
disruptionProbeClientPool := disruptionprobe.NewClientPool(signalHandler, &logger, stateCertsDir)
145131
disruptionProbeService := disruptionprobe.NewService(&logger, disruptionProbeClientPool)
146132

@@ -151,37 +137,35 @@ func main() {
151137
scaleFinder,
152138
stateClientPool,
153139
leaseNamespace,
154-
remoteEndpointsList)
140+
[]string{})
155141

156142
preactivitiesService := preactivities.NewService(logger, mgr.GetClient())
157143

158-
{
159-
hookServer := &webhook.DefaultServer{
160-
Options: webhook.Options{
161-
Port: webhookPort,
162-
CertDir: webhookCertsDir,
163-
},
164-
}
165-
if err := mgr.Add(hookServer); err != nil {
166-
setupLog.Error(err, "unable to create pod mutator webhook server")
167-
os.Exit(1)
168-
}
169-
decoder := admission.NewDecoder(mgr.GetScheme())
170-
podValidationWebhook := webhooks.NewPodValidationWebhook(
171-
mgr.GetClient(),
172-
logger,
173-
decoder,
174-
mgr.GetEventRecorderFor("x-pdb"),
175-
clusterID,
176-
podID,
177-
dryRun,
178-
pdbService,
179-
lockService,
180-
disruptionProbeService,
181-
preactivitiesService,
182-
)
183-
hookServer.Register("/validate", &webhook.Admission{Handler: podValidationWebhook})
144+
hookServer := &webhook.DefaultServer{
145+
Options: webhook.Options{
146+
Port: webhookPort,
147+
CertDir: webhookCertsDir,
148+
},
149+
}
150+
if err := mgr.Add(hookServer); err != nil {
151+
setupLog.Error(err, "unable to create pod mutator webhook server")
152+
os.Exit(1)
184153
}
154+
decoder := admission.NewDecoder(mgr.GetScheme())
155+
podValidationWebhook := webhooks.NewPodValidationWebhook(
156+
mgr.GetClient(),
157+
logger,
158+
decoder,
159+
mgr.GetEventRecorderFor("x-pdb"),
160+
clusterID,
161+
podID,
162+
dryRun,
163+
stateClient,
164+
pdbService,
165+
disruptionProbeService,
166+
preactivitiesService,
167+
)
168+
hookServer.Register("/validate", &webhook.Admission{Handler: podValidationWebhook})
185169

186170
// +kubebuilder:scaffold:builder
187171
if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
@@ -199,26 +183,3 @@ func main() {
199183
os.Exit(1)
200184
}
201185
}
202-
203-
func parseEndpoints(endpointString string) ([]string, error) {
204-
//nolint:prealloc
205-
var endpoints []string
206-
var errs []error
207-
splitEndpoints := strings.Split(endpointString, ",")
208-
209-
if len(splitEndpoints) == 1 && splitEndpoints[0] == "" {
210-
return endpoints, nil
211-
}
212-
213-
for _, ep := range splitEndpoints {
214-
sanitizedEndpoint := strings.TrimSpace(ep)
215-
216-
if sanitizedEndpoint == "" {
217-
errs = append(errs, fmt.Errorf("endpoint cannot be empty"))
218-
continue
219-
}
220-
221-
endpoints = append(endpoints, sanitizedEndpoint)
222-
}
223-
return endpoints, errors.Join(errs...)
224-
}

hack/env/xpdb-values.yaml

+4
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,11 @@ webhook:
77
extraArgs:
88
- --zap-stacktrace-level=panic
99
automountServiceAccountToken: true
10+
log:
11+
level: debug
1012
state:
13+
log:
14+
level: debug
1115
image:
1216
tag: "latest"
1317
extraArgs:

hack/install-xpdb.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ fi
3838
helm upgrade -i x-pdb ./charts/x-pdb \
3939
-f "hack/env/xpdb-values.yaml" \
4040
--set clusterID="${CLUSTER}" \
41-
--set webhook.remoteEndpoints="$remote_endpoints" \
41+
--set state.remoteStateEndpoints="$remote_endpoints" \
4242
--set certificates.state.certManager.ipAddresses="{$this_address}" \
4343
--set state.service.loadBalancerIP="$this_address" \
4444
--kube-context="${CONTEXT}"

0 commit comments

Comments
 (0)