@@ -2,6 +2,7 @@ package controller
2
2
3
3
import (
4
4
"context"
5
+ "encoding/json"
5
6
"fmt"
6
7
"net/http"
7
8
"sort"
@@ -39,6 +40,7 @@ const (
39
40
// How frequently informers should resync. This is also the frequency at which
40
41
// the operator reconciles even if no changes are made to the watched resources.
41
42
informerSyncInterval = 5 * time .Minute
43
+ lastAppConfAnnKey = "kubectl.kubernetes.io/last-applied-configuration"
42
44
)
43
45
44
46
type httpClient interface {
@@ -68,14 +70,17 @@ type RolloutController struct {
68
70
stopCh chan struct {}
69
71
70
72
// Metrics.
71
- groupReconcileTotal * prometheus.CounterVec
72
- groupReconcileFailed * prometheus.CounterVec
73
- groupReconcileDuration * prometheus.HistogramVec
74
- groupReconcileLastSuccess * prometheus.GaugeVec
75
- desiredReplicas * prometheus.GaugeVec
76
- scaleDownBoolean * prometheus.GaugeVec
77
- downscaleProbeTotal * prometheus.CounterVec
78
- downscaleProbeFailureTotal * prometheus.CounterVec
73
+ groupReconcileTotal * prometheus.CounterVec
74
+ groupReconcileFailed * prometheus.CounterVec
75
+ groupReconcileDuration * prometheus.HistogramVec
76
+ groupReconcileLastSuccess * prometheus.GaugeVec
77
+ desiredReplicas * prometheus.GaugeVec
78
+ downscaleProbeTotal * prometheus.CounterVec
79
+ removeLastAppliedReplicasTotal * prometheus.CounterVec
80
+ removeLastAppliedReplicasEmptyTotal * prometheus.CounterVec
81
+ removeLastAppliedReplicasErrorTotal * prometheus.CounterVec
82
+ lastAppliedReplicasRemovedTotal * prometheus.CounterVec
83
+ downscaleState * prometheus.GaugeVec
79
84
80
85
// Keep track of discovered rollout groups. We use this information to delete metrics
81
86
// related to rollout groups that have been decommissioned.
@@ -135,18 +140,30 @@ func NewRolloutController(kubeClient kubernetes.Interface, restMapper meta.RESTM
135
140
Name : "rollout_operator_statefulset_desired_replicas" ,
136
141
Help : "Desired replicas of a Statefulset parsed from CRD." ,
137
142
}, []string {"statefulset_name" }),
138
- scaleDownBoolean : promauto .With (reg ).NewGaugeVec (prometheus.GaugeOpts {
139
- Name : "rollout_operator_scale_down_boolean" ,
140
- Help : "Boolean for whether an ingester pod is ready to scale down." ,
141
- }, []string {"scale_down_pod_name" }),
142
143
downscaleProbeTotal : promauto .With (reg ).NewCounterVec (prometheus.CounterOpts {
143
144
Name : "rollout_operator_downscale_probe_total" ,
144
145
Help : "Total number of downscale probes." ,
145
- }, []string {"scale_down_pod_name" }),
146
- downscaleProbeFailureTotal : promauto .With (reg ).NewCounterVec (prometheus.CounterOpts {
147
- Name : "rollout_operator_downscale_probe_failure_total" ,
148
- Help : "Total number of failed downscale probes." ,
149
- }, []string {"scale_down_pod_name" }),
146
+ }, []string {"scale_down_pod_name" , "status" }),
147
+ removeLastAppliedReplicasTotal : promauto .With (reg ).NewCounterVec (prometheus.CounterOpts {
148
+ Name : "rollout_operator_remove_last_applied_replicas_total" ,
149
+ Help : "Total number of removal of .spec.replicas field from last-applied-configuration annotation." ,
150
+ }, []string {"statefulset_name" }),
151
+ removeLastAppliedReplicasEmptyTotal : promauto .With (reg ).NewCounterVec (prometheus.CounterOpts {
152
+ Name : "rollout_operator_remove_last_applied_replicas_empty_total" ,
153
+ Help : "Total number of empty .spec.replicas field from last-applied-configuration annotation." ,
154
+ }, []string {"statefulset_name" }),
155
+ removeLastAppliedReplicasErrorTotal : promauto .With (reg ).NewCounterVec (prometheus.CounterOpts {
156
+ Name : "rollout_operator_remove_last_applied_replicas_error_total" ,
157
+ Help : "Total number of errors while removing .spec.replicas field from last-applied-configuration annotation." ,
158
+ }, []string {"statefulset_name" , "error" }),
159
+ lastAppliedReplicasRemovedTotal : promauto .With (reg ).NewCounterVec (prometheus.CounterOpts {
160
+ Name : "rollout_operator_last_applied_replicas_removed_total" ,
161
+ Help : "Total number of .spec.replicas fields removed from last-applied-configuration annotation." ,
162
+ }, []string {"statefulset_name" }),
163
+ downscaleState : promauto .With (reg ).NewGaugeVec (prometheus.GaugeOpts {
164
+ Name : "rollout_operator_downscale_state" ,
165
+ Help : "State of the downscale operation." ,
166
+ }, []string {"statefulset_name" }),
150
167
}
151
168
152
169
return c
@@ -230,7 +247,7 @@ func (c *RolloutController) reconcile(ctx context.Context) error {
230
247
span , ctx := opentracing .StartSpanFromContext (ctx , "RolloutController.reconcile()" )
231
248
defer span .Finish ()
232
249
233
- level .Info (c .logger ).Log ("msg" , "reconcile started " )
250
+ level .Info (c .logger ).Log ("msg" , "================ RECONCILE START ================ " )
234
251
235
252
sets , err := c .listStatefulSetsWithRolloutGroup ()
236
253
if err != nil {
@@ -252,7 +269,8 @@ func (c *RolloutController) reconcile(ctx context.Context) error {
252
269
253
270
c .deleteMetricsForDecommissionedGroups (groups )
254
271
255
- level .Info (c .logger ).Log ("msg" , "reconcile done" )
272
+ level .Info (c .logger ).Log ("msg" , "================ RECONCILE DONE ================" )
273
+
256
274
return nil
257
275
}
258
276
@@ -276,6 +294,12 @@ func (c *RolloutController) reconcileStatefulSetsGroup(ctx context.Context, grou
276
294
// Sort StatefulSets to provide a deterministic behaviour.
277
295
util .SortStatefulSets (sets )
278
296
297
+ for _ , s := range sets {
298
+ if err := c .removeReplicasFromLastApplied (ctx , s ); err != nil {
299
+ level .Error (c .logger ).Log ("msg" , "failed to remove replicas from last-applied-configuration annotation" , "statefulset" , s .Name , "err" , err )
300
+ }
301
+ }
302
+
279
303
// Adjust the number of replicas for each StatefulSet in the group if desired. If the number of
280
304
// replicas of any StatefulSet was adjusted, return early in order to guarantee each STS model is
281
305
// up-to-date.
@@ -517,7 +541,7 @@ func (c *RolloutController) listPods(sel labels.Selector) ([]*corev1.Pod, error)
517
541
}
518
542
519
543
func (c * RolloutController ) updateStatefulSetPods (ctx context.Context , sts * v1.StatefulSet ) (bool , error ) {
520
- level .Debug (c .logger ).Log ("msg" , "reconciling StatefulSet============== " , "statefulset" , sts .Name )
544
+ level .Debug (c .logger ).Log ("msg" , "reconciling StatefulSet" , "statefulset" , sts .Name )
521
545
522
546
podsToUpdate , err := c .podsNotMatchingUpdateRevision (sts )
523
547
if err != nil {
@@ -678,3 +702,74 @@ func (c *RolloutController) patchStatefulSetSpecReplicas(ctx context.Context, st
678
702
_ , err := c .kubeClient .AppsV1 ().StatefulSets (c .namespace ).Patch (ctx , sts .GetName (), types .StrategicMergePatchType , []byte (patch ), metav1.PatchOptions {})
679
703
return err
680
704
}
705
+
706
+ // removeReplicasFromLastApplied deletes .spec.replicas from the
707
+ // kubectl.kubernetes.io/last-applied-configuration annotation on a StatefulSet.
708
+ func (c * RolloutController ) removeReplicasFromLastApplied (
709
+ ctx context.Context ,
710
+ sts * v1.StatefulSet ,
711
+ ) error {
712
+ const noAnnotationErr = "NoAnnotationErr"
713
+ const lastAppliedNotFoundErr = "LastAppliedNotFoundErr"
714
+ const specNotFoundErr = "SpecNotFoundErr"
715
+ const jsonDecodeErr = "JsonDecodeErr"
716
+ const jsonEncodeErr = "JsonEncodeErr"
717
+ const stsPatchErr = "StsPatchErr"
718
+
719
+ c .removeLastAppliedReplicasTotal .WithLabelValues (sts .GetName ()).Inc ()
720
+ anns := sts .GetAnnotations ()
721
+ if anns == nil {
722
+ c .removeLastAppliedReplicasErrorTotal .WithLabelValues (sts .GetName (), noAnnotationErr ).Inc ()
723
+ return fmt .Errorf ("no annotation found on statefulset %s" , sts .GetName ())
724
+ }
725
+ raw , ok := anns [lastAppConfAnnKey ]
726
+ if ! ok || raw == "" {
727
+ c .removeLastAppliedReplicasErrorTotal .WithLabelValues (sts .GetName (), lastAppliedNotFoundErr ).Inc ()
728
+ return fmt .Errorf ("last applied annotation not found in statefulset %s annotations" , sts .GetName ())
729
+ }
730
+
731
+ // Decode annotation JSON.
732
+ var obj map [string ]any
733
+ if err := json .Unmarshal ([]byte (raw ), & obj ); err != nil {
734
+ c .removeLastAppliedReplicasErrorTotal .WithLabelValues (sts .GetName (), jsonDecodeErr ).Inc ()
735
+ return fmt .Errorf ("unmarshal %s: %w" , lastAppConfAnnKey , err )
736
+ }
737
+
738
+ // Remove spec.replicas.
739
+ if spec , ok := obj ["spec" ].(map [string ]any ); ok {
740
+ if _ , ok := spec ["replicas" ]; ! ok {
741
+ c .removeLastAppliedReplicasEmptyTotal .WithLabelValues (sts .GetName ()).Inc ()
742
+ return nil
743
+ }
744
+ delete (spec , "replicas" )
745
+ if len (spec ) == 0 {
746
+ delete (obj , "spec" )
747
+ }
748
+ } else {
749
+ c .removeLastAppliedReplicasErrorTotal .WithLabelValues (sts .GetName (), specNotFoundErr ).Inc ()
750
+ return fmt .Errorf ("no spec found on statefulset %s last applied annotation" , sts .GetName ())
751
+ }
752
+
753
+ // Encode updated annotation.
754
+ newRaw , err := json .Marshal (obj )
755
+ if err != nil {
756
+ c .removeLastAppliedReplicasErrorTotal .WithLabelValues (sts .GetName (), jsonEncodeErr ).Inc ()
757
+ return fmt .Errorf ("marshal %s: %w" , lastAppConfAnnKey , err )
758
+ }
759
+
760
+ // Patch StatefulSet with the new annotation.
761
+ patch := fmt .Sprintf (
762
+ `{"metadata":{"annotations":{"%s":%q}}}` ,
763
+ lastAppConfAnnKey ,
764
+ newRaw ,
765
+ )
766
+ _ , err = c .kubeClient .AppsV1 ().
767
+ StatefulSets (c .namespace ).
768
+ Patch (ctx , sts .GetName (), types .StrategicMergePatchType , []byte (patch ), metav1.PatchOptions {})
769
+ if err != nil {
770
+ c .removeLastAppliedReplicasErrorTotal .WithLabelValues (sts .GetName (), stsPatchErr ).Inc ()
771
+ return err
772
+ }
773
+ c .lastAppliedReplicasRemovedTotal .WithLabelValues (sts .GetName ()).Inc ()
774
+ return nil
775
+ }
0 commit comments