Skip to content

Commit 4043bab

Browse files
perdasilvaPer Goncalves da Silva
and
Per Goncalves da Silva
authored
Add more logging around catalog source sync (#3414)
Signed-off-by: Per Goncalves da Silva <[email protected]> Co-authored-by: Per Goncalves da Silva <[email protected]>
1 parent 1274d54 commit 4043bab

File tree

2 files changed

+42
-32
lines changed

2 files changed

+42
-32
lines changed

pkg/controller/operators/catalog/operator.go

Lines changed: 11 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -918,6 +918,7 @@ func validateSourceType(logger *logrus.Entry, in *v1alpha1.CatalogSource) (out *
918918
err = fmt.Errorf("unknown sourcetype: %s", sourceType)
919919
}
920920
if err != nil {
921+
logger.WithError(err).Error("error validating catalog source type")
921922
out.SetError(v1alpha1.CatalogSourceSpecInvalidError, err)
922923
return
923924
}
@@ -929,7 +930,6 @@ func validateSourceType(logger *logrus.Entry, in *v1alpha1.CatalogSource) (out *
929930
}
930931
}
931932
continueSync = true
932-
933933
return
934934
}
935935

@@ -942,27 +942,22 @@ func (o *Operator) syncConfigMap(logger *logrus.Entry, in *v1alpha1.CatalogSourc
942942

943943
out = in.DeepCopy()
944944

945-
logger = logger.WithFields(logrus.Fields{
946-
"configmap.namespace": in.Namespace,
947-
"configmap.name": in.Spec.ConfigMap,
948-
})
949-
logger.Info("checking catsrc configmap state")
950-
951945
var updateLabel bool
952946
// Get the catalog source's config map
953947
configMap, err := o.lister.CoreV1().ConfigMapLister().ConfigMaps(in.GetNamespace()).Get(in.Spec.ConfigMap)
954948
// Attempt to look up the CM via api call if there is a cache miss
955949
if apierrors.IsNotFound(err) {
950+
// TODO: Don't reach out via live client if its not found in the cache (https://github.com/operator-framework/operator-lifecycle-manager/issues/3415)
956951
configMap, err = o.opClient.KubernetesInterface().CoreV1().ConfigMaps(in.GetNamespace()).Get(context.TODO(), in.Spec.ConfigMap, metav1.GetOptions{})
957952
// Found cm in the cluster, add managed label to configmap
958953
if err == nil {
959-
labels := configMap.GetLabels()
960-
if labels == nil {
961-
labels = make(map[string]string)
954+
cmLabels := configMap.GetLabels()
955+
if cmLabels == nil {
956+
cmLabels = make(map[string]string)
962957
}
963958

964-
labels[install.OLMManagedLabelKey] = "false"
965-
configMap.SetLabels(labels)
959+
cmLabels[install.OLMManagedLabelKey] = "false"
960+
configMap.SetLabels(cmLabels)
966961
updateLabel = true
967962
}
968963
}
@@ -979,12 +974,9 @@ func (o *Operator) syncConfigMap(logger *logrus.Entry, in *v1alpha1.CatalogSourc
979974
out.SetError(v1alpha1.CatalogSourceConfigMapError, syncError)
980975
return
981976
}
982-
983-
logger.Info("adopted configmap")
984977
}
985978

986979
if in.Status.ConfigMapResource == nil || !in.Status.ConfigMapResource.IsAMatch(&configMap.ObjectMeta) {
987-
logger.Info("updating catsrc configmap state")
988980
// configmap ref nonexistent or updated, write out the new configmap ref to status and exit
989981
out.Status.ConfigMapResource = &v1alpha1.ConfigMapResourceReference{
990982
Name: configMap.GetName(),
@@ -1004,7 +996,6 @@ func (o *Operator) syncConfigMap(logger *logrus.Entry, in *v1alpha1.CatalogSourc
1004996
func (o *Operator) syncRegistryServer(logger *logrus.Entry, in *v1alpha1.CatalogSource) (out *v1alpha1.CatalogSource, continueSync bool, syncError error) {
1005997
out = in.DeepCopy()
1006998

1007-
logger.Info("synchronizing registry server")
1008999
sourceKey := registry.CatalogKey{Name: in.GetName(), Namespace: in.GetNamespace()}
10091000
srcReconciler := o.reconciler.ReconcilerForSource(in)
10101001
if srcReconciler == nil {
@@ -1021,21 +1012,15 @@ func (o *Operator) syncRegistryServer(logger *logrus.Entry, in *v1alpha1.Catalog
10211012
return
10221013
}
10231014

1024-
logger.WithField("health", healthy).Infof("checked registry server health")
1025-
10261015
if healthy && in.Status.RegistryServiceStatus != nil {
1027-
logger.Info("registry state good")
10281016
continueSync = true
10291017
// return here if catalog does not have polling enabled
10301018
if !out.Poll() {
1031-
logger.Info("polling not enabled, nothing more to do")
10321019
return
10331020
}
10341021
}
10351022

10361023
// Registry pod hasn't been created or hasn't been updated since the last configmap update, recreate it
1037-
logger.Info("ensuring registry server")
1038-
10391024
err = srcReconciler.EnsureRegistryServer(logger, out)
10401025
if err != nil {
10411026
if _, ok := err.(reconciler.UpdateNotReadyErr); ok {
@@ -1048,8 +1033,6 @@ func (o *Operator) syncRegistryServer(logger *logrus.Entry, in *v1alpha1.Catalog
10481033
return
10491034
}
10501035

1051-
logger.Info("ensured registry server")
1052-
10531036
// requeue the catalog sync based on the polling interval, for accurate syncs of catalogs with polling enabled
10541037
if out.Spec.UpdateStrategy != nil && out.Spec.UpdateStrategy.RegistryPoll != nil {
10551038
if out.Spec.UpdateStrategy.Interval == nil {
@@ -1058,16 +1041,17 @@ func (o *Operator) syncRegistryServer(logger *logrus.Entry, in *v1alpha1.Catalog
10581041
return
10591042
}
10601043
if out.Spec.UpdateStrategy.RegistryPoll.ParsingError != "" && out.Status.Reason != v1alpha1.CatalogSourceIntervalInvalidError {
1061-
out.SetError(v1alpha1.CatalogSourceIntervalInvalidError, errors.New(out.Spec.UpdateStrategy.RegistryPoll.ParsingError))
1044+
err := errors.New(out.Spec.UpdateStrategy.RegistryPoll.ParsingError)
1045+
logger.WithError(err).Error("registry server sync error: failed to parse registry poll interval")
1046+
out.SetError(v1alpha1.CatalogSourceIntervalInvalidError, err)
10621047
}
1063-
logger.Infof("requeuing registry server sync based on polling interval %s", out.Spec.UpdateStrategy.Interval.Duration.String())
10641048
resyncPeriod := reconciler.SyncRegistryUpdateInterval(out, time.Now())
10651049
o.catsrcQueueSet.RequeueAfter(out.GetNamespace(), out.GetName(), queueinformer.ResyncWithJitter(resyncPeriod, 0.1)())
10661050
return
10671051
}
10681052

10691053
if err := o.sources.Remove(sourceKey); err != nil {
1070-
o.logger.WithError(err).Debug("error closing client connection")
1054+
o.logger.WithError(err).Error("registry server sync error: error closing client connection")
10711055
}
10721056

10731057
return
@@ -1158,7 +1142,6 @@ func (o *Operator) syncCatalogSources(obj interface{}) (syncError error) {
11581142
"catalogsource.name": catsrc.Name,
11591143
"id": queueinformer.NewLoopID(),
11601144
})
1161-
logger.Info("syncing catalog source")
11621145

11631146
syncFunc := func(in *v1alpha1.CatalogSource, chain []CatalogSourceSyncFunc) (out *v1alpha1.CatalogSource, syncErr error) {
11641147
out = in

pkg/controller/registry/reconciler/grpc.go

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"time"
88

99
"github.com/google/go-cmp/cmp"
10+
1011
"github.com/operator-framework/api/pkg/operators/v1alpha1"
1112

1213
"github.com/operator-framework/operator-lifecycle-manager/pkg/controller/install"
@@ -201,10 +202,9 @@ func (c *GrpcRegistryReconciler) currentUpdatePods(logger *logrus.Entry, source
201202
}
202203

203204
func (c *GrpcRegistryReconciler) currentPodsWithCorrectImageAndSpec(logger *logrus.Entry, source grpcCatalogSourceDecorator, serviceAccount *corev1.ServiceAccount, defaultPodSecurityConfig v1alpha1.SecurityConfig) ([]*corev1.Pod, error) {
204-
logger.Info("searching for current pods")
205205
pods, err := c.Lister.CoreV1().PodLister().Pods(source.GetNamespace()).List(labels.SelectorFromValidatedSet(source.Labels()))
206206
if err != nil {
207-
logger.WithError(err).Warn("couldn't find pod in cache")
207+
logger.WithError(err).Warn("error searching for catalog source pods: couldn't find pod in cache")
208208
return nil, nil
209209
}
210210
found := []*corev1.Pod{}
@@ -222,7 +222,7 @@ func (c *GrpcRegistryReconciler) currentPodsWithCorrectImageAndSpec(logger *logr
222222
if !hash {
223223
logger.Infof("pod spec diff: %s", cmp.Diff(p.Spec, newPod.Spec))
224224
}
225-
if correctImages(source, p) && podHashMatch(p, newPod) {
225+
if images && hash {
226226
found = append(found, p)
227227
}
228228
}
@@ -252,6 +252,7 @@ func (c *GrpcRegistryReconciler) EnsureRegistryServer(logger *logrus.Entry, cata
252252
// if service status is nil, we force create every object to ensure they're created the first time
253253
valid, err := isRegistryServiceStatusValid(&source)
254254
if err != nil {
255+
logger.WithError(err).Error("error ensuring registry server: could not validate registry service status")
255256
return err
256257
}
257258
overwrite := !valid
@@ -262,22 +263,26 @@ func (c *GrpcRegistryReconciler) EnsureRegistryServer(logger *logrus.Entry, cata
262263
//TODO: if any of these error out, we should write a status back (possibly set RegistryServiceStatus to nil so they get recreated)
263264
sa, err := c.ensureSA(source)
264265
if err != nil && !apierrors.IsAlreadyExists(err) {
266+
logger.WithError(err).Error("error ensuring registry server: could not ensure registry service account")
265267
return pkgerrors.Wrapf(err, "error ensuring service account: %s", source.GetName())
266268
}
267269

268270
sa, err = c.OpClient.GetServiceAccount(sa.GetNamespace(), sa.GetName())
269271
if err != nil {
272+
logger.WithError(err).Error("error ensuring registry server: could not get registry service account")
270273
return err
271274
}
272275

273276
defaultPodSecurityConfig, err := getDefaultPodContextConfig(c.OpClient, catalogSource.GetNamespace())
274277
if err != nil {
278+
logger.WithError(err).Error("error ensuring registry server: could not get default pod security config")
275279
return err
276280
}
277281

278282
// recreate the pod if no existing pod is serving the latest image or correct spec
279283
current, err := c.currentPodsWithCorrectImageAndSpec(logger, source, sa, defaultPodSecurityConfig)
280284
if err != nil {
285+
logger.WithError(err).Error("error ensuring registry server: could not get current pods with correct image and spec")
281286
return err
282287
}
283288
overwritePod := overwrite || len(current) == 0
@@ -287,29 +292,37 @@ func (c *GrpcRegistryReconciler) EnsureRegistryServer(logger *logrus.Entry, cata
287292

288293
pod, err := source.Pod(sa, defaultPodSecurityConfig)
289294
if err != nil {
295+
logger.WithError(err).Error("error ensuring registry server: could not create registry pod")
290296
return err
291297
}
292298
if err := c.ensurePod(logger, source, sa, defaultPodSecurityConfig, overwritePod); err != nil {
299+
logger.WithError(err).Error("error ensuring registry server: could not ensure registry pod")
293300
return pkgerrors.Wrapf(err, "error ensuring pod: %s", pod.GetName())
294301
}
295302
if err := c.ensureUpdatePod(logger, sa, defaultPodSecurityConfig, source); err != nil {
303+
logger.WithError(err).Error("error ensuring registry server: could not ensure update pod")
296304
if _, ok := err.(UpdateNotReadyErr); ok {
305+
logger.WithError(err).Error("error ensuring registry server: ensure update pod error is not of type UpdateNotReadyErr")
297306
return err
298307
}
299308
return pkgerrors.Wrapf(err, "error ensuring updated catalog source pod: %s", pod.GetName())
300309
}
310+
301311
service, err := source.Service()
302312
if err != nil {
313+
logger.WithError(err).Error("couldn't get service")
303314
return err
304315
}
305316
if err := c.ensureService(source, overwrite); err != nil {
317+
logger.WithError(err).Error("error ensuring registry server: could not ensure service")
306318
return pkgerrors.Wrapf(err, "error ensuring service: %s", service.GetName())
307319
}
308320

309321
if overwritePod {
310322
now := c.now()
311323
service, err := source.Service()
312324
if err != nil {
325+
logger.WithError(err).Error("error ensuring registry server: could not get service")
313326
return err
314327
}
315328
catalogSource.Status.RegistryServiceStatus = &v1alpha1.RegistryServiceStatus{
@@ -603,6 +616,7 @@ func (c *GrpcRegistryReconciler) CheckRegistryServer(logger *logrus.Entry, catal
603616
serviceAccount := source.ServiceAccount()
604617
serviceAccount, err := c.OpClient.GetServiceAccount(serviceAccount.GetNamespace(), serviceAccount.GetName())
605618
if err != nil {
619+
logger.WithError(err).Error("registry service not healthy: could not get service account")
606620
if !apierrors.IsNotFound(err) {
607621
return false, err
608622
}
@@ -611,25 +625,38 @@ func (c *GrpcRegistryReconciler) CheckRegistryServer(logger *logrus.Entry, catal
611625

612626
registryPodSecurityConfig, err := getDefaultPodContextConfig(c.OpClient, catalogSource.GetNamespace())
613627
if err != nil {
628+
logger.WithError(err).Error("registry service not healthy: could not get registry pod security config")
614629
return false, err
615630
}
616631

617632
// Check on registry resources
618633
// TODO: add gRPC health check
619634
service, err := c.currentService(source)
620635
if err != nil {
636+
logger.WithError(err).Error("registry service not healthy: could not get current service")
621637
return false, err
622638
}
639+
623640
currentPods, err := c.currentPodsWithCorrectImageAndSpec(logger, source, serviceAccount, registryPodSecurityConfig)
624641
if err != nil {
642+
logger.WithError(err).Error("registry service not healthy: could not get current pods")
625643
return false, err
626644
}
645+
646+
currentServiceAccount := c.currentServiceAccount(source)
627647
if len(currentPods) < 1 ||
628-
service == nil || c.currentServiceAccount(source) == nil {
648+
service == nil || currentServiceAccount == nil {
649+
logger.WithFields(logrus.Fields{
650+
"numCurrentPods": len(currentPods),
651+
"isServiceNil": service == nil,
652+
"isCurrentServiceAccountNil": currentServiceAccount == nil,
653+
}).Error("registry service not healthy: one or more required resources are missing")
629654
return false, nil
630655
}
656+
631657
podsAreLive, e := detectAndDeleteDeadPods(logger, c.OpClient, currentPods, source.GetNamespace())
632658
if e != nil {
659+
logger.WithError(e).Error("registry service not healthy: could not detect and delete dead pods")
633660
return false, fmt.Errorf("error deleting dead pods: %v", e)
634661
}
635662
return podsAreLive, nil

0 commit comments

Comments
 (0)