Skip to content

Commit c3d6cc7

Browse files
authored
fix: attempt to shut down when provider init fails (#2263)
1 parent 2458b78 commit c3d6cc7

File tree

1 file changed

+29
-31
lines changed

1 file changed

+29
-31
lines changed

pkg/providers/controller.go

Lines changed: 29 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ package providers
1616

1717
import (
1818
"context"
19+
"errors"
1920
"fmt"
2021
"os"
2122
"sync"
@@ -183,7 +184,18 @@ func (c *Controller) Run(ctx context.Context) error {
183184
return err
184185
}
185186

186-
c.run(rootCtx)
187+
err := c.run(rootCtx)
188+
if err != nil {
189+
log.Errorf("provider run returned error, exiting process: %s", err)
190+
191+
// attempt to give up leader status, should also release the waitgroup and exit the process
192+
rootCancel()
193+
go func() {
194+
time.Sleep(time.Second * 5)
195+
log.Errorf("process has not quit 5s after provider failure, forcing exit: %s", err)
196+
os.Exit(1)
197+
}()
198+
}
187199

188200
wg.Wait()
189201
return nil
@@ -382,7 +394,7 @@ func (c *Controller) initSharedInformers() *providertypes.ListerInformer {
382394
return listerInformer
383395
}
384396

385-
func (c *Controller) run(ctx context.Context) {
397+
func (c *Controller) run(ctx context.Context) error {
386398
log.Infow("controller tries to leading ...",
387399
zap.String("namespace", c.namespace),
388400
zap.String("pod", c.name),
@@ -406,29 +418,22 @@ func (c *Controller) run(ctx context.Context) {
406418
CacheSynced: !c.cfg.EtcdServer.Enabled,
407419
SSLKeyEncryptSalt: c.cfg.EtcdServer.SSLKeyEncryptSalt,
408420
}
421+
422+
// TODO: needs retry logic
409423
err := c.apisix.AddCluster(ctx, clusterOpts)
410424
if err != nil && err != apisix.ErrDuplicatedCluster {
411-
// TODO give up the leader role
412425
log.Errorf("failed to add default cluster: %s", err)
413-
return
426+
return err
414427
}
415428

416429
if err := c.apisix.Cluster(c.cfg.APISIX.DefaultClusterName).HasSynced(ctx); err != nil {
417-
// TODO give up the leader role
418430
log.Errorf("failed to wait the default cluster to be ready: %s", err)
419-
420-
// re-create apisix cluster, used in next c.run
421-
if err = c.apisix.UpdateCluster(ctx, clusterOpts); err != nil {
422-
log.Errorf("failed to update default cluster: %s", err)
423-
return
424-
}
425-
return
431+
return err
426432
}
427433

428434
// Creation Phase
429435

430436
log.Info("creating controller")
431-
432437
c.informers = c.initSharedInformers()
433438
common := &providertypes.Common{
434439
ControllerNamespace: c.namespace,
@@ -443,14 +448,12 @@ func (c *Controller) run(ctx context.Context) {
443448

444449
c.namespaceProvider, err = namespace.NewWatchingNamespaceProvider(ctx, c.kubeClient, c.cfg, c.resourceSyncCh)
445450
if err != nil {
446-
ctx.Done()
447-
return
451+
return err
448452
}
449453

450454
c.podProvider, err = pod.NewProvider(common, c.namespaceProvider)
451455
if err != nil {
452-
ctx.Done()
453-
return
456+
return err
454457
}
455458

456459
c.translator = translation.NewTranslator(&translation.TranslatorOptions{
@@ -466,20 +469,17 @@ func (c *Controller) run(ctx context.Context) {
466469

467470
c.apisixProvider, c.apisixTranslator, err = apisixprovider.NewProvider(common, c.namespaceProvider, c.translator)
468471
if err != nil {
469-
ctx.Done()
470-
return
472+
return err
471473
}
472474

473475
c.ingressProvider, err = ingressprovider.NewProvider(common, c.namespaceProvider, c.translator, c.apisixTranslator)
474476
if err != nil {
475-
ctx.Done()
476-
return
477+
return err
477478
}
478479

479480
c.kubeProvider, err = k8s.NewProvider(common, c.translator, c.namespaceProvider, c.apisixProvider, c.ingressProvider)
480481
if err != nil {
481-
ctx.Done()
482-
return
482+
return err
483483
}
484484

485485
if c.cfg.Kubernetes.EnableGatewayAPI {
@@ -495,8 +495,7 @@ func (c *Controller) run(ctx context.Context) {
495495
ListerInformer: common.ListerInformer,
496496
})
497497
if err != nil {
498-
ctx.Done()
499-
return
498+
return err
500499
}
501500
}
502501

@@ -505,25 +504,22 @@ func (c *Controller) run(ctx context.Context) {
505504
log.Info("init namespaces")
506505

507506
if err = c.namespaceProvider.Init(ctx); err != nil {
508-
ctx.Done()
509-
return
507+
return err
510508
}
511509

512510
log.Info("wait for resource sync")
513511

514512
// Wait for resource sync
515513
if ok := c.informers.StartAndWaitForCacheSync(ctx); !ok {
516-
ctx.Done()
517-
return
514+
return errors.New("StartAndWaitForCacheSync failed")
518515
}
519516

520517
log.Info("init providers")
521518

522519
// Compare resource
523520
if !c.cfg.EtcdServer.Enabled {
524521
if err = c.apisixProvider.Init(ctx); err != nil {
525-
ctx.Done()
526-
return
522+
return err
527523
}
528524
}
529525

@@ -573,6 +569,8 @@ func (c *Controller) run(ctx context.Context) {
573569
log.Error("Start failed, abort...")
574570
cancelFunc()
575571
}
572+
573+
return nil
576574
}
577575

578576
func (c *Controller) checkClusterHealth(ctx context.Context, cancelFunc context.CancelFunc) {

0 commit comments

Comments
 (0)