Skip to content

Commit abfacd6

Browse files
authored
fix: Keep health checker running when health check failed. Make healthcheck function pure (#1779)
1 parent a414df7 commit abfacd6

File tree

2 files changed

+23
-26
lines changed

2 files changed

+23
-26
lines changed

pkg/apisix/cluster.go

Lines changed: 13 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -506,33 +506,22 @@ func (c *cluster) UpstreamServiceRelation() UpstreamServiceRelation {
506506

507507
// HealthCheck implements Cluster.HealthCheck method.
508508
func (c *cluster) HealthCheck(ctx context.Context) (err error) {
509-
if c.cacheSyncErr != nil {
510-
err = c.cacheSyncErr
511-
return
512-
}
513-
if atomic.LoadInt32(&c.cacheState) == _cacheSyncing {
514-
return
515-
}
516-
517509
// Retry three times in a row, and exit if all of them fail.
518510
backoff := wait.Backoff{
519511
Duration: 5 * time.Second,
520512
Factor: 1,
521513
Steps: 3,
522514
}
523-
var lastCheckErr error
515+
524516
err = wait.ExponentialBackoffWithContext(ctx, backoff, func() (done bool, _ error) {
525-
if lastCheckErr = c.healthCheck(ctx); lastCheckErr != nil {
517+
if lastCheckErr := c.healthCheck(ctx); lastCheckErr != nil {
526518
log.Warnf("failed to check health for cluster %s: %s, will retry", c.name, lastCheckErr)
527519
return
528520
}
529521
done = true
530522
return
531523
})
532-
if err != nil {
533-
// if ErrWaitTimeout then set lastSyncErr
534-
c.cacheSyncErr = lastCheckErr
535-
}
524+
536525
return err
537526
}
538527

@@ -543,12 +532,16 @@ func (c *cluster) healthCheck(ctx context.Context) (err error) {
543532
if err != nil {
544533
return err
545534
}
546-
if er := conn.Close(); er != nil {
547-
log.Warnw("failed to close tcp probe connection",
548-
zap.Error(err),
549-
zap.String("cluster", c.name),
550-
)
551-
}
535+
defer func(conn net.Conn) {
536+
err := conn.Close()
537+
if err != nil {
538+
log.Warnw("failed to close tcp probe connection",
539+
zap.Error(err),
540+
zap.String("cluster", c.name),
541+
)
542+
}
543+
}(conn)
544+
552545
return
553546
}
554547

pkg/providers/controller.go

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -570,16 +570,20 @@ func (c *Controller) checkClusterHealth(ctx context.Context, cancelFunc context.
570570

571571
err := c.apisix.Cluster(c.cfg.APISIX.DefaultClusterName).HealthCheck(ctx)
572572
if err != nil {
573-
// Finally failed health check, then give up leader.
574-
log.Warnf("failed to check health for default cluster: %s, give up leader", err)
575573
c.apiServer.HealthState.Lock()
576574
c.apiServer.HealthState.Err = err
577575
c.apiServer.HealthState.Unlock()
578-
579-
return
576+
// Finally failed health check, then give up leader.
577+
log.Warnf("failed to check health for default cluster: %s, give up leader", err)
578+
} else {
579+
if c.apiServer.HealthState.Err != nil {
580+
c.apiServer.HealthState.Lock()
581+
c.apiServer.HealthState.Err = err
582+
c.apiServer.HealthState.Unlock()
583+
}
584+
log.Debugf("success check health for default cluster")
585+
c.MetricsCollector.IncrCheckClusterHealth(c.name)
580586
}
581-
log.Debugf("success check health for default cluster")
582-
c.MetricsCollector.IncrCheckClusterHealth(c.name)
583587
}
584588
}
585589

0 commit comments

Comments
 (0)