From 951a631b1904feb83731f5ccd8f8a8a9b1aaf09f Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Fri, 19 Jan 2024 11:23:50 -0800 Subject: [PATCH 01/43] WIP --- internal/mode/static/telemetry/collector.go | 38 ++++++++++++ internal/mode/static/telemetry/exporter.go | 4 +- internal/mode/static/telemetry/job.go | 69 +++++++++++++++++++++ 3 files changed, 110 insertions(+), 1 deletion(-) create mode 100644 internal/mode/static/telemetry/collector.go create mode 100644 internal/mode/static/telemetry/job.go diff --git a/internal/mode/static/telemetry/collector.go b/internal/mode/static/telemetry/collector.go new file mode 100644 index 0000000000..0f12d08465 --- /dev/null +++ b/internal/mode/static/telemetry/collector.go @@ -0,0 +1,38 @@ +package telemetry + +import ( + "context" + + v1 "k8s.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +type Collector interface { + CollectData() Data +} + +type DataCollector struct { + ctx context.Context + k8sClient client.Client +} + +func NewDataCollector(ctx context.Context, k8sClient client.Client) *DataCollector { + return &DataCollector{ + ctx: ctx, + k8sClient: k8sClient, + } +} + +func (c DataCollector) CollectData() Data { + nodeCount := collectNodeCount(c.ctx, c.k8sClient) + + data := Data{NodeCount: nodeCount} + + return data +} + +func collectNodeCount(ctx context.Context, k8sClient client.Client) int { + nodes := &v1.NodeList{} + _ = k8sClient.List(ctx, nodes) + return len(nodes.Items) +} diff --git a/internal/mode/static/telemetry/exporter.go b/internal/mode/static/telemetry/exporter.go index c010510926..737de399a2 100644 --- a/internal/mode/static/telemetry/exporter.go +++ b/internal/mode/static/telemetry/exporter.go @@ -8,7 +8,9 @@ import ( // Data is telemetry data. // Note: this type might change once https://github.com/nginxinc/nginx-gateway-fabric/issues/1318 is implemented. -type Data struct{} +type Data struct { + NodeCount int +} //go:generate go run github.com/maxbrunsfeld/counterfeiter/v6 . Exporter diff --git a/internal/mode/static/telemetry/job.go b/internal/mode/static/telemetry/job.go new file mode 100644 index 0000000000..2139b1c623 --- /dev/null +++ b/internal/mode/static/telemetry/job.go @@ -0,0 +1,69 @@ +package telemetry + +import ( + "context" + "time" + + "github.com/go-logr/logr" + "k8s.io/apimachinery/pkg/util/wait" + "sigs.k8s.io/controller-runtime/pkg/manager" +) + +// JobConfig is the configuration for the telemetry job. +type JobConfig struct { + // Exporter is the exporter to use for exporting telemetry data. + Exporter Exporter + // Collector is the collector to use for collecting telemetry data. + Collector Collector + // Logger is the logger. + Logger logr.Logger + // Period defines the period of the telemetry job. The job will run every Period. + Period time.Duration +} + +// Job periodically exports telemetry data using the provided exporter. +type Job struct { + cfg JobConfig +} + +// NewJob creates a new telemetry job. +func NewJob(cfg JobConfig) *Job { + return &Job{ + cfg: cfg, + } +} + +// Start starts the telemetry job. 
+// Implements controller-runtime manager.Runnable +func (j *Job) Start(ctx context.Context) error { + j.cfg.Logger.Info("Starting telemetry job") + + report := func(ctx context.Context) { + // Gather telemetry + j.cfg.Logger.V(1).Info("Gathering telemetry") + + // We will need to gather data as defined in https://github.com/nginxinc/nginx-gateway-fabric/issues/793 + data := j.cfg.Collector.CollectData() + + // Export telemetry + j.cfg.Logger.V(1).Info("Exporting telemetry") + + if err := j.cfg.Exporter.Export(ctx, data); err != nil { + j.cfg.Logger.Error(err, "Failed to export telemetry") + } + } + + const ( + // 10 min jitter is enough per telemetry destination recommendation + // For the default period of 24 hours, jitter will be 10min /(24*60)min = 0.0069 + jitterFactor = 10.0 / (24 * 60) // added jitter is bound by jitterFactor * period + sliding = true // This means the period with jitter will be calculated after each report() call. + ) + + wait.JitterUntilWithContext(ctx, report, j.cfg.Period, jitterFactor, sliding) + + j.cfg.Logger.Info("Stopping telemetry job") + return nil +} + +var _ manager.Runnable = &Job{} From 55320c6ff56b866ee9aba2edff29329570c5f19a Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Mon, 22 Jan 2024 10:18:01 -0800 Subject: [PATCH 02/43] Add graph resource count but still WIP --- .../mode/static/state/change_processor.go | 7 ++ .../state/statefakes/fake_change_processor.go | 65 +++++++++++++++++++ internal/mode/static/telemetry/collector.go | 41 +++++++++++- internal/mode/static/telemetry/exporter.go | 3 +- 4 files changed, 113 insertions(+), 3 deletions(-) diff --git a/internal/mode/static/state/change_processor.go b/internal/mode/static/state/change_processor.go index 93229e0d45..4aa37ef41b 100644 --- a/internal/mode/static/state/change_processor.go +++ b/internal/mode/static/state/change_processor.go @@ -62,6 +62,7 @@ type ChangeProcessor interface { // Process produces a graph-like representation of GatewayAPI resources. // If no changes were captured, the changed return argument will be NoChange and graph will be empty. Process() (changeType ChangeType, graphCfg *graph.Graph) + GetLatestGraph() *graph.Graph } // ChangeProcessorConfig holds configuration parameters for ChangeProcessorImpl. 
@@ -256,3 +257,9 @@ func (c *ChangeProcessorImpl) Process() (ChangeType, *graph.Graph) { return changeType, c.latestGraph } + +func (c *ChangeProcessorImpl) GetLatestGraph() *graph.Graph { + c.lock.Lock() + defer c.lock.Unlock() + return c.latestGraph +} diff --git a/internal/mode/static/state/statefakes/fake_change_processor.go b/internal/mode/static/state/statefakes/fake_change_processor.go index e9a37bbd68..7bd887ed0e 100644 --- a/internal/mode/static/state/statefakes/fake_change_processor.go +++ b/internal/mode/static/state/statefakes/fake_change_processor.go @@ -23,6 +23,16 @@ type FakeChangeProcessor struct { arg1 client.Object } ProcessStub func() (state.ChangeType, *graph.Graph) + GetLatestGraphStub func() *graph.Graph + getLatestGraphMutex sync.RWMutex + getLatestGraphArgsForCall []struct { + } + getLatestGraphReturns struct { + result1 *graph.Graph + } + getLatestGraphReturnsOnCall map[int]struct { + result1 *graph.Graph + } processMutex sync.RWMutex processArgsForCall []struct { } @@ -103,6 +113,59 @@ func (fake *FakeChangeProcessor) CaptureUpsertChangeArgsForCall(i int) client.Ob return argsForCall.arg1 } +func (fake *FakeChangeProcessor) GetLatestGraph() *graph.Graph { + fake.getLatestGraphMutex.Lock() + ret, specificReturn := fake.getLatestGraphReturnsOnCall[len(fake.getLatestGraphArgsForCall)] + fake.getLatestGraphArgsForCall = append(fake.getLatestGraphArgsForCall, struct { + }{}) + stub := fake.GetLatestGraphStub + fakeReturns := fake.getLatestGraphReturns + fake.recordInvocation("GetLatestGraph", []interface{}{}) + fake.getLatestGraphMutex.Unlock() + if stub != nil { + return stub() + } + if specificReturn { + return ret.result1 + } + return fakeReturns.result1 +} + +func (fake *FakeChangeProcessor) GetLatestGraphCallCount() int { + fake.getLatestGraphMutex.RLock() + defer fake.getLatestGraphMutex.RUnlock() + return len(fake.getLatestGraphArgsForCall) +} + +func (fake *FakeChangeProcessor) GetLatestGraphCalls(stub func() *graph.Graph) { + fake.getLatestGraphMutex.Lock() + defer fake.getLatestGraphMutex.Unlock() + fake.GetLatestGraphStub = stub +} + +func (fake *FakeChangeProcessor) GetLatestGraphReturns(result1 *graph.Graph) { + fake.getLatestGraphMutex.Lock() + defer fake.getLatestGraphMutex.Unlock() + fake.GetLatestGraphStub = nil + fake.getLatestGraphReturns = struct { + result1 *graph.Graph + }{result1} +} + +func (fake *FakeChangeProcessor) GetLatestGraphReturnsOnCall(i int, result1 *graph.Graph) { + fake.getLatestGraphMutex.Lock() + defer fake.getLatestGraphMutex.Unlock() + fake.GetLatestGraphStub = nil + if fake.getLatestGraphReturnsOnCall == nil { + fake.getLatestGraphReturnsOnCall = make(map[int]struct { + result1 *graph.Graph + }) + } + fake.getLatestGraphReturnsOnCall[i] = struct { + result1 *graph.Graph + }{result1} +} + func (fake *FakeChangeProcessor) Process() (state.ChangeType, *graph.Graph) { fake.processMutex.Lock() ret, specificReturn := fake.processReturnsOnCall[len(fake.processArgsForCall)] @@ -166,6 +229,8 @@ func (fake *FakeChangeProcessor) Invocations() map[string][][]interface{} { defer fake.captureDeleteChangeMutex.RUnlock() fake.captureUpsertChangeMutex.RLock() defer fake.captureUpsertChangeMutex.RUnlock() + fake.getLatestGraphMutex.RLock() + defer fake.getLatestGraphMutex.RUnlock() fake.processMutex.RLock() defer fake.processMutex.RUnlock() copiedInvocations := map[string][][]interface{}{} diff --git a/internal/mode/static/telemetry/collector.go b/internal/mode/static/telemetry/collector.go index 0f12d08465..a7d876ffbd 100644 --- 
a/internal/mode/static/telemetry/collector.go +++ b/internal/mode/static/telemetry/collector.go @@ -5,6 +5,8 @@ import ( v1 "k8s.io/api/core/v1" "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/state" ) type Collector interface { @@ -14,19 +16,34 @@ type Collector interface { type DataCollector struct { ctx context.Context k8sClient client.Client + processor state.ChangeProcessor +} + +type GraphResourceCount struct { + Gateways int + GatewayClasses int + HTTPRoutes int + ReferencedSecrets int + ReferencedServices int } -func NewDataCollector(ctx context.Context, k8sClient client.Client) *DataCollector { +func NewDataCollector( + ctx context.Context, + k8sClient client.Client, + processor state.ChangeProcessor, +) *DataCollector { return &DataCollector{ ctx: ctx, k8sClient: k8sClient, + processor: processor, } } func (c DataCollector) CollectData() Data { nodeCount := collectNodeCount(c.ctx, c.k8sClient) + graphResourceCount := collectGraphResourceCount(c.processor) - data := Data{NodeCount: nodeCount} + data := Data{NodeCount: nodeCount, GraphResourceCount: graphResourceCount} return data } @@ -36,3 +53,23 @@ func collectNodeCount(ctx context.Context, k8sClient client.Client) int { _ = k8sClient.List(ctx, nodes) return len(nodes.Items) } + +func collectGraphResourceCount(processor state.ChangeProcessor) GraphResourceCount { + graphResourceCount := GraphResourceCount{} + graph := processor.GetLatestGraph() + + if graph.GatewayClass != nil { + graphResourceCount.GatewayClasses = 1 + } + if graph.Gateway != nil { + graphResourceCount.GatewayClasses = 1 + } + if graph.Routes != nil { + graphResourceCount.HTTPRoutes = len(graph.Routes) + } + if graph.ReferencedSecrets != nil { + graphResourceCount.ReferencedSecrets = len(graph.ReferencedSecrets) + } + + return graphResourceCount +} diff --git a/internal/mode/static/telemetry/exporter.go b/internal/mode/static/telemetry/exporter.go index 737de399a2..d0f6839252 100644 --- a/internal/mode/static/telemetry/exporter.go +++ b/internal/mode/static/telemetry/exporter.go @@ -9,7 +9,8 @@ import ( // Data is telemetry data. // Note: this type might change once https://github.com/nginxinc/nginx-gateway-fabric/issues/1318 is implemented. type Data struct { - NodeCount int + NodeCount int + GraphResourceCount GraphResourceCount } //go:generate go run github.com/maxbrunsfeld/counterfeiter/v6 . 
Exporter From 3b43d1fcb5efcf5de9ca3cffa2b8318080145d76 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Mon, 22 Jan 2024 10:21:26 -0800 Subject: [PATCH 03/43] Add small formatting --- internal/mode/static/telemetry/collector.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/internal/mode/static/telemetry/collector.go b/internal/mode/static/telemetry/collector.go index a7d876ffbd..f22a533d48 100644 --- a/internal/mode/static/telemetry/collector.go +++ b/internal/mode/static/telemetry/collector.go @@ -43,7 +43,10 @@ func (c DataCollector) CollectData() Data { nodeCount := collectNodeCount(c.ctx, c.k8sClient) graphResourceCount := collectGraphResourceCount(c.processor) - data := Data{NodeCount: nodeCount, GraphResourceCount: graphResourceCount} + data := Data{ + NodeCount: nodeCount, + GraphResourceCount: graphResourceCount, + } return data } From d6181a6cbee5d7053c3384e0a30c173be1540fe8 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Tue, 23 Jan 2024 13:19:32 -0800 Subject: [PATCH 04/43] Add review feedback --- internal/mode/static/telemetry/collector.go | 66 ++++++++++++--------- internal/mode/static/telemetry/exporter.go | 7 --- internal/mode/static/telemetry/job.go | 4 +- 3 files changed, 39 insertions(+), 38 deletions(-) diff --git a/internal/mode/static/telemetry/collector.go b/internal/mode/static/telemetry/collector.go index f22a533d48..11a7efdc5d 100644 --- a/internal/mode/static/telemetry/collector.go +++ b/internal/mode/static/telemetry/collector.go @@ -7,41 +7,49 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/state" + "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/state/graph" ) -type Collector interface { - CollectData() Data +type DataCollector interface { + Collect(ctx context.Context) Data } - -type DataCollector struct { - ctx context.Context - k8sClient client.Client - processor state.ChangeProcessor +type GraphGetter interface { + GetLatestGraph() *graph.Graph } type GraphResourceCount struct { - Gateways int - GatewayClasses int - HTTPRoutes int - ReferencedSecrets int - ReferencedServices int + Gateways int + GatewayClasses int + HTTPRoutes int + Secrets int + Services int +} + +// Data is telemetry data. +// Note: this type might change once https://github.com/nginxinc/nginx-gateway-fabric/issues/1318 is implemented. 
+type Data struct { + NodeCount int + GraphResourceCount GraphResourceCount +} + +type DataCollectorImpl struct { + k8sClient client.Client + graphGetter GraphGetter } func NewDataCollector( - ctx context.Context, k8sClient client.Client, processor state.ChangeProcessor, -) *DataCollector { - return &DataCollector{ - ctx: ctx, - k8sClient: k8sClient, - processor: processor, +) *DataCollectorImpl { + return &DataCollectorImpl{ + k8sClient: k8sClient, + graphGetter: processor, } } -func (c DataCollector) CollectData() Data { - nodeCount := collectNodeCount(c.ctx, c.k8sClient) - graphResourceCount := collectGraphResourceCount(c.processor) +func (c DataCollectorImpl) Collect(ctx context.Context) Data { + nodeCount := collectNodeCount(ctx, c.k8sClient) + graphResourceCount := collectGraphResourceCount(c.graphGetter) data := Data{ NodeCount: nodeCount, @@ -57,21 +65,21 @@ func collectNodeCount(ctx context.Context, k8sClient client.Client) int { return len(nodes.Items) } -func collectGraphResourceCount(processor state.ChangeProcessor) GraphResourceCount { +func collectGraphResourceCount(graphGetter GraphGetter) GraphResourceCount { graphResourceCount := GraphResourceCount{} - graph := processor.GetLatestGraph() + g := graphGetter.GetLatestGraph() - if graph.GatewayClass != nil { + if g.GatewayClass != nil { graphResourceCount.GatewayClasses = 1 } - if graph.Gateway != nil { + if g.Gateway != nil { graphResourceCount.GatewayClasses = 1 } - if graph.Routes != nil { - graphResourceCount.HTTPRoutes = len(graph.Routes) + if g.Routes != nil { + graphResourceCount.HTTPRoutes = len(g.Routes) } - if graph.ReferencedSecrets != nil { - graphResourceCount.ReferencedSecrets = len(graph.ReferencedSecrets) + if g.ReferencedSecrets != nil { + graphResourceCount.Secrets = len(g.ReferencedSecrets) } return graphResourceCount diff --git a/internal/mode/static/telemetry/exporter.go b/internal/mode/static/telemetry/exporter.go index d0f6839252..55bee6f2be 100644 --- a/internal/mode/static/telemetry/exporter.go +++ b/internal/mode/static/telemetry/exporter.go @@ -6,13 +6,6 @@ import ( "github.com/go-logr/logr" ) -// Data is telemetry data. -// Note: this type might change once https://github.com/nginxinc/nginx-gateway-fabric/issues/1318 is implemented. -type Data struct { - NodeCount int - GraphResourceCount GraphResourceCount -} - //go:generate go run github.com/maxbrunsfeld/counterfeiter/v6 . Exporter // Exporter exports telemetry data to some destination. diff --git a/internal/mode/static/telemetry/job.go b/internal/mode/static/telemetry/job.go index 2139b1c623..402f84567d 100644 --- a/internal/mode/static/telemetry/job.go +++ b/internal/mode/static/telemetry/job.go @@ -14,7 +14,7 @@ type JobConfig struct { // Exporter is the exporter to use for exporting telemetry data. Exporter Exporter // Collector is the collector to use for collecting telemetry data. - Collector Collector + DataCollector DataCollector // Logger is the logger. Logger logr.Logger // Period defines the period of the telemetry job. The job will run every Period. 
@@ -43,7 +43,7 @@ func (j *Job) Start(ctx context.Context) error { j.cfg.Logger.V(1).Info("Gathering telemetry") // We will need to gather data as defined in https://github.com/nginxinc/nginx-gateway-fabric/issues/793 - data := j.cfg.Collector.CollectData() + data := j.cfg.DataCollector.Collect(ctx) // Export telemetry j.cfg.Logger.V(1).Info("Exporting telemetry") From f51d0ee3e7b5e0eb43c189e5cb90b37d7401c0b5 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Tue, 23 Jan 2024 13:37:30 -0800 Subject: [PATCH 05/43] Add version+name and adjust graphGetter --- cmd/gateway/commands.go | 1 + internal/mode/static/config/config.go | 2 ++ internal/mode/static/telemetry/collector.go | 22 ++++++++++++++++----- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/cmd/gateway/commands.go b/cmd/gateway/commands.go index d4eaa3ccc5..29c729f436 100644 --- a/cmd/gateway/commands.go +++ b/cmd/gateway/commands.go @@ -170,6 +170,7 @@ func createStaticModeCommand() *cobra.Command { }, Plus: plus, TelemetryReportPeriod: period, + Version: version, } if err := static.StartManager(conf); err != nil { diff --git a/internal/mode/static/config/config.go b/internal/mode/static/config/config.go index d434fce894..abbec1faa6 100644 --- a/internal/mode/static/config/config.go +++ b/internal/mode/static/config/config.go @@ -9,6 +9,8 @@ import ( ) type Config struct { + // Version is the running NGF version. + Version string // AtomicLevel is an atomically changeable, dynamic logging level. AtomicLevel zap.AtomicLevel // GatewayNsName is the namespaced name of a Gateway resource that the Gateway will use. diff --git a/internal/mode/static/telemetry/collector.go b/internal/mode/static/telemetry/collector.go index 11a7efdc5d..121af8a5a1 100644 --- a/internal/mode/static/telemetry/collector.go +++ b/internal/mode/static/telemetry/collector.go @@ -6,7 +6,6 @@ import ( v1 "k8s.io/api/core/v1" "sigs.k8s.io/controller-runtime/pkg/client" - "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/state" "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/state/graph" ) @@ -25,25 +24,34 @@ type GraphResourceCount struct { Services int } +type ProjectNameAndVersion struct { + Name string + Version string +} + // Data is telemetry data. // Note: this type might change once https://github.com/nginxinc/nginx-gateway-fabric/issues/1318 is implemented. 
type Data struct { - NodeCount int - GraphResourceCount GraphResourceCount + ProjectNameAndVersion ProjectNameAndVersion + NodeCount int + GraphResourceCount GraphResourceCount } type DataCollectorImpl struct { k8sClient client.Client graphGetter GraphGetter + version string } func NewDataCollector( k8sClient client.Client, - processor state.ChangeProcessor, + graphGetter GraphGetter, + version string, ) *DataCollectorImpl { return &DataCollectorImpl{ k8sClient: k8sClient, - graphGetter: processor, + graphGetter: graphGetter, + version: version, } } @@ -54,6 +62,10 @@ func (c DataCollectorImpl) Collect(ctx context.Context) Data { data := Data{ NodeCount: nodeCount, GraphResourceCount: graphResourceCount, + ProjectNameAndVersion: ProjectNameAndVersion{ + Name: "NGF", + Version: c.version, + }, } return data From 0e467db9500cbdb8b9c72d1b8ab45c8ff6ed2939 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Tue, 23 Jan 2024 17:10:34 -0800 Subject: [PATCH 06/43] Change k8sClient to client Reader --- internal/mode/static/telemetry/collector.go | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/internal/mode/static/telemetry/collector.go b/internal/mode/static/telemetry/collector.go index 121af8a5a1..1c76c61b18 100644 --- a/internal/mode/static/telemetry/collector.go +++ b/internal/mode/static/telemetry/collector.go @@ -38,25 +38,25 @@ type Data struct { } type DataCollectorImpl struct { - k8sClient client.Client - graphGetter GraphGetter - version string + k8sClientReader client.Reader + graphGetter GraphGetter + version string } func NewDataCollector( - k8sClient client.Client, + k8sClientReader client.Reader, graphGetter GraphGetter, version string, ) *DataCollectorImpl { return &DataCollectorImpl{ - k8sClient: k8sClient, - graphGetter: graphGetter, - version: version, + k8sClientReader: k8sClientReader, + graphGetter: graphGetter, + version: version, } } func (c DataCollectorImpl) Collect(ctx context.Context) Data { - nodeCount := collectNodeCount(ctx, c.k8sClient) + nodeCount := collectNodeCount(ctx, c.k8sClientReader) graphResourceCount := collectGraphResourceCount(c.graphGetter) data := Data{ @@ -71,7 +71,7 @@ func (c DataCollectorImpl) Collect(ctx context.Context) Data { return data } -func collectNodeCount(ctx context.Context, k8sClient client.Client) int { +func collectNodeCount(ctx context.Context, k8sClient client.Reader) int { nodes := &v1.NodeList{} _ = k8sClient.List(ctx, nodes) return len(nodes.Items) From f7a2b08e511699f8910b204605fa245f6dc25e02 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Wed, 24 Jan 2024 09:43:36 -0800 Subject: [PATCH 07/43] Add small feedback review --- internal/mode/static/state/change_processor.go | 1 + internal/mode/static/telemetry/collector.go | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/internal/mode/static/state/change_processor.go b/internal/mode/static/state/change_processor.go index 4aa37ef41b..9d07be7aa0 100644 --- a/internal/mode/static/state/change_processor.go +++ b/internal/mode/static/state/change_processor.go @@ -261,5 +261,6 @@ func (c *ChangeProcessorImpl) Process() (ChangeType, *graph.Graph) { func (c *ChangeProcessorImpl) GetLatestGraph() *graph.Graph { c.lock.Lock() defer c.lock.Unlock() + return c.latestGraph } diff --git a/internal/mode/static/telemetry/collector.go b/internal/mode/static/telemetry/collector.go index 1c76c61b18..a5e928663d 100644 --- a/internal/mode/static/telemetry/collector.go +++ b/internal/mode/static/telemetry/collector.go @@ -12,6 +12,7 @@ import ( 
type DataCollector interface { Collect(ctx context.Context) Data } + type GraphGetter interface { GetLatestGraph() *graph.Graph } @@ -72,8 +73,8 @@ func (c DataCollectorImpl) Collect(ctx context.Context) Data { } func collectNodeCount(ctx context.Context, k8sClient client.Reader) int { - nodes := &v1.NodeList{} - _ = k8sClient.List(ctx, nodes) + nodes := v1.NodeList{} + _ = k8sClient.List(ctx, &nodes) return len(nodes.Items) } From 266e713afd114558080f7afeaf6c50a46ecc3025 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Wed, 24 Jan 2024 11:10:57 -0800 Subject: [PATCH 08/43] Add more feedback from review --- internal/mode/static/telemetry/collector.go | 23 ++++++++++----------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/internal/mode/static/telemetry/collector.go b/internal/mode/static/telemetry/collector.go index a5e928663d..7b5d2309ed 100644 --- a/internal/mode/static/telemetry/collector.go +++ b/internal/mode/static/telemetry/collector.go @@ -23,9 +23,11 @@ type GraphResourceCount struct { HTTPRoutes int Secrets int Services int + EndpointSlices int + Endpoints int } -type ProjectNameAndVersion struct { +type ProjectMetadata struct { Name string Version string } @@ -33,9 +35,9 @@ type ProjectNameAndVersion struct { // Data is telemetry data. // Note: this type might change once https://github.com/nginxinc/nginx-gateway-fabric/issues/1318 is implemented. type Data struct { - ProjectNameAndVersion ProjectNameAndVersion - NodeCount int - GraphResourceCount GraphResourceCount + ProjectMetadata ProjectMetadata + NodeCount int + GraphResourceCount GraphResourceCount } type DataCollectorImpl struct { @@ -63,7 +65,7 @@ func (c DataCollectorImpl) Collect(ctx context.Context) Data { data := Data{ NodeCount: nodeCount, GraphResourceCount: graphResourceCount, - ProjectNameAndVersion: ProjectNameAndVersion{ + ProjectMetadata: ProjectMetadata{ Name: "NGF", Version: c.version, }, @@ -86,14 +88,11 @@ func collectGraphResourceCount(graphGetter GraphGetter) GraphResourceCount { graphResourceCount.GatewayClasses = 1 } if g.Gateway != nil { - graphResourceCount.GatewayClasses = 1 - } - if g.Routes != nil { - graphResourceCount.HTTPRoutes = len(g.Routes) - } - if g.ReferencedSecrets != nil { - graphResourceCount.Secrets = len(g.ReferencedSecrets) + graphResourceCount.Gateways = 1 } + graphResourceCount.HTTPRoutes = len(g.Routes) + graphResourceCount.Secrets = len(g.ReferencedSecrets) + graphResourceCount.Services = len(g.ReferencedServices) return graphResourceCount } From 5a4f0c71930a9b785133016c4ec1529dffd61f65 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Thu, 25 Jan 2024 08:45:20 -0800 Subject: [PATCH 09/43] Add feedback and cfg --- internal/mode/static/telemetry/collector.go | 77 +++++++++++---------- internal/mode/static/telemetry/job.go | 12 +++- 2 files changed, 52 insertions(+), 37 deletions(-) diff --git a/internal/mode/static/telemetry/collector.go b/internal/mode/static/telemetry/collector.go index 7b5d2309ed..7bdfd0e523 100644 --- a/internal/mode/static/telemetry/collector.go +++ b/internal/mode/static/telemetry/collector.go @@ -9,15 +9,11 @@ import ( "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/state/graph" ) -type DataCollector interface { - Collect(ctx context.Context) Data -} - type GraphGetter interface { GetLatestGraph() *graph.Graph } -type GraphResourceCount struct { +type NGFResourceCounts struct { Gateways int GatewayClasses int HTTPRoutes int @@ -35,64 +31,75 @@ type ProjectMetadata struct { // Data is telemetry data. 
// Note: this type might change once https://github.com/nginxinc/nginx-gateway-fabric/issues/1318 is implemented. type Data struct { - ProjectMetadata ProjectMetadata - NodeCount int - GraphResourceCount GraphResourceCount + ProjectMetadata ProjectMetadata + NodeCount int + NGFResourceCounts NGFResourceCounts +} + +// DataCollectorConfig holds configuration parameters for DataCollectorImpl. +type DataCollectorConfig struct { + // K8sClientReader is a Kubernetes API client Reader. + K8sClientReader client.Reader + // GraphGetter allows us to get the Graph. + GraphGetter GraphGetter + // Version is the NGF version. + Version string } type DataCollectorImpl struct { - k8sClientReader client.Reader - graphGetter GraphGetter - version string + cfg DataCollectorConfig } func NewDataCollector( - k8sClientReader client.Reader, - graphGetter GraphGetter, - version string, + cfg DataCollectorConfig, ) *DataCollectorImpl { return &DataCollectorImpl{ - k8sClientReader: k8sClientReader, - graphGetter: graphGetter, - version: version, + cfg: cfg, } } -func (c DataCollectorImpl) Collect(ctx context.Context) Data { - nodeCount := collectNodeCount(ctx, c.k8sClientReader) - graphResourceCount := collectGraphResourceCount(c.graphGetter) +func (c DataCollectorImpl) Collect(ctx context.Context) (Data, error) { + nodeCount, err := collectNodeCount(ctx, c.cfg.K8sClientReader) + if err != nil { + return Data{}, err + } + graphResourceCount := collectGraphResourceCount(c.cfg.GraphGetter) data := Data{ - NodeCount: nodeCount, - GraphResourceCount: graphResourceCount, + NodeCount: nodeCount, + NGFResourceCounts: graphResourceCount, ProjectMetadata: ProjectMetadata{ Name: "NGF", - Version: c.version, + Version: c.cfg.Version, }, } - return data + return data, nil } -func collectNodeCount(ctx context.Context, k8sClient client.Reader) int { +func collectNodeCount(ctx context.Context, k8sClient client.Reader) (int, error) { nodes := v1.NodeList{} - _ = k8sClient.List(ctx, &nodes) - return len(nodes.Items) + if err := k8sClient.List(ctx, &nodes); err != nil { + return 0, err + } + + return len(nodes.Items), nil } -func collectGraphResourceCount(graphGetter GraphGetter) GraphResourceCount { - graphResourceCount := GraphResourceCount{} +func collectGraphResourceCount(graphGetter GraphGetter) NGFResourceCounts { + ngfResourceCounts := NGFResourceCounts{} g := graphGetter.GetLatestGraph() if g.GatewayClass != nil { - graphResourceCount.GatewayClasses = 1 + ngfResourceCounts.GatewayClasses = 1 } if g.Gateway != nil { - graphResourceCount.Gateways = 1 + ngfResourceCounts.Gateways = 1 } - graphResourceCount.HTTPRoutes = len(g.Routes) - graphResourceCount.Secrets = len(g.ReferencedSecrets) - graphResourceCount.Services = len(g.ReferencedServices) + ngfResourceCounts.HTTPRoutes = len(g.Routes) + ngfResourceCounts.Secrets = len(g.ReferencedSecrets) + // WIP: ReferencedServices may contain non-existing services + ngfResourceCounts.Services = len(g.ReferencedServices) - return graphResourceCount + return ngfResourceCounts } diff --git a/internal/mode/static/telemetry/job.go b/internal/mode/static/telemetry/job.go index 402f84567d..3c079a31fe 100644 --- a/internal/mode/static/telemetry/job.go +++ b/internal/mode/static/telemetry/job.go @@ -9,11 +9,16 @@ import ( "sigs.k8s.io/controller-runtime/pkg/manager" ) +// DataCollector collects telemetry data. +type DataCollector interface { + Collect(ctx context.Context) (Data, error) +} + // JobConfig is the configuration for the telemetry job. 
type JobConfig struct { // Exporter is the exporter to use for exporting telemetry data. Exporter Exporter - // Collector is the collector to use for collecting telemetry data. + // DataCollector is the collector to use for collecting telemetry data. DataCollector DataCollector // Logger is the logger. Logger logr.Logger @@ -43,7 +48,10 @@ func (j *Job) Start(ctx context.Context) error { j.cfg.Logger.V(1).Info("Gathering telemetry") // We will need to gather data as defined in https://github.com/nginxinc/nginx-gateway-fabric/issues/793 - data := j.cfg.DataCollector.Collect(ctx) + data, err := j.cfg.DataCollector.Collect(ctx) + if err != nil { + j.cfg.Logger.Error(err, "Failed to collect telemetry") + } // Export telemetry j.cfg.Logger.V(1).Info("Exporting telemetry") From e400f5e6183dab891ce92c95c8c9c46b73485c75 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Thu, 25 Jan 2024 11:22:34 -0800 Subject: [PATCH 10/43] Change ReferencedServices and add generic counting function --- .../static/state/change_processor_test.go | 4 +- internal/mode/static/state/graph/graph.go | 6 +- .../mode/static/state/graph/graph_test.go | 8 +- internal/mode/static/state/graph/service.go | 8 +- .../mode/static/state/graph/service_test.go | 50 ++++---- internal/mode/static/telemetry/collector.go | 19 ++- .../mode/static/telemetry/collector_test.go | 22 ++++ internal/mode/static/telemetry/job.go | 2 + internal/mode/static/telemetry/job_test.go | 64 ++++++++++ .../telemetryfakes/fake_data_collector.go | 117 ++++++++++++++++++ .../telemetryfakes/fake_graph_getter.go | 103 +++++++++++++++ 11 files changed, 367 insertions(+), 36 deletions(-) create mode 100644 internal/mode/static/telemetry/collector_test.go create mode 100644 internal/mode/static/telemetry/job_test.go create mode 100644 internal/mode/static/telemetry/telemetryfakes/fake_data_collector.go create mode 100644 internal/mode/static/telemetry/telemetryfakes/fake_graph_getter.go diff --git a/internal/mode/static/state/change_processor_test.go b/internal/mode/static/state/change_processor_test.go index 8c009c20d4..21456ef0b6 100644 --- a/internal/mode/static/state/change_processor_test.go +++ b/internal/mode/static/state/change_processor_test.go @@ -507,11 +507,11 @@ var _ = Describe("ChangeProcessor", func() { {Namespace: "test", Name: "hr-1"}: expRouteHR1, }, ReferencedSecrets: map[types.NamespacedName]*graph.Secret{}, - ReferencedServices: map[types.NamespacedName]struct{}{ + ReferencedServices: map[types.NamespacedName]*apiv1.Service{ { Namespace: "service-ns", Name: "service", - }: {}, + }: nil, }, } }) diff --git a/internal/mode/static/state/graph/graph.go b/internal/mode/static/state/graph/graph.go index 556c38496b..f8a2f93bd0 100644 --- a/internal/mode/static/state/graph/graph.go +++ b/internal/mode/static/state/graph/graph.go @@ -49,8 +49,8 @@ type Graph struct { // ReferencedNamespaces includes Namespaces with labels that match the Gateway Listener's label selector. ReferencedNamespaces map[types.NamespacedName]*v1.Namespace // ReferencedServices includes the NamespacedNames of all the Services that are referenced by at least one HTTPRoute. - // Storing the whole resource is not necessary, compared to the similar maps above. - ReferencedServices map[types.NamespacedName]struct{} + // Non-existing Services will have their NamespacedName mapped to nil. + ReferencedServices map[types.NamespacedName]*v1.Service } // ProtectedPorts are the ports that may not be configured by a listener with a descriptive name of each port. 
@@ -123,7 +123,7 @@ func BuildGraph( referencedNamespaces := buildReferencedNamespaces(state.Namespaces, gw) - referencedServices := buildReferencedServices(routes) + referencedServices := buildReferencedServices(routes, state.Services) g := &Graph{ GatewayClass: gc, diff --git a/internal/mode/static/state/graph/graph_test.go b/internal/mode/static/state/graph/graph_test.go index ed10c01d51..fa5c75a942 100644 --- a/internal/mode/static/state/graph/graph_test.go +++ b/internal/mode/static/state/graph/graph_test.go @@ -347,8 +347,8 @@ func TestBuildGraph(t *testing.T) { ReferencedNamespaces: map[types.NamespacedName]*v1.Namespace{ client.ObjectKeyFromObject(ns): ns, }, - ReferencedServices: map[types.NamespacedName]struct{}{ - client.ObjectKeyFromObject(svc): {}, + ReferencedServices: map[types.NamespacedName]*v1.Service{ + client.ObjectKeyFromObject(svc): svc, }, } } @@ -504,8 +504,8 @@ func TestIsReferenced(t *testing.T) { ReferencedNamespaces: map[types.NamespacedName]*v1.Namespace{ client.ObjectKeyFromObject(nsInGraph): nsInGraph, }, - ReferencedServices: map[types.NamespacedName]struct{}{ - client.ObjectKeyFromObject(serviceInGraph): {}, + ReferencedServices: map[types.NamespacedName]*v1.Service{ + client.ObjectKeyFromObject(serviceInGraph): serviceInGraph, }, } diff --git a/internal/mode/static/state/graph/service.go b/internal/mode/static/state/graph/service.go index 9568b59df1..3f14bf098f 100644 --- a/internal/mode/static/state/graph/service.go +++ b/internal/mode/static/state/graph/service.go @@ -1,13 +1,15 @@ package graph import ( + v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" ) func buildReferencedServices( routes map[types.NamespacedName]*Route, -) map[types.NamespacedName]struct{} { - svcNames := make(map[types.NamespacedName]struct{}) + services map[types.NamespacedName]*v1.Service, +) map[types.NamespacedName]*v1.Service { + svcNames := make(map[types.NamespacedName]*v1.Service) // routes all have populated ParentRefs from when they were created. // @@ -34,7 +36,7 @@ func buildReferencedServices( // Processes both valid and invalid BackendRefs as invalid ones still have referenced services // we may want to track. if ref.SvcNsName != (types.NamespacedName{}) { - svcNames[ref.SvcNsName] = struct{}{} + svcNames[ref.SvcNsName] = services[ref.SvcNsName] } } } diff --git a/internal/mode/static/state/graph/service_test.go b/internal/mode/static/state/graph/service_test.go index 82cf2960e0..b69219fa8d 100644 --- a/internal/mode/static/state/graph/service_test.go +++ b/internal/mode/static/state/graph/service_test.go @@ -4,6 +4,8 @@ import ( "testing" . 
"github.com/onsi/gomega" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" ) @@ -192,9 +194,15 @@ func TestBuildReferencedServices(t *testing.T) { }, } + services := map[types.NamespacedName]*v1.Service{ + {Namespace: "banana-ns", Name: "service"}: {ObjectMeta: metav1.ObjectMeta{Name: "banana-service"}}, + {Namespace: "service-ns", Name: "service"}: {ObjectMeta: metav1.ObjectMeta{Name: "service"}}, + {Namespace: "service-ns2", Name: "service2"}: {ObjectMeta: metav1.ObjectMeta{Name: "service2"}}, + } + tests := []struct { routes map[types.NamespacedName]*Route - exp map[types.NamespacedName]struct{} + exp map[types.NamespacedName]*v1.Service name string }{ { @@ -202,8 +210,8 @@ func TestBuildReferencedServices(t *testing.T) { routes: map[types.NamespacedName]*Route{ {Name: "normal-route"}: normalRoute, }, - exp: map[types.NamespacedName]struct{}{ - {Namespace: "banana-ns", Name: "service"}: {}, + exp: map[types.NamespacedName]*v1.Service{ + {Namespace: "banana-ns", Name: "service"}: {ObjectMeta: metav1.ObjectMeta{Name: "banana-service"}}, }, }, { @@ -211,9 +219,9 @@ func TestBuildReferencedServices(t *testing.T) { routes: map[types.NamespacedName]*Route{ {Name: "two-svc-one-rule"}: validRouteTwoServicesOneRule, }, - exp: map[types.NamespacedName]struct{}{ - {Namespace: "service-ns", Name: "service"}: {}, - {Namespace: "service-ns2", Name: "service2"}: {}, + exp: map[types.NamespacedName]*v1.Service{ + {Namespace: "service-ns", Name: "service"}: {ObjectMeta: metav1.ObjectMeta{Name: "service"}}, + {Namespace: "service-ns2", Name: "service2"}: {ObjectMeta: metav1.ObjectMeta{Name: "service2"}}, }, }, { @@ -221,9 +229,9 @@ func TestBuildReferencedServices(t *testing.T) { routes: map[types.NamespacedName]*Route{ {Name: "one-svc-per-rule"}: validRouteTwoServicesTwoRules, }, - exp: map[types.NamespacedName]struct{}{ - {Namespace: "service-ns", Name: "service"}: {}, - {Namespace: "service-ns2", Name: "service2"}: {}, + exp: map[types.NamespacedName]*v1.Service{ + {Namespace: "service-ns", Name: "service"}: {ObjectMeta: metav1.ObjectMeta{Name: "service"}}, + {Namespace: "service-ns2", Name: "service2"}: {ObjectMeta: metav1.ObjectMeta{Name: "service2"}}, }, }, { @@ -232,9 +240,9 @@ func TestBuildReferencedServices(t *testing.T) { {Name: "one-svc-per-rule"}: validRouteTwoServicesTwoRules, {Name: "two-svc-one-rule"}: validRouteTwoServicesOneRule, }, - exp: map[types.NamespacedName]struct{}{ - {Namespace: "service-ns", Name: "service"}: {}, - {Namespace: "service-ns2", Name: "service2"}: {}, + exp: map[types.NamespacedName]*v1.Service{ + {Namespace: "service-ns", Name: "service"}: {ObjectMeta: metav1.ObjectMeta{Name: "service"}}, + {Namespace: "service-ns2", Name: "service2"}: {ObjectMeta: metav1.ObjectMeta{Name: "service2"}}, }, }, { @@ -243,10 +251,10 @@ func TestBuildReferencedServices(t *testing.T) { {Name: "one-svc-per-rule"}: validRouteTwoServicesTwoRules, {Name: "normal-route"}: normalRoute, }, - exp: map[types.NamespacedName]struct{}{ - {Namespace: "service-ns", Name: "service"}: {}, - {Namespace: "service-ns2", Name: "service2"}: {}, - {Namespace: "banana-ns", Name: "service"}: {}, + exp: map[types.NamespacedName]*v1.Service{ + {Namespace: "service-ns", Name: "service"}: {ObjectMeta: metav1.ObjectMeta{Name: "service"}}, + {Namespace: "service-ns2", Name: "service2"}: {ObjectMeta: metav1.ObjectMeta{Name: "service2"}}, + {Namespace: "banana-ns", Name: "service"}: {ObjectMeta: metav1.ObjectMeta{Name: "banana-service"}}, }, }, { @@ 
-269,8 +277,8 @@ func TestBuildReferencedServices(t *testing.T) { {Name: "normal-route"}: normalRoute, {Name: "invalid-route"}: invalidRoute, }, - exp: map[types.NamespacedName]struct{}{ - {Namespace: "banana-ns", Name: "service"}: {}, + exp: map[types.NamespacedName]*v1.Service{ + {Namespace: "banana-ns", Name: "service"}: {ObjectMeta: metav1.ObjectMeta{Name: "banana-service"}}, }, }, { @@ -278,8 +286,8 @@ func TestBuildReferencedServices(t *testing.T) { routes: map[types.NamespacedName]*Route{ {Name: "multiple-parent-ref-route"}: attachedRouteWithManyParentRefs, }, - exp: map[types.NamespacedName]struct{}{ - {Namespace: "service-ns", Name: "service"}: {}, + exp: map[types.NamespacedName]*v1.Service{ + {Namespace: "service-ns", Name: "service"}: {ObjectMeta: metav1.ObjectMeta{Name: "service"}}, }, }, { @@ -294,7 +302,7 @@ func TestBuildReferencedServices(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { g := NewWithT(t) - g.Expect(buildReferencedServices(test.routes)).To(Equal(test.exp)) + g.Expect(buildReferencedServices(test.routes, services)).To(Equal(test.exp)) }) } } diff --git a/internal/mode/static/telemetry/collector.go b/internal/mode/static/telemetry/collector.go index 7bdfd0e523..473ba44485 100644 --- a/internal/mode/static/telemetry/collector.go +++ b/internal/mode/static/telemetry/collector.go @@ -4,11 +4,14 @@ import ( "context" v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/state/graph" ) +//go:generate go run github.com/maxbrunsfeld/counterfeiter/v6 . GraphGetter + type GraphGetter interface { GetLatestGraph() *graph.Graph } @@ -97,9 +100,19 @@ func collectGraphResourceCount(graphGetter GraphGetter) NGFResourceCounts { ngfResourceCounts.Gateways = 1 } ngfResourceCounts.HTTPRoutes = len(g.Routes) - ngfResourceCounts.Secrets = len(g.ReferencedSecrets) - // WIP: ReferencedServices may contain non-existing services - ngfResourceCounts.Services = len(g.ReferencedServices) + ngfResourceCounts.Secrets = countReferencedResources(g.ReferencedSecrets) + ngfResourceCounts.Services = countReferencedResources(g.ReferencedServices) return ngfResourceCounts } + +func countReferencedResources[T comparable](referencedMap map[types.NamespacedName]T) int { + var count int + var zeroValue T + for name := range referencedMap { + if referencedMap[name] != zeroValue { + count++ + } + } + return count +} diff --git a/internal/mode/static/telemetry/collector_test.go b/internal/mode/static/telemetry/collector_test.go new file mode 100644 index 0000000000..2a452f948d --- /dev/null +++ b/internal/mode/static/telemetry/collector_test.go @@ -0,0 +1,22 @@ +package telemetry_test + +import ( + . 
"github.com/onsi/ginkgo/v2" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/telemetry" +) + +var _ = Describe("Collector", func() { + var ( + k8sClient client.Client + _ telemetry.DataCollector + version string + ) + BeforeEach(func() { + version = "1.1" + k8sClient = fake.NewFakeClient() + _ = telemetry.NewDataCollector(telemetry.DataCollectorConfig{K8sClientReader: k8sClient, Version: version}) + }) +}) diff --git a/internal/mode/static/telemetry/job.go b/internal/mode/static/telemetry/job.go index 3c079a31fe..195b159049 100644 --- a/internal/mode/static/telemetry/job.go +++ b/internal/mode/static/telemetry/job.go @@ -9,6 +9,8 @@ import ( "sigs.k8s.io/controller-runtime/pkg/manager" ) +//go:generate go run github.com/maxbrunsfeld/counterfeiter/v6 . DataCollector + // DataCollector collects telemetry data. type DataCollector interface { Collect(ctx context.Context) (Data, error) diff --git a/internal/mode/static/telemetry/job_test.go b/internal/mode/static/telemetry/job_test.go new file mode 100644 index 0000000000..76e2124c74 --- /dev/null +++ b/internal/mode/static/telemetry/job_test.go @@ -0,0 +1,64 @@ +package telemetry_test + +import ( + "context" + "errors" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + + "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/telemetry" + "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/telemetry/telemetryfakes" +) + +var _ = Describe("Job", func() { + var ( + job *telemetry.Job + exporter *telemetryfakes.FakeExporter + dataCollector *telemetryfakes.FakeDataCollector + ) + const timeout = 10 * time.Second + + BeforeEach(func() { + exporter = &telemetryfakes.FakeExporter{} + dataCollector = &telemetryfakes.FakeDataCollector{} + job = telemetry.NewJob(telemetry.JobConfig{ + Exporter: exporter, + Logger: zap.New(), + Period: 1 * time.Millisecond, // 1ms is much smaller than timeout so the Job should report a few times + DataCollector: dataCollector, + }) + }) + + DescribeTable( + "Job runs with a few reports without any errors", + func(exporterError error) { + // The fact that exporter return an error must not affect how many times the Job makes a report. + exporter.ExportReturns(exporterError) + + ctx, cancel := context.WithTimeout(context.Background(), timeout) + + errCh := make(chan error) + go func() { + errCh <- job.Start(ctx) + close(errCh) + }() + + const minReports = 2 // ensure that the Job reports more than once: it doesn't exit after the first report + + Eventually(exporter.ExportCallCount).Should(BeNumerically(">=", minReports)) + for i := 0; i < minReports; i++ { + _, data := exporter.ExportArgsForCall(i) + Expect(data).To(Equal(telemetry.Data{})) + } + + cancel() + Eventually(errCh).Should(Receive(BeNil())) + Eventually(errCh).Should(BeClosed()) + }, + Entry("Job runs with Exporter not returning errors", nil), + Entry("Job runs with Exporter returning an error", errors.New("some error")), + ) +}) diff --git a/internal/mode/static/telemetry/telemetryfakes/fake_data_collector.go b/internal/mode/static/telemetry/telemetryfakes/fake_data_collector.go new file mode 100644 index 0000000000..0bce02c41c --- /dev/null +++ b/internal/mode/static/telemetry/telemetryfakes/fake_data_collector.go @@ -0,0 +1,117 @@ +// Code generated by counterfeiter. DO NOT EDIT. 
+package telemetryfakes + +import ( + "context" + "sync" + + "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/telemetry" +) + +type FakeDataCollector struct { + CollectStub func(context.Context) (telemetry.Data, error) + collectMutex sync.RWMutex + collectArgsForCall []struct { + arg1 context.Context + } + collectReturns struct { + result1 telemetry.Data + result2 error + } + collectReturnsOnCall map[int]struct { + result1 telemetry.Data + result2 error + } + invocations map[string][][]interface{} + invocationsMutex sync.RWMutex +} + +func (fake *FakeDataCollector) Collect(arg1 context.Context) (telemetry.Data, error) { + fake.collectMutex.Lock() + ret, specificReturn := fake.collectReturnsOnCall[len(fake.collectArgsForCall)] + fake.collectArgsForCall = append(fake.collectArgsForCall, struct { + arg1 context.Context + }{arg1}) + stub := fake.CollectStub + fakeReturns := fake.collectReturns + fake.recordInvocation("Collect", []interface{}{arg1}) + fake.collectMutex.Unlock() + if stub != nil { + return stub(arg1) + } + if specificReturn { + return ret.result1, ret.result2 + } + return fakeReturns.result1, fakeReturns.result2 +} + +func (fake *FakeDataCollector) CollectCallCount() int { + fake.collectMutex.RLock() + defer fake.collectMutex.RUnlock() + return len(fake.collectArgsForCall) +} + +func (fake *FakeDataCollector) CollectCalls(stub func(context.Context) (telemetry.Data, error)) { + fake.collectMutex.Lock() + defer fake.collectMutex.Unlock() + fake.CollectStub = stub +} + +func (fake *FakeDataCollector) CollectArgsForCall(i int) context.Context { + fake.collectMutex.RLock() + defer fake.collectMutex.RUnlock() + argsForCall := fake.collectArgsForCall[i] + return argsForCall.arg1 +} + +func (fake *FakeDataCollector) CollectReturns(result1 telemetry.Data, result2 error) { + fake.collectMutex.Lock() + defer fake.collectMutex.Unlock() + fake.CollectStub = nil + fake.collectReturns = struct { + result1 telemetry.Data + result2 error + }{result1, result2} +} + +func (fake *FakeDataCollector) CollectReturnsOnCall(i int, result1 telemetry.Data, result2 error) { + fake.collectMutex.Lock() + defer fake.collectMutex.Unlock() + fake.CollectStub = nil + if fake.collectReturnsOnCall == nil { + fake.collectReturnsOnCall = make(map[int]struct { + result1 telemetry.Data + result2 error + }) + } + fake.collectReturnsOnCall[i] = struct { + result1 telemetry.Data + result2 error + }{result1, result2} +} + +func (fake *FakeDataCollector) Invocations() map[string][][]interface{} { + fake.invocationsMutex.RLock() + defer fake.invocationsMutex.RUnlock() + fake.collectMutex.RLock() + defer fake.collectMutex.RUnlock() + copiedInvocations := map[string][][]interface{}{} + for key, value := range fake.invocations { + copiedInvocations[key] = value + } + return copiedInvocations +} + +func (fake *FakeDataCollector) recordInvocation(key string, args []interface{}) { + fake.invocationsMutex.Lock() + defer fake.invocationsMutex.Unlock() + if fake.invocations == nil { + fake.invocations = map[string][][]interface{}{} + } + if fake.invocations[key] == nil { + fake.invocations[key] = [][]interface{}{} + } + fake.invocations[key] = append(fake.invocations[key], args) +} + +var _ telemetry.DataCollector = new(FakeDataCollector) diff --git a/internal/mode/static/telemetry/telemetryfakes/fake_graph_getter.go b/internal/mode/static/telemetry/telemetryfakes/fake_graph_getter.go new file mode 100644 index 0000000000..2b7c5b2e13 --- /dev/null +++ 
b/internal/mode/static/telemetry/telemetryfakes/fake_graph_getter.go @@ -0,0 +1,103 @@ +// Code generated by counterfeiter. DO NOT EDIT. +package telemetryfakes + +import ( + "sync" + + "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/state/graph" + "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/telemetry" +) + +type FakeGraphGetter struct { + GetLatestGraphStub func() *graph.Graph + getLatestGraphMutex sync.RWMutex + getLatestGraphArgsForCall []struct { + } + getLatestGraphReturns struct { + result1 *graph.Graph + } + getLatestGraphReturnsOnCall map[int]struct { + result1 *graph.Graph + } + invocations map[string][][]interface{} + invocationsMutex sync.RWMutex +} + +func (fake *FakeGraphGetter) GetLatestGraph() *graph.Graph { + fake.getLatestGraphMutex.Lock() + ret, specificReturn := fake.getLatestGraphReturnsOnCall[len(fake.getLatestGraphArgsForCall)] + fake.getLatestGraphArgsForCall = append(fake.getLatestGraphArgsForCall, struct { + }{}) + stub := fake.GetLatestGraphStub + fakeReturns := fake.getLatestGraphReturns + fake.recordInvocation("GetLatestGraph", []interface{}{}) + fake.getLatestGraphMutex.Unlock() + if stub != nil { + return stub() + } + if specificReturn { + return ret.result1 + } + return fakeReturns.result1 +} + +func (fake *FakeGraphGetter) GetLatestGraphCallCount() int { + fake.getLatestGraphMutex.RLock() + defer fake.getLatestGraphMutex.RUnlock() + return len(fake.getLatestGraphArgsForCall) +} + +func (fake *FakeGraphGetter) GetLatestGraphCalls(stub func() *graph.Graph) { + fake.getLatestGraphMutex.Lock() + defer fake.getLatestGraphMutex.Unlock() + fake.GetLatestGraphStub = stub +} + +func (fake *FakeGraphGetter) GetLatestGraphReturns(result1 *graph.Graph) { + fake.getLatestGraphMutex.Lock() + defer fake.getLatestGraphMutex.Unlock() + fake.GetLatestGraphStub = nil + fake.getLatestGraphReturns = struct { + result1 *graph.Graph + }{result1} +} + +func (fake *FakeGraphGetter) GetLatestGraphReturnsOnCall(i int, result1 *graph.Graph) { + fake.getLatestGraphMutex.Lock() + defer fake.getLatestGraphMutex.Unlock() + fake.GetLatestGraphStub = nil + if fake.getLatestGraphReturnsOnCall == nil { + fake.getLatestGraphReturnsOnCall = make(map[int]struct { + result1 *graph.Graph + }) + } + fake.getLatestGraphReturnsOnCall[i] = struct { + result1 *graph.Graph + }{result1} +} + +func (fake *FakeGraphGetter) Invocations() map[string][][]interface{} { + fake.invocationsMutex.RLock() + defer fake.invocationsMutex.RUnlock() + fake.getLatestGraphMutex.RLock() + defer fake.getLatestGraphMutex.RUnlock() + copiedInvocations := map[string][][]interface{}{} + for key, value := range fake.invocations { + copiedInvocations[key] = value + } + return copiedInvocations +} + +func (fake *FakeGraphGetter) recordInvocation(key string, args []interface{}) { + fake.invocationsMutex.Lock() + defer fake.invocationsMutex.Unlock() + if fake.invocations == nil { + fake.invocations = map[string][][]interface{}{} + } + if fake.invocations[key] == nil { + fake.invocations[key] = [][]interface{}{} + } + fake.invocations[key] = append(fake.invocations[key], args) +} + +var _ telemetry.GraphGetter = new(FakeGraphGetter) From cfc0519653e6bcd53a854c2e147bb18834a113cc Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Fri, 26 Jan 2024 11:32:27 -0800 Subject: [PATCH 11/43] Add collector tests --- .../mode/static/telemetry/collector_test.go | 208 +++++++++++++++++- 1 file changed, 200 insertions(+), 8 deletions(-) diff --git 
a/internal/mode/static/telemetry/collector_test.go b/internal/mode/static/telemetry/collector_test.go index 2a452f948d..f0ff59a17d 100644 --- a/internal/mode/static/telemetry/collector_test.go +++ b/internal/mode/static/telemetry/collector_test.go @@ -1,22 +1,214 @@ package telemetry_test import ( + "context" + "errors" + "fmt" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/client/fake" + "github.com/nginxinc/nginx-gateway-fabric/internal/framework/events/eventsfakes" + "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/state/graph" "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/telemetry" + "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/telemetry/telemetryfakes" ) -var _ = Describe("Collector", func() { +var _ = Describe("Collector", Ordered, func() { var ( - k8sClient client.Client - _ telemetry.DataCollector - version string + k8sClientReader *eventsfakes.FakeReader + fakeGraphGetter *telemetryfakes.FakeGraphGetter + dataCollector telemetry.DataCollector + version string + graph1, graph2 *graph.Graph + ctx context.Context ) - BeforeEach(func() { + + BeforeAll(func() { + ctx = context.Background() version = "1.1" - k8sClient = fake.NewFakeClient() - _ = telemetry.NewDataCollector(telemetry.DataCollectorConfig{K8sClientReader: k8sClient, Version: version}) + k8sClientReader = &eventsfakes.FakeReader{} + fakeGraphGetter = &telemetryfakes.FakeGraphGetter{} + fakeGraphGetter.GetLatestGraphReturns(&graph.Graph{}) + + dataCollector = telemetry.NewDataCollector(telemetry.DataCollectorConfig{ + K8sClientReader: k8sClientReader, + GraphGetter: fakeGraphGetter, + Version: version, + }) + + secret1 := &v1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "secret1"}} + secret2 := &v1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "secret2"}} + nilsecret := &v1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "nilsecret"}} + + svc1 := &v1.Service{ObjectMeta: metav1.ObjectMeta{Name: "svc1"}} + svc2 := &v1.Service{ObjectMeta: metav1.ObjectMeta{Name: "svc2"}} + nilsvc := &v1.Service{ObjectMeta: metav1.ObjectMeta{Name: "nilsvc"}} + + graph1 = &graph.Graph{ + GatewayClass: &graph.GatewayClass{}, + Gateway: &graph.Gateway{}, + Routes: map[types.NamespacedName]*graph.Route{ + {Namespace: "test", Name: "hr-1"}: {}, + }, + ReferencedSecrets: map[types.NamespacedName]*graph.Secret{ + client.ObjectKeyFromObject(secret1): { + Source: secret1, + }, + }, + ReferencedServices: map[types.NamespacedName]*v1.Service{ + client.ObjectKeyFromObject(svc1): svc1, + }, + } + graph2 = &graph.Graph{ + GatewayClass: &graph.GatewayClass{}, + Gateway: &graph.Gateway{}, + Routes: map[types.NamespacedName]*graph.Route{ + {Namespace: "test", Name: "hr-1"}: {}, + {Namespace: "test", Name: "hr-2"}: {}, + {Namespace: "test", Name: "hr-3"}: {}, + }, + ReferencedSecrets: map[types.NamespacedName]*graph.Secret{ + client.ObjectKeyFromObject(secret1): { + Source: secret1, + }, + client.ObjectKeyFromObject(secret2): { + Source: secret2, + }, + client.ObjectKeyFromObject(nilsecret): nil, + }, + ReferencedServices: map[types.NamespacedName]*v1.Service{ + client.ObjectKeyFromObject(svc1): svc1, + client.ObjectKeyFromObject(svc2): svc2, + client.ObjectKeyFromObject(nilsvc): nil, + }, + } + }) + When("retrieving node count data", func() { + It("generates 
correct data for no nodes", func() { + k8sClientReader.ListCalls(func(ctx context.Context, list client.ObjectList, option ...client.ListOption) error { + Expect(option).To(BeEmpty()) + + switch typedList := list.(type) { + case *v1.NodeList: + typedList.Items = []v1.Node{} + default: + Fail(fmt.Sprintf("unknown type: %T", typedList)) + } + return nil + }) + data, err := dataCollector.Collect(ctx) + expData := telemetry.Data{ + ProjectMetadata: telemetry.ProjectMetadata{Name: "NGF", Version: version}, + NodeCount: 0, + NGFResourceCounts: telemetry.NGFResourceCounts{}, + } + Expect(err).To(BeNil()) + Expect(expData).To(Equal(data)) + }) + It("generates correct data for one node", func() { + node := v1.Node{ + ObjectMeta: metav1.ObjectMeta{Name: "node1"}, + } + + k8sClientReader.ListCalls(func(ctx context.Context, list client.ObjectList, option ...client.ListOption) error { + Expect(option).To(BeEmpty()) + + switch typedList := list.(type) { + case *v1.NodeList: + typedList.Items = append(typedList.Items, node) + default: + Fail(fmt.Sprintf("unknown type: %T", typedList)) + } + return nil + }) + data, err := dataCollector.Collect(ctx) + expData := telemetry.Data{ + ProjectMetadata: telemetry.ProjectMetadata{Name: "NGF", Version: version}, + NodeCount: 1, + NGFResourceCounts: telemetry.NGFResourceCounts{}, + } + Expect(err).To(BeNil()) + Expect(expData).To(Equal(data)) + }) + It("generates correct data for multiple nodes", func() { + node := v1.Node{ + ObjectMeta: metav1.ObjectMeta{Name: "node1"}, + } + node2 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{Name: "node2"}, + } + node3 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{Name: "node3"}, + } + k8sClientReader.ListCalls(func(ctx context.Context, list client.ObjectList, option ...client.ListOption) error { + Expect(option).To(BeEmpty()) + + switch typedList := list.(type) { + case *v1.NodeList: + typedList.Items = append(typedList.Items, node, node2, node3) + default: + Fail(fmt.Sprintf("unknown type: %T", typedList)) + } + return nil + }) + data, err := dataCollector.Collect(ctx) + expData := telemetry.Data{ + ProjectMetadata: telemetry.ProjectMetadata{Name: "NGF", Version: version}, + NodeCount: 3, + NGFResourceCounts: telemetry.NGFResourceCounts{}, + } + Expect(err).To(BeNil()) + Expect(expData).To(Equal(data)) + }) + }) + When("retrieving NGF resource counts", func() { + // TODO: Fix this + It("generates correct data for one resources", func() { + fakeGraphGetter.GetLatestGraphReturns(graph1) + data, err := dataCollector.Collect(ctx) + expData := telemetry.Data{ + ProjectMetadata: telemetry.ProjectMetadata{Name: "NGF", Version: version}, + NodeCount: 3, + NGFResourceCounts: telemetry.NGFResourceCounts{ + Gateways: 1, + GatewayClasses: 1, + HTTPRoutes: 1, + Secrets: 1, + Services: 1, + }, + } + Expect(err).To(BeNil()) + Expect(expData).To(Equal(data)) + }) + It("generates correct data for multiple resources", func() { + fakeGraphGetter.GetLatestGraphReturns(graph2) + data, err := dataCollector.Collect(ctx) + expData := telemetry.Data{ + ProjectMetadata: telemetry.ProjectMetadata{Name: "NGF", Version: version}, + NodeCount: 3, + NGFResourceCounts: telemetry.NGFResourceCounts{ + Gateways: 1, + GatewayClasses: 1, + HTTPRoutes: 3, + Secrets: 2, + Services: 2, + }, + } + Expect(err).To(BeNil()) + Expect(expData).To(Equal(data)) + }) + }) + When("it encounters an error while collecting data", func() { + It("should error on client errors", func() { + k8sClientReader.ListReturns(errors.New("there was an error")) + ctx := context.Background() + _, 
err := dataCollector.Collect(ctx) + Expect(err).To(HaveOccurred()) + }) }) }) From f4e16a76d1d838f19f9227c5606793c8415e380f Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Fri, 26 Jan 2024 11:41:53 -0800 Subject: [PATCH 12/43] Refactor style on collector tests --- .../mode/static/telemetry/collector_test.go | 34 ++++++++++++++----- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/internal/mode/static/telemetry/collector_test.go b/internal/mode/static/telemetry/collector_test.go index f0ff59a17d..4172c306b7 100644 --- a/internal/mode/static/telemetry/collector_test.go +++ b/internal/mode/static/telemetry/collector_test.go @@ -31,6 +31,7 @@ var _ = Describe("Collector", Ordered, func() { BeforeAll(func() { ctx = context.Background() version = "1.1" + k8sClientReader = &eventsfakes.FakeReader{} fakeGraphGetter = &telemetryfakes.FakeGraphGetter{} fakeGraphGetter.GetLatestGraphReturns(&graph.Graph{}) @@ -101,12 +102,15 @@ var _ = Describe("Collector", Ordered, func() { } return nil }) - data, err := dataCollector.Collect(ctx) + expData := telemetry.Data{ ProjectMetadata: telemetry.ProjectMetadata{Name: "NGF", Version: version}, NodeCount: 0, NGFResourceCounts: telemetry.NGFResourceCounts{}, } + + data, err := dataCollector.Collect(ctx) + Expect(err).To(BeNil()) Expect(expData).To(Equal(data)) }) @@ -126,12 +130,15 @@ var _ = Describe("Collector", Ordered, func() { } return nil }) - data, err := dataCollector.Collect(ctx) + expData := telemetry.Data{ ProjectMetadata: telemetry.ProjectMetadata{Name: "NGF", Version: version}, NodeCount: 1, NGFResourceCounts: telemetry.NGFResourceCounts{}, } + + data, err := dataCollector.Collect(ctx) + Expect(err).To(BeNil()) Expect(expData).To(Equal(data)) }) @@ -145,6 +152,7 @@ var _ = Describe("Collector", Ordered, func() { node3 := v1.Node{ ObjectMeta: metav1.ObjectMeta{Name: "node3"}, } + k8sClientReader.ListCalls(func(ctx context.Context, list client.ObjectList, option ...client.ListOption) error { Expect(option).To(BeEmpty()) @@ -156,21 +164,23 @@ var _ = Describe("Collector", Ordered, func() { } return nil }) - data, err := dataCollector.Collect(ctx) + expData := telemetry.Data{ ProjectMetadata: telemetry.ProjectMetadata{Name: "NGF", Version: version}, NodeCount: 3, NGFResourceCounts: telemetry.NGFResourceCounts{}, } + + data, err := dataCollector.Collect(ctx) + Expect(err).To(BeNil()) Expect(expData).To(Equal(data)) }) }) When("retrieving NGF resource counts", func() { - // TODO: Fix this - It("generates correct data for one resources", func() { + It("generates correct data for graph with one of each resource", func() { fakeGraphGetter.GetLatestGraphReturns(graph1) - data, err := dataCollector.Collect(ctx) + expData := telemetry.Data{ ProjectMetadata: telemetry.ProjectMetadata{Name: "NGF", Version: version}, NodeCount: 3, @@ -182,12 +192,15 @@ var _ = Describe("Collector", Ordered, func() { Services: 1, }, } + + data, err := dataCollector.Collect(ctx) + Expect(err).To(BeNil()) Expect(expData).To(Equal(data)) }) - It("generates correct data for multiple resources", func() { + It("generates correct data for graph with multiple of each resource", func() { fakeGraphGetter.GetLatestGraphReturns(graph2) - data, err := dataCollector.Collect(ctx) + expData := telemetry.Data{ ProjectMetadata: telemetry.ProjectMetadata{Name: "NGF", Version: version}, NodeCount: 3, @@ -199,6 +212,9 @@ var _ = Describe("Collector", Ordered, func() { Services: 2, }, } + + data, err := dataCollector.Collect(ctx) + Expect(err).To(BeNil()) Expect(expData).To(Equal(data)) 
}) @@ -206,7 +222,7 @@ var _ = Describe("Collector", Ordered, func() { When("it encounters an error while collecting data", func() { It("should error on client errors", func() { k8sClientReader.ListReturns(errors.New("there was an error")) - ctx := context.Background() + _, err := dataCollector.Collect(ctx) Expect(err).To(HaveOccurred()) }) From 4372871dd526aecce4b22d7ea8e5c9e33a2aa61b Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Fri, 26 Jan 2024 14:14:13 -0800 Subject: [PATCH 13/43] Add small style changes and documentation --- internal/mode/static/telemetry/collector.go | 8 +++++++- internal/mode/static/telemetry/collector_test.go | 6 ++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/internal/mode/static/telemetry/collector.go b/internal/mode/static/telemetry/collector.go index 473ba44485..8e97863b00 100644 --- a/internal/mode/static/telemetry/collector.go +++ b/internal/mode/static/telemetry/collector.go @@ -12,20 +12,22 @@ import ( //go:generate go run github.com/maxbrunsfeld/counterfeiter/v6 . GraphGetter +// GraphGetter gets the current Graph. type GraphGetter interface { GetLatestGraph() *graph.Graph } +// NGFResourceCounts stores the counts of all relevant Graph resources. type NGFResourceCounts struct { Gateways int GatewayClasses int HTTPRoutes int Secrets int Services int - EndpointSlices int Endpoints int } +// ProjectMetadata stores the name of the project and the current version. type ProjectMetadata struct { Name string Version string @@ -61,6 +63,7 @@ func NewDataCollector( } } +// Collect collects and returns telemetry Data. func (c DataCollectorImpl) Collect(ctx context.Context) (Data, error) { nodeCount, err := collectNodeCount(ctx, c.cfg.K8sClientReader) if err != nil { @@ -106,9 +109,12 @@ func collectGraphResourceCount(graphGetter GraphGetter) NGFResourceCounts { return ngfResourceCounts } +// countReferencedResources counts the amount of non-nil resources. 
func countReferencedResources[T comparable](referencedMap map[types.NamespacedName]T) int { var count int + // because we can't compare T to nil, we need to use the zeroValue of T var zeroValue T + for name := range referencedMap { if referencedMap[name] != zeroValue { count++ diff --git a/internal/mode/static/telemetry/collector_test.go b/internal/mode/static/telemetry/collector_test.go index 4172c306b7..887e6cc118 100644 --- a/internal/mode/static/telemetry/collector_test.go +++ b/internal/mode/static/telemetry/collector_test.go @@ -89,6 +89,7 @@ var _ = Describe("Collector", Ordered, func() { }, } }) + When("retrieving node count data", func() { It("generates correct data for no nodes", func() { k8sClientReader.ListCalls(func(ctx context.Context, list client.ObjectList, option ...client.ListOption) error { @@ -114,6 +115,7 @@ var _ = Describe("Collector", Ordered, func() { Expect(err).To(BeNil()) Expect(expData).To(Equal(data)) }) + It("generates correct data for one node", func() { node := v1.Node{ ObjectMeta: metav1.ObjectMeta{Name: "node1"}, @@ -142,6 +144,7 @@ var _ = Describe("Collector", Ordered, func() { Expect(err).To(BeNil()) Expect(expData).To(Equal(data)) }) + It("generates correct data for multiple nodes", func() { node := v1.Node{ ObjectMeta: metav1.ObjectMeta{Name: "node1"}, @@ -177,6 +180,7 @@ var _ = Describe("Collector", Ordered, func() { Expect(expData).To(Equal(data)) }) }) + When("retrieving NGF resource counts", func() { It("generates correct data for graph with one of each resource", func() { fakeGraphGetter.GetLatestGraphReturns(graph1) @@ -198,6 +202,7 @@ var _ = Describe("Collector", Ordered, func() { Expect(err).To(BeNil()) Expect(expData).To(Equal(data)) }) + It("generates correct data for graph with multiple of each resource", func() { fakeGraphGetter.GetLatestGraphReturns(graph2) @@ -219,6 +224,7 @@ var _ = Describe("Collector", Ordered, func() { Expect(expData).To(Equal(data)) }) }) + When("it encounters an error while collecting data", func() { It("should error on client errors", func() { k8sClientReader.ListReturns(errors.New("there was an error")) From 7eec0dfcbe5cecefb8a6d6a003c7944fedd99687 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Fri, 26 Jan 2024 14:28:24 -0800 Subject: [PATCH 14/43] Add error case for latest graph being nil --- .../state/statefakes/fake_change_processor.go | 2 +- internal/mode/static/telemetry/collector.go | 15 ++++++++++++--- internal/mode/static/telemetry/collector_test.go | 11 +++++++++++ 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/internal/mode/static/state/statefakes/fake_change_processor.go b/internal/mode/static/state/statefakes/fake_change_processor.go index 7bd887ed0e..1b710d4148 100644 --- a/internal/mode/static/state/statefakes/fake_change_processor.go +++ b/internal/mode/static/state/statefakes/fake_change_processor.go @@ -22,7 +22,6 @@ type FakeChangeProcessor struct { captureUpsertChangeArgsForCall []struct { arg1 client.Object } - ProcessStub func() (state.ChangeType, *graph.Graph) GetLatestGraphStub func() *graph.Graph getLatestGraphMutex sync.RWMutex getLatestGraphArgsForCall []struct { @@ -33,6 +32,7 @@ type FakeChangeProcessor struct { getLatestGraphReturnsOnCall map[int]struct { result1 *graph.Graph } + ProcessStub func() (state.ChangeType, *graph.Graph) processMutex sync.RWMutex processArgsForCall []struct { } diff --git a/internal/mode/static/telemetry/collector.go b/internal/mode/static/telemetry/collector.go index 8e97863b00..2b608500a3 100644 --- 
a/internal/mode/static/telemetry/collector.go +++ b/internal/mode/static/telemetry/collector.go @@ -2,6 +2,7 @@ package telemetry import ( "context" + "errors" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" @@ -69,7 +70,11 @@ func (c DataCollectorImpl) Collect(ctx context.Context) (Data, error) { if err != nil { return Data{}, err } - graphResourceCount := collectGraphResourceCount(c.cfg.GraphGetter) + + graphResourceCount, err := collectGraphResourceCount(c.cfg.GraphGetter) + if err != nil { + return Data{}, err + } data := Data{ NodeCount: nodeCount, @@ -92,10 +97,14 @@ func collectNodeCount(ctx context.Context, k8sClient client.Reader) (int, error) return len(nodes.Items), nil } -func collectGraphResourceCount(graphGetter GraphGetter) NGFResourceCounts { +func collectGraphResourceCount(graphGetter GraphGetter) (NGFResourceCounts, error) { ngfResourceCounts := NGFResourceCounts{} g := graphGetter.GetLatestGraph() + if g == nil { + return NGFResourceCounts{}, errors.New("latest graph cannot be nil") + } + if g.GatewayClass != nil { ngfResourceCounts.GatewayClasses = 1 } @@ -106,7 +115,7 @@ func collectGraphResourceCount(graphGetter GraphGetter) NGFResourceCounts { ngfResourceCounts.Secrets = countReferencedResources(g.ReferencedSecrets) ngfResourceCounts.Services = countReferencedResources(g.ReferencedServices) - return ngfResourceCounts + return ngfResourceCounts, nil } // countReferencedResources counts the amount of non-nil resources. diff --git a/internal/mode/static/telemetry/collector_test.go b/internal/mode/static/telemetry/collector_test.go index 887e6cc118..b62fea4111 100644 --- a/internal/mode/static/telemetry/collector_test.go +++ b/internal/mode/static/telemetry/collector_test.go @@ -226,11 +226,22 @@ var _ = Describe("Collector", Ordered, func() { }) When("it encounters an error while collecting data", func() { + BeforeEach(func() { + k8sClientReader.ListReturns(nil) + }) + It("should error on client errors", func() { k8sClientReader.ListReturns(errors.New("there was an error")) _, err := dataCollector.Collect(ctx) Expect(err).To(HaveOccurred()) }) + + It("should error on latest graph errors", func() { + fakeGraphGetter.GetLatestGraphReturns(nil) + + _, err := dataCollector.Collect(ctx) + Expect(err).To(HaveOccurred()) + }) }) }) From a1e230f20273b7bee2db45ff840fb5e8d316b4c3 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Fri, 26 Jan 2024 14:40:12 -0800 Subject: [PATCH 15/43] Add small feedback for error message --- internal/mode/static/telemetry/job.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/internal/mode/static/telemetry/job.go b/internal/mode/static/telemetry/job.go index 195b159049..62c20d5b0b 100644 --- a/internal/mode/static/telemetry/job.go +++ b/internal/mode/static/telemetry/job.go @@ -47,19 +47,19 @@ func (j *Job) Start(ctx context.Context) error { report := func(ctx context.Context) { // Gather telemetry - j.cfg.Logger.V(1).Info("Gathering telemetry") + j.cfg.Logger.V(1).Info("Gathering telemetry data") // We will need to gather data as defined in https://github.com/nginxinc/nginx-gateway-fabric/issues/793 data, err := j.cfg.DataCollector.Collect(ctx) if err != nil { - j.cfg.Logger.Error(err, "Failed to collect telemetry") + j.cfg.Logger.Error(err, "Failed to collect telemetry data") } // Export telemetry - j.cfg.Logger.V(1).Info("Exporting telemetry") + j.cfg.Logger.V(1).Info("Exporting telemetry data") if err := j.cfg.Exporter.Export(ctx, data); err != nil { - j.cfg.Logger.Error(err, "Failed to export 
telemetry") + j.cfg.Logger.Error(err, "Failed to export telemetry data") } } From d92cd8374cbfb4ee8fe910752d3245798e819afe Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Fri, 26 Jan 2024 15:22:26 -0800 Subject: [PATCH 16/43] Add latest configuration and tests --- internal/mode/static/handler.go | 16 ++- internal/mode/static/telemetry/collector.go | 27 ++++- .../mode/static/telemetry/collector_test.go | 102 +++++++++++++++-- .../fake_configuration_getter.go | 103 ++++++++++++++++++ 4 files changed, 233 insertions(+), 15 deletions(-) create mode 100644 internal/mode/static/telemetry/telemetryfakes/fake_configuration_getter.go diff --git a/internal/mode/static/handler.go b/internal/mode/static/handler.go index d0621e2fa0..ba7213273c 100644 --- a/internal/mode/static/handler.go +++ b/internal/mode/static/handler.go @@ -72,7 +72,9 @@ type eventHandlerConfig struct { // (2) Keeping the statuses of the Gateway API resources updated. // (3) Updating control plane configuration. type eventHandlerImpl struct { - cfg eventHandlerConfig + // latestConfiguration is the latest Configuration generation. + latestConfiguration *dataplane.Configuration + cfg eventHandlerConfig } // newEventHandlerImpl creates a new eventHandlerImpl. @@ -111,16 +113,20 @@ func (h *eventHandlerImpl) HandleEventBatch(ctx context.Context, logger logr.Log return case state.EndpointsOnlyChange: h.cfg.version++ + cfg := dataplane.BuildConfiguration(ctx, graph, h.cfg.serviceResolver, h.cfg.version) + h.latestConfiguration = &cfg err = h.updateUpstreamServers( ctx, logger, - dataplane.BuildConfiguration(ctx, graph, h.cfg.serviceResolver, h.cfg.version), + cfg, ) case state.ClusterStateChange: h.cfg.version++ + cfg := dataplane.BuildConfiguration(ctx, graph, h.cfg.serviceResolver, h.cfg.version) + h.latestConfiguration = &cfg err = h.updateNginxConf( ctx, - dataplane.BuildConfiguration(ctx, graph, h.cfg.serviceResolver, h.cfg.version), + cfg, ) } @@ -384,3 +390,7 @@ func getGatewayAddresses( return gwAddresses, nil } + +func (h *eventHandlerImpl) GetLatestConfiguration() *dataplane.Configuration { + return h.latestConfiguration +} diff --git a/internal/mode/static/telemetry/collector.go b/internal/mode/static/telemetry/collector.go index 2b608500a3..80f1a9d787 100644 --- a/internal/mode/static/telemetry/collector.go +++ b/internal/mode/static/telemetry/collector.go @@ -8,6 +8,7 @@ import ( "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" + "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/state/dataplane" "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/state/graph" ) @@ -18,6 +19,13 @@ type GraphGetter interface { GetLatestGraph() *graph.Graph } +//go:generate go run github.com/maxbrunsfeld/counterfeiter/v6 . ConfigurationGetter + +// ConfigurationGetter gets the current Configuration. +type ConfigurationGetter interface { + GetLatestConfiguration() *dataplane.Configuration +} + // NGFResourceCounts stores the counts of all relevant Graph resources. type NGFResourceCounts struct { Gateways int @@ -48,6 +56,8 @@ type DataCollectorConfig struct { K8sClientReader client.Reader // GraphGetter allows us to get the Graph. GraphGetter GraphGetter + // ConfigurationGetter allows us to get the Configuration. + ConfigurationGetter ConfigurationGetter // Version is the NGF version. 
Version string } @@ -71,7 +81,7 @@ func (c DataCollectorImpl) Collect(ctx context.Context) (Data, error) { return Data{}, err } - graphResourceCount, err := collectGraphResourceCount(c.cfg.GraphGetter) + graphResourceCount, err := collectGraphResourceCount(c.cfg.GraphGetter, c.cfg.ConfigurationGetter) if err != nil { return Data{}, err } @@ -97,13 +107,20 @@ func collectNodeCount(ctx context.Context, k8sClient client.Reader) (int, error) return len(nodes.Items), nil } -func collectGraphResourceCount(graphGetter GraphGetter) (NGFResourceCounts, error) { +func collectGraphResourceCount( + graphGetter GraphGetter, + configurationGetter ConfigurationGetter, +) (NGFResourceCounts, error) { ngfResourceCounts := NGFResourceCounts{} g := graphGetter.GetLatestGraph() + cfg := configurationGetter.GetLatestConfiguration() if g == nil { return NGFResourceCounts{}, errors.New("latest graph cannot be nil") } + if cfg == nil { + return NGFResourceCounts{}, errors.New("latest configuration cannot be nil") + } if g.GatewayClass != nil { ngfResourceCounts.GatewayClasses = 1 @@ -115,6 +132,12 @@ func collectGraphResourceCount(graphGetter GraphGetter) (NGFResourceCounts, erro ngfResourceCounts.Secrets = countReferencedResources(g.ReferencedSecrets) ngfResourceCounts.Services = countReferencedResources(g.ReferencedServices) + for _, upstream := range cfg.Upstreams { + if upstream.ErrorMsg == "" { + ngfResourceCounts.Endpoints += len(upstream.Endpoints) + } + } + return ngfResourceCounts, nil } diff --git a/internal/mode/static/telemetry/collector_test.go b/internal/mode/static/telemetry/collector_test.go index b62fea4111..d0f6b5af0d 100644 --- a/internal/mode/static/telemetry/collector_test.go +++ b/internal/mode/static/telemetry/collector_test.go @@ -13,19 +13,23 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "github.com/nginxinc/nginx-gateway-fabric/internal/framework/events/eventsfakes" + "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/state/dataplane" "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/state/graph" + "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/state/resolver" "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/telemetry" "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/telemetry/telemetryfakes" ) var _ = Describe("Collector", Ordered, func() { var ( - k8sClientReader *eventsfakes.FakeReader - fakeGraphGetter *telemetryfakes.FakeGraphGetter - dataCollector telemetry.DataCollector - version string - graph1, graph2 *graph.Graph - ctx context.Context + k8sClientReader *eventsfakes.FakeReader + fakeGraphGetter *telemetryfakes.FakeGraphGetter + fakeConfigurationGetter *telemetryfakes.FakeConfigurationGetter + dataCollector telemetry.DataCollector + version string + graph1, graph2 *graph.Graph + ctx context.Context + config1, config2 *dataplane.Configuration ) BeforeAll(func() { @@ -36,10 +40,14 @@ var _ = Describe("Collector", Ordered, func() { fakeGraphGetter = &telemetryfakes.FakeGraphGetter{} fakeGraphGetter.GetLatestGraphReturns(&graph.Graph{}) + fakeConfigurationGetter = &telemetryfakes.FakeConfigurationGetter{} + fakeConfigurationGetter.GetLatestConfigurationReturns(&dataplane.Configuration{}) + dataCollector = telemetry.NewDataCollector(telemetry.DataCollectorConfig{ - K8sClientReader: k8sClientReader, - GraphGetter: fakeGraphGetter, - Version: version, + K8sClientReader: k8sClientReader, + GraphGetter: fakeGraphGetter, + ConfigurationGetter: 
fakeConfigurationGetter, + Version: version, }) secret1 := &v1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "secret1"}} @@ -88,6 +96,67 @@ var _ = Describe("Collector", Ordered, func() { client.ObjectKeyFromObject(nilsvc): nil, }, } + + config1 = &dataplane.Configuration{ + Upstreams: []dataplane.Upstream{ + { + Name: "upstream1", + ErrorMsg: "", + Endpoints: []resolver.Endpoint{ + { + Address: "endpoint1", + Port: 80, + }, + }, + }, + }, + } + config2 = &dataplane.Configuration{ + Upstreams: []dataplane.Upstream{ + { + Name: "upstream1", + ErrorMsg: "", + Endpoints: []resolver.Endpoint{ + { + Address: "endpoint1", + Port: 80, + }, { + Address: "endpoint2", + Port: 80, + }, { + Address: "endpoint3", + Port: 80, + }, + }, + }, + { + Name: "upstream2", + ErrorMsg: "", + Endpoints: []resolver.Endpoint{ + { + Address: "endpoint1", + Port: 80, + }, + }, + }, + { + Name: "upstream3", + ErrorMsg: "there is an error here", + Endpoints: []resolver.Endpoint{ + { + Address: "endpoint1", + Port: 80, + }, { + Address: "endpoint2", + Port: 80, + }, { + Address: "endpoint3", + Port: 80, + }, + }, + }, + }, + } }) When("retrieving node count data", func() { @@ -184,6 +253,7 @@ var _ = Describe("Collector", Ordered, func() { When("retrieving NGF resource counts", func() { It("generates correct data for graph with one of each resource", func() { fakeGraphGetter.GetLatestGraphReturns(graph1) + fakeConfigurationGetter.GetLatestConfigurationReturns(config1) expData := telemetry.Data{ ProjectMetadata: telemetry.ProjectMetadata{Name: "NGF", Version: version}, @@ -194,6 +264,7 @@ var _ = Describe("Collector", Ordered, func() { HTTPRoutes: 1, Secrets: 1, Services: 1, + Endpoints: 1, }, } @@ -205,6 +276,7 @@ var _ = Describe("Collector", Ordered, func() { It("generates correct data for graph with multiple of each resource", func() { fakeGraphGetter.GetLatestGraphReturns(graph2) + fakeConfigurationGetter.GetLatestConfigurationReturns(config2) expData := telemetry.Data{ ProjectMetadata: telemetry.ProjectMetadata{Name: "NGF", Version: version}, @@ -215,6 +287,7 @@ var _ = Describe("Collector", Ordered, func() { HTTPRoutes: 3, Secrets: 2, Services: 2, + Endpoints: 4, }, } @@ -228,6 +301,8 @@ var _ = Describe("Collector", Ordered, func() { When("it encounters an error while collecting data", func() { BeforeEach(func() { k8sClientReader.ListReturns(nil) + fakeGraphGetter.GetLatestGraphReturns(&graph.Graph{}) + fakeConfigurationGetter.GetLatestConfigurationReturns(&dataplane.Configuration{}) }) It("should error on client errors", func() { @@ -237,11 +312,18 @@ var _ = Describe("Collector", Ordered, func() { Expect(err).To(HaveOccurred()) }) - It("should error on latest graph errors", func() { + It("should error on nil latest graph", func() { fakeGraphGetter.GetLatestGraphReturns(nil) _, err := dataCollector.Collect(ctx) Expect(err).To(HaveOccurred()) }) + + It("should error on nil latest configuration", func() { + fakeConfigurationGetter.GetLatestConfigurationReturns(nil) + + _, err := dataCollector.Collect(ctx) + Expect(err).To(HaveOccurred()) + }) }) }) diff --git a/internal/mode/static/telemetry/telemetryfakes/fake_configuration_getter.go b/internal/mode/static/telemetry/telemetryfakes/fake_configuration_getter.go new file mode 100644 index 0000000000..841fff7679 --- /dev/null +++ b/internal/mode/static/telemetry/telemetryfakes/fake_configuration_getter.go @@ -0,0 +1,103 @@ +// Code generated by counterfeiter. DO NOT EDIT. 
+package telemetryfakes + +import ( + "sync" + + "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/state/dataplane" + "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/telemetry" +) + +type FakeConfigurationGetter struct { + GetLatestConfigurationStub func() *dataplane.Configuration + getLatestConfigurationMutex sync.RWMutex + getLatestConfigurationArgsForCall []struct { + } + getLatestConfigurationReturns struct { + result1 *dataplane.Configuration + } + getLatestConfigurationReturnsOnCall map[int]struct { + result1 *dataplane.Configuration + } + invocations map[string][][]interface{} + invocationsMutex sync.RWMutex +} + +func (fake *FakeConfigurationGetter) GetLatestConfiguration() *dataplane.Configuration { + fake.getLatestConfigurationMutex.Lock() + ret, specificReturn := fake.getLatestConfigurationReturnsOnCall[len(fake.getLatestConfigurationArgsForCall)] + fake.getLatestConfigurationArgsForCall = append(fake.getLatestConfigurationArgsForCall, struct { + }{}) + stub := fake.GetLatestConfigurationStub + fakeReturns := fake.getLatestConfigurationReturns + fake.recordInvocation("GetLatestConfiguration", []interface{}{}) + fake.getLatestConfigurationMutex.Unlock() + if stub != nil { + return stub() + } + if specificReturn { + return ret.result1 + } + return fakeReturns.result1 +} + +func (fake *FakeConfigurationGetter) GetLatestConfigurationCallCount() int { + fake.getLatestConfigurationMutex.RLock() + defer fake.getLatestConfigurationMutex.RUnlock() + return len(fake.getLatestConfigurationArgsForCall) +} + +func (fake *FakeConfigurationGetter) GetLatestConfigurationCalls(stub func() *dataplane.Configuration) { + fake.getLatestConfigurationMutex.Lock() + defer fake.getLatestConfigurationMutex.Unlock() + fake.GetLatestConfigurationStub = stub +} + +func (fake *FakeConfigurationGetter) GetLatestConfigurationReturns(result1 *dataplane.Configuration) { + fake.getLatestConfigurationMutex.Lock() + defer fake.getLatestConfigurationMutex.Unlock() + fake.GetLatestConfigurationStub = nil + fake.getLatestConfigurationReturns = struct { + result1 *dataplane.Configuration + }{result1} +} + +func (fake *FakeConfigurationGetter) GetLatestConfigurationReturnsOnCall(i int, result1 *dataplane.Configuration) { + fake.getLatestConfigurationMutex.Lock() + defer fake.getLatestConfigurationMutex.Unlock() + fake.GetLatestConfigurationStub = nil + if fake.getLatestConfigurationReturnsOnCall == nil { + fake.getLatestConfigurationReturnsOnCall = make(map[int]struct { + result1 *dataplane.Configuration + }) + } + fake.getLatestConfigurationReturnsOnCall[i] = struct { + result1 *dataplane.Configuration + }{result1} +} + +func (fake *FakeConfigurationGetter) Invocations() map[string][][]interface{} { + fake.invocationsMutex.RLock() + defer fake.invocationsMutex.RUnlock() + fake.getLatestConfigurationMutex.RLock() + defer fake.getLatestConfigurationMutex.RUnlock() + copiedInvocations := map[string][][]interface{}{} + for key, value := range fake.invocations { + copiedInvocations[key] = value + } + return copiedInvocations +} + +func (fake *FakeConfigurationGetter) recordInvocation(key string, args []interface{}) { + fake.invocationsMutex.Lock() + defer fake.invocationsMutex.Unlock() + if fake.invocations == nil { + fake.invocations = map[string][][]interface{}{} + } + if fake.invocations[key] == nil { + fake.invocations[key] = [][]interface{}{} + } + fake.invocations[key] = append(fake.invocations[key], args) +} + +var _ telemetry.ConfigurationGetter = 
new(FakeConfigurationGetter) From 130ce477d41d2f4e9f0160f6a10dd80f4e353672 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Mon, 29 Jan 2024 13:41:50 -0800 Subject: [PATCH 17/43] Add nodes resource to RBAC --- deploy/helm-chart/templates/rbac.yaml | 1 + deploy/manifests/nginx-gateway.yaml | 1 + internal/mode/static/telemetry/collector.go | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/deploy/helm-chart/templates/rbac.yaml b/deploy/helm-chart/templates/rbac.yaml index ac49069f32..90414f7975 100644 --- a/deploy/helm-chart/templates/rbac.yaml +++ b/deploy/helm-chart/templates/rbac.yaml @@ -21,6 +21,7 @@ rules: - namespaces - services - secrets + - nodes verbs: - list - watch diff --git a/deploy/manifests/nginx-gateway.yaml b/deploy/manifests/nginx-gateway.yaml index 80fc304c05..84636e4026 100644 --- a/deploy/manifests/nginx-gateway.yaml +++ b/deploy/manifests/nginx-gateway.yaml @@ -32,6 +32,7 @@ rules: - namespaces - services - secrets + - nodes verbs: - list - watch diff --git a/internal/mode/static/telemetry/collector.go b/internal/mode/static/telemetry/collector.go index 80f1a9d787..d957c7ddb5 100644 --- a/internal/mode/static/telemetry/collector.go +++ b/internal/mode/static/telemetry/collector.go @@ -99,7 +99,7 @@ func (c DataCollectorImpl) Collect(ctx context.Context) (Data, error) { } func collectNodeCount(ctx context.Context, k8sClient client.Reader) (int, error) { - nodes := v1.NodeList{} + var nodes v1.NodeList if err := k8sClient.List(ctx, &nodes); err != nil { return 0, err } From 8b11f1d381faffd3bed2c41a6e67d5e581a6e764 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Mon, 29 Jan 2024 14:44:04 -0800 Subject: [PATCH 18/43] Add additional documentation on exported types and functions --- internal/mode/static/state/change_processor.go | 1 + internal/mode/static/telemetry/collector.go | 15 +++++++++------ internal/mode/static/telemetry/collector_test.go | 2 +- internal/mode/static/telemetry/job.go | 1 + 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/internal/mode/static/state/change_processor.go b/internal/mode/static/state/change_processor.go index 9d07be7aa0..385ae0e938 100644 --- a/internal/mode/static/state/change_processor.go +++ b/internal/mode/static/state/change_processor.go @@ -62,6 +62,7 @@ type ChangeProcessor interface { // Process produces a graph-like representation of GatewayAPI resources. // If no changes were captured, the changed return argument will be NoChange and graph will be empty. Process() (changeType ChangeType, graphCfg *graph.Graph) + // GetLatestGraph returns the latest Graph. GetLatestGraph() *graph.Graph } diff --git a/internal/mode/static/telemetry/collector.go b/internal/mode/static/telemetry/collector.go index d957c7ddb5..de4587b967 100644 --- a/internal/mode/static/telemetry/collector.go +++ b/internal/mode/static/telemetry/collector.go @@ -3,6 +3,7 @@ package telemetry import ( "context" "errors" + "fmt" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" @@ -14,19 +15,19 @@ import ( //go:generate go run github.com/maxbrunsfeld/counterfeiter/v6 . GraphGetter -// GraphGetter gets the current Graph. +// GraphGetter gets the latest Graph. type GraphGetter interface { GetLatestGraph() *graph.Graph } //go:generate go run github.com/maxbrunsfeld/counterfeiter/v6 . ConfigurationGetter -// ConfigurationGetter gets the current Configuration. +// ConfigurationGetter gets the latest Configuration. 
 type ConfigurationGetter interface {
 	GetLatestConfiguration() *dataplane.Configuration
 }
 
-// NGFResourceCounts stores the counts of all relevant Graph resources.
+// NGFResourceCounts stores the counts of all relevant resources that NGF processes and generates configuration from.
 type NGFResourceCounts struct {
 	Gateways       int
 	GatewayClasses int
@@ -62,11 +63,13 @@ type DataCollectorConfig struct {
 	Version string
 }
 
+// DataCollectorImpl is an implementation of DataCollector.
 type DataCollectorImpl struct {
 	cfg DataCollectorConfig
 }
 
-func NewDataCollector(
+// NewDataCollectorImpl creates a new DataCollectorImpl for a telemetry Job.
+func NewDataCollectorImpl(
 	cfg DataCollectorConfig,
 ) *DataCollectorImpl {
 	return &DataCollectorImpl{
@@ -78,12 +81,12 @@ func NewDataCollector(
 func (c DataCollectorImpl) Collect(ctx context.Context) (Data, error) {
 	nodeCount, err := collectNodeCount(ctx, c.cfg.K8sClientReader)
 	if err != nil {
-		return Data{}, err
+		return Data{}, fmt.Errorf("failed to collect node count: %w", err)
 	}
 
 	graphResourceCount, err := collectGraphResourceCount(c.cfg.GraphGetter, c.cfg.ConfigurationGetter)
 	if err != nil {
-		return Data{}, err
+		return Data{}, fmt.Errorf("failed to collect NGF resource counts: %w", err)
 	}
 
 	data := Data{
diff --git a/internal/mode/static/telemetry/collector_test.go b/internal/mode/static/telemetry/collector_test.go
index d0f6b5af0d..a3ec8a28e7 100644
--- a/internal/mode/static/telemetry/collector_test.go
+++ b/internal/mode/static/telemetry/collector_test.go
@@ -43,7 +43,7 @@ var _ = Describe("Collector", Ordered, func() {
 		fakeConfigurationGetter = &telemetryfakes.FakeConfigurationGetter{}
 		fakeConfigurationGetter.GetLatestConfigurationReturns(&dataplane.Configuration{})
 
-		dataCollector = telemetry.NewDataCollector(telemetry.DataCollectorConfig{
+		dataCollector = telemetry.NewDataCollectorImpl(telemetry.DataCollectorConfig{
 			K8sClientReader:     k8sClientReader,
 			GraphGetter:         fakeGraphGetter,
 			ConfigurationGetter: fakeConfigurationGetter,
diff --git a/internal/mode/static/telemetry/job.go b/internal/mode/static/telemetry/job.go
index 62c20d5b0b..6888b5deb9 100644
--- a/internal/mode/static/telemetry/job.go
+++ b/internal/mode/static/telemetry/job.go
@@ -13,6 +13,7 @@ import (
 
 // DataCollector collects telemetry data.
 type DataCollector interface {
+	// Collect collects and returns telemetry Data.
 	Collect(ctx context.Context) (Data, error)
 }
 

From 905f476de589a3a3c38386f3fa2082addd954004 Mon Sep 17 00:00:00 2001
From: Benjamin Jee
Date: Mon, 29 Jan 2024 16:34:10 -0800
Subject: [PATCH 19/43] Add lock to eventHandlerImpl

---
 internal/mode/static/handler.go | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/internal/mode/static/handler.go b/internal/mode/static/handler.go
index ba7213273c..50cfb2afcb 100644
--- a/internal/mode/static/handler.go
+++ b/internal/mode/static/handler.go
@@ -3,6 +3,7 @@ package static
 import (
 	"context"
 	"fmt"
+	"sync"
 	"time"
 
 	"github.com/go-logr/logr"
@@ -75,6 +76,7 @@ type eventHandlerImpl struct {
 	// latestConfiguration is the latest Configuration generation.
 	latestConfiguration *dataplane.Configuration
 	cfg                 eventHandlerConfig
+	lock                sync.Mutex
 }
 
 // newEventHandlerImpl creates a new eventHandlerImpl.
@@ -392,5 +394,8 @@ func getGatewayAddresses( } func (h *eventHandlerImpl) GetLatestConfiguration() *dataplane.Configuration { + h.lock.Lock() + defer h.lock.Unlock() + return h.latestConfiguration } From 1e455e5b290e8eb488ea61df7b561bad7abc1c46 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Mon, 29 Jan 2024 17:11:22 -0800 Subject: [PATCH 20/43] Add GetLatestGraph into change processor tests --- .../static/state/change_processor_test.go | 42 ++++++++++++++++++- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/internal/mode/static/state/change_processor_test.go b/internal/mode/static/state/change_processor_test.go index 21456ef0b6..0caf207e93 100644 --- a/internal/mode/static/state/change_processor_test.go +++ b/internal/mode/static/state/change_processor_test.go @@ -520,6 +520,7 @@ var _ = Describe("ChangeProcessor", func() { changed, graphCfg := processor.Process() Expect(changed).To(Equal(state.NoChange)) Expect(graphCfg).To(BeNil()) + Expect(processor.GetLatestGraph()).To(BeNil()) }) }) When("GatewayClass doesn't exist", func() { @@ -530,6 +531,7 @@ var _ = Describe("ChangeProcessor", func() { changed, graphCfg := processor.Process() Expect(changed).To(Equal(state.ClusterStateChange)) Expect(helpers.Diff(&graph.Graph{}, graphCfg)).To(BeEmpty()) + Expect(helpers.Diff(&graph.Graph{}, processor.GetLatestGraph())).To(BeEmpty()) }) }) When("Gateways don't exist", func() { @@ -540,6 +542,7 @@ var _ = Describe("ChangeProcessor", func() { changed, graphCfg := processor.Process() Expect(changed).To(Equal(state.ClusterStateChange)) Expect(helpers.Diff(&graph.Graph{}, graphCfg)).To(BeEmpty()) + Expect(helpers.Diff(&graph.Graph{}, processor.GetLatestGraph())).To(BeEmpty()) }) }) When("the different namespace TLS Secret is upserted", func() { @@ -549,6 +552,7 @@ var _ = Describe("ChangeProcessor", func() { changed, graphCfg := processor.Process() Expect(changed).To(Equal(state.NoChange)) Expect(graphCfg).To(BeNil()) + Expect(helpers.Diff(&graph.Graph{}, processor.GetLatestGraph())).To(BeEmpty()) }) }) When("the first Gateway is upserted", func() { @@ -584,6 +588,7 @@ var _ = Describe("ChangeProcessor", func() { changed, graphCfg := processor.Process() Expect(changed).To(Equal(state.ClusterStateChange)) Expect(helpers.Diff(expGraph, graphCfg)).To(BeEmpty()) + Expect(helpers.Diff(expGraph, processor.GetLatestGraph())).To(BeEmpty()) }) }) }) @@ -637,6 +642,7 @@ var _ = Describe("ChangeProcessor", func() { changed, graphCfg := processor.Process() Expect(changed).To(Equal(state.ClusterStateChange)) Expect(helpers.Diff(expGraph, graphCfg)).To(BeEmpty()) + Expect(helpers.Diff(expGraph, processor.GetLatestGraph())).To(BeEmpty()) }) }) When("the ReferenceGrant allowing the Gateway to reference its Secret is upserted", func() { @@ -659,6 +665,7 @@ var _ = Describe("ChangeProcessor", func() { changed, graphCfg := processor.Process() Expect(changed).To(Equal(state.ClusterStateChange)) Expect(helpers.Diff(expGraph, graphCfg)).To(BeEmpty()) + Expect(helpers.Diff(expGraph, processor.GetLatestGraph())).To(BeEmpty()) }) }) When("the ReferenceGrant allowing the hr1 to reference the Service in different ns is upserted", func() { @@ -672,6 +679,7 @@ var _ = Describe("ChangeProcessor", func() { changed, graphCfg := processor.Process() Expect(changed).To(Equal(state.ClusterStateChange)) Expect(helpers.Diff(expGraph, graphCfg)).To(BeEmpty()) + Expect(helpers.Diff(expGraph, processor.GetLatestGraph())).To(BeEmpty()) }) }) When("the Gateway API CRD with bundle version annotation change is processed", func() 
{ @@ -689,6 +697,7 @@ var _ = Describe("ChangeProcessor", func() { changed, graphCfg := processor.Process() Expect(changed).To(Equal(state.ClusterStateChange)) Expect(helpers.Diff(expGraph, graphCfg)).To(BeEmpty()) + Expect(helpers.Diff(expGraph, processor.GetLatestGraph())).To(BeEmpty()) }) }) When("the Gateway API CRD without bundle version annotation change is processed", func() { @@ -697,9 +706,18 @@ var _ = Describe("ChangeProcessor", func() { processor.CaptureUpsertChange(gatewayAPICRDSameVersion) + expGraph.ReferencedSecrets[client.ObjectKeyFromObject(diffNsTLSSecret)] = &graph.Secret{ + Source: diffNsTLSSecret, + } + + expGraph.GatewayClass.Conditions = conditions.NewGatewayClassSupportedVersionBestEffort( + gatewayclass.SupportedVersion, + ) + changed, graphCfg := processor.Process() Expect(changed).To(Equal(state.NoChange)) Expect(graphCfg).To(BeNil()) + Expect(helpers.Diff(expGraph, processor.GetLatestGraph())).To(BeEmpty()) }) }) When("the Gateway API CRD with bundle version annotation change is processed", func() { @@ -714,6 +732,7 @@ var _ = Describe("ChangeProcessor", func() { changed, graphCfg := processor.Process() Expect(changed).To(Equal(state.ClusterStateChange)) Expect(helpers.Diff(expGraph, graphCfg)).To(BeEmpty()) + Expect(helpers.Diff(expGraph, processor.GetLatestGraph())).To(BeEmpty()) }) }) When("the first HTTPRoute update with a generation changed is processed", func() { @@ -732,6 +751,7 @@ var _ = Describe("ChangeProcessor", func() { changed, graphCfg := processor.Process() Expect(changed).To(Equal(state.ClusterStateChange)) Expect(helpers.Diff(expGraph, graphCfg)).To(BeEmpty()) + Expect(helpers.Diff(expGraph, processor.GetLatestGraph())).To(BeEmpty()) }, ) }) @@ -747,6 +767,7 @@ var _ = Describe("ChangeProcessor", func() { changed, graphCfg := processor.Process() Expect(changed).To(Equal(state.ClusterStateChange)) Expect(helpers.Diff(expGraph, graphCfg)).To(BeEmpty()) + Expect(helpers.Diff(expGraph, processor.GetLatestGraph())).To(BeEmpty()) }) }) When("the GatewayClass update with generation change is processed", func() { @@ -761,6 +782,7 @@ var _ = Describe("ChangeProcessor", func() { changed, graphCfg := processor.Process() Expect(changed).To(Equal(state.ClusterStateChange)) Expect(helpers.Diff(expGraph, graphCfg)).To(BeEmpty()) + Expect(helpers.Diff(expGraph, processor.GetLatestGraph())).To(BeEmpty()) }) }) When("the different namespace TLS secret is upserted again", func() { @@ -774,24 +796,33 @@ var _ = Describe("ChangeProcessor", func() { changed, graphCfg := processor.Process() Expect(changed).To(Equal(state.ClusterStateChange)) Expect(helpers.Diff(expGraph, graphCfg)).To(BeEmpty()) + Expect(helpers.Diff(expGraph, processor.GetLatestGraph())).To(BeEmpty()) }) }) When("no changes are captured", func() { It("returns nil graph", func() { - changed, graphCfg := processor.Process() + expGraph.ReferencedSecrets[client.ObjectKeyFromObject(diffNsTLSSecret)] = &graph.Secret{ + Source: diffNsTLSSecret, + } + changed, graphCfg := processor.Process() Expect(changed).To(Equal(state.NoChange)) Expect(graphCfg).To(BeNil()) + Expect(helpers.Diff(expGraph, processor.GetLatestGraph())).To(BeEmpty()) }) }) When("the same namespace TLS Secret is upserted", func() { It("returns nil graph", func() { processor.CaptureUpsertChange(sameNsTLSSecret) - changed, graphCfg := processor.Process() + expGraph.ReferencedSecrets[client.ObjectKeyFromObject(diffNsTLSSecret)] = &graph.Secret{ + Source: diffNsTLSSecret, + } + changed, graphCfg := processor.Process() 
Expect(changed).To(Equal(state.NoChange)) Expect(graphCfg).To(BeNil()) + Expect(helpers.Diff(expGraph, processor.GetLatestGraph())).To(BeEmpty()) }) }) When("the second Gateway is upserted", func() { @@ -808,6 +839,7 @@ var _ = Describe("ChangeProcessor", func() { changed, graphCfg := processor.Process() Expect(changed).To(Equal(state.ClusterStateChange)) Expect(helpers.Diff(expGraph, graphCfg)).To(BeEmpty()) + Expect(helpers.Diff(expGraph, processor.GetLatestGraph())).To(BeEmpty()) }) }) When("the second HTTPRoute is upserted", func() { @@ -833,6 +865,7 @@ var _ = Describe("ChangeProcessor", func() { changed, graphCfg := processor.Process() Expect(changed).To(Equal(state.ClusterStateChange)) Expect(helpers.Diff(expGraph, graphCfg)).To(BeEmpty()) + Expect(helpers.Diff(expGraph, processor.GetLatestGraph())).To(BeEmpty()) }) }) When("the first Gateway is deleted", func() { @@ -868,6 +901,7 @@ var _ = Describe("ChangeProcessor", func() { changed, graphCfg := processor.Process() Expect(changed).To(Equal(state.ClusterStateChange)) Expect(helpers.Diff(expGraph, graphCfg)).To(BeEmpty()) + Expect(helpers.Diff(expGraph, processor.GetLatestGraph())).To(BeEmpty()) }) }) When("the second HTTPRoute is deleted", func() { @@ -900,6 +934,7 @@ var _ = Describe("ChangeProcessor", func() { changed, graphCfg := processor.Process() Expect(changed).To(Equal(state.ClusterStateChange)) Expect(helpers.Diff(expGraph, graphCfg)).To(BeEmpty()) + Expect(helpers.Diff(expGraph, processor.GetLatestGraph())).To(BeEmpty()) }) }) When("the GatewayClass is deleted", func() { @@ -923,6 +958,7 @@ var _ = Describe("ChangeProcessor", func() { changed, graphCfg := processor.Process() Expect(changed).To(Equal(state.ClusterStateChange)) Expect(helpers.Diff(expGraph, graphCfg)).To(BeEmpty()) + Expect(helpers.Diff(expGraph, processor.GetLatestGraph())).To(BeEmpty()) }) }) When("the second Gateway is deleted", func() { @@ -938,6 +974,7 @@ var _ = Describe("ChangeProcessor", func() { changed, graphCfg := processor.Process() Expect(changed).To(Equal(state.ClusterStateChange)) Expect(helpers.Diff(&graph.Graph{}, graphCfg)).To(BeEmpty()) + Expect(helpers.Diff(&graph.Graph{}, processor.GetLatestGraph())).To(BeEmpty()) }) }) When("the first HTTPRoute is deleted", func() { @@ -953,6 +990,7 @@ var _ = Describe("ChangeProcessor", func() { changed, graphCfg := processor.Process() Expect(changed).To(Equal(state.ClusterStateChange)) Expect(helpers.Diff(&graph.Graph{}, graphCfg)).To(BeEmpty()) + Expect(helpers.Diff(&graph.Graph{}, processor.GetLatestGraph())).To(BeEmpty()) }) }) }) From 4062c72008f2363f888fde738ff060855a10b40a Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Mon, 29 Jan 2024 17:31:28 -0800 Subject: [PATCH 21/43] Add expData to job test --- internal/mode/static/telemetry/job_test.go | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/internal/mode/static/telemetry/job_test.go b/internal/mode/static/telemetry/job_test.go index 76e2124c74..a54cccedcf 100644 --- a/internal/mode/static/telemetry/job_test.go +++ b/internal/mode/static/telemetry/job_test.go @@ -18,6 +18,7 @@ var _ = Describe("Job", func() { job *telemetry.Job exporter *telemetryfakes.FakeExporter dataCollector *telemetryfakes.FakeDataCollector + expData telemetry.Data ) const timeout = 10 * time.Second @@ -30,6 +31,20 @@ var _ = Describe("Job", func() { Period: 1 * time.Millisecond, // 1ms is much smaller than timeout so the Job should report a few times DataCollector: dataCollector, }) + + expData = telemetry.Data{ + ProjectMetadata: 
telemetry.ProjectMetadata{Name: "NGF", Version: "1.1"}, + NodeCount: 3, + NGFResourceCounts: telemetry.NGFResourceCounts{ + Gateways: 1, + GatewayClasses: 1, + HTTPRoutes: 1, + Secrets: 1, + Services: 1, + Endpoints: 1, + }, + } + dataCollector.CollectReturns(expData, nil) }) DescribeTable( @@ -51,7 +66,7 @@ var _ = Describe("Job", func() { Eventually(exporter.ExportCallCount).Should(BeNumerically(">=", minReports)) for i := 0; i < minReports; i++ { _, data := exporter.ExportArgsForCall(i) - Expect(data).To(Equal(telemetry.Data{})) + Expect(data).To(Equal(expData)) } cancel() From c9749a621938de32dc111065254d33da34197412 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Wed, 31 Jan 2024 10:26:50 -0800 Subject: [PATCH 22/43] Add GetLatestConfiguration to handler tests --- internal/mode/static/handler_test.go | 38 ++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/internal/mode/static/handler_test.go b/internal/mode/static/handler_test.go index 9adbf6aeca..a2fd0b3a21 100644 --- a/internal/mode/static/handler_test.go +++ b/internal/mode/static/handler_test.go @@ -132,8 +132,11 @@ var _ = Describe("eventHandler", func() { handler.HandleEventBatch(context.Background(), ctlrZap.New(), batch) + dcfg := &dataplane.Configuration{Version: 1} + checkUpsertEventExpectations(e) - expectReconfig(dataplane.Configuration{Version: 1}, fakeCfgFiles) + expectReconfig(*dcfg, fakeCfgFiles) + Expect(helpers.Diff(handler.GetLatestConfiguration(), dcfg)).To(BeEmpty()) }) It("should process Delete", func() { @@ -145,8 +148,11 @@ var _ = Describe("eventHandler", func() { handler.HandleEventBatch(context.Background(), ctlrZap.New(), batch) + dcfg := &dataplane.Configuration{Version: 1} + checkDeleteEventExpectations(e) - expectReconfig(dataplane.Configuration{Version: 1}, fakeCfgFiles) + expectReconfig(*dcfg, fakeCfgFiles) + Expect(helpers.Diff(handler.GetLatestConfiguration(), dcfg)).To(BeEmpty()) }) }) @@ -165,6 +171,7 @@ var _ = Describe("eventHandler", func() { checkDeleteEventExpectations(deleteEvent) handler.HandleEventBatch(context.Background(), ctlrZap.New(), batch) + Expect(helpers.Diff(handler.GetLatestConfiguration(), &dataplane.Configuration{Version: 2})).To(BeEmpty()) }) }) }) @@ -199,6 +206,8 @@ var _ = Describe("eventHandler", func() { batch := []interface{}{&events.UpsertEvent{Resource: cfg(ngfAPI.ControllerLogLevelError)}} handler.HandleEventBatch(context.Background(), ctlrZap.New(), batch) + Expect(handler.GetLatestConfiguration()).To(BeNil()) + Expect(fakeStatusUpdater.UpdateCallCount()).Should(Equal(1)) _, statuses := fakeStatusUpdater.UpdateArgsForCall(0) Expect(statuses).To(Equal(expStatuses(staticConds.NewNginxGatewayValid()))) @@ -210,6 +219,8 @@ var _ = Describe("eventHandler", func() { batch := []interface{}{&events.UpsertEvent{Resource: cfg(ngfAPI.ControllerLogLevel("invalid"))}} handler.HandleEventBatch(context.Background(), ctlrZap.New(), batch) + Expect(handler.GetLatestConfiguration()).To(BeNil()) + Expect(fakeStatusUpdater.UpdateCallCount()).Should(Equal(1)) _, statuses := fakeStatusUpdater.UpdateArgsForCall(0) cond := staticConds.NewNginxGatewayInvalid( @@ -228,6 +239,9 @@ var _ = Describe("eventHandler", func() { It("handles a deleted config", func() { batch := []interface{}{&events.DeleteEvent{Type: &ngfAPI.NginxGateway{}}} handler.HandleEventBatch(context.Background(), ctlrZap.New(), batch) + + Expect(handler.GetLatestConfiguration()).To(BeNil()) + Expect(len(fakeEventRecorder.Events)).To(Equal(1)) event := <-fakeEventRecorder.Events 
Expect(event).To(Equal("Warning ResourceDeleted NginxGateway configuration was deleted; using defaults")) @@ -253,6 +267,8 @@ var _ = Describe("eventHandler", func() { handler.HandleEventBatch(context.Background(), ctlrZap.New(), batch) + Expect(handler.GetLatestConfiguration()).To(BeNil()) + Expect(fakeStatusUpdater.UpdateAddressesCallCount()).To(BeZero()) }) @@ -266,6 +282,8 @@ var _ = Describe("eventHandler", func() { batch := []interface{}{e} handler.HandleEventBatch(context.Background(), ctlrZap.New(), batch) + + Expect(handler.GetLatestConfiguration()).To(BeNil()) Expect(fakeStatusUpdater.UpdateAddressesCallCount()).ToNot(BeZero()) }) @@ -280,6 +298,8 @@ var _ = Describe("eventHandler", func() { batch := []interface{}{e} handler.HandleEventBatch(context.Background(), ctlrZap.New(), batch) + + Expect(handler.GetLatestConfiguration()).To(BeNil()) Expect(fakeStatusUpdater.UpdateAddressesCallCount()).ToNot(BeZero()) }) }) @@ -310,6 +330,8 @@ var _ = Describe("eventHandler", func() { fakeNginxRuntimeMgr.IsPlusReturns(true) handler.HandleEventBatch(context.Background(), ctlrZap.New(), batch) + Expect(helpers.Diff(handler.GetLatestConfiguration(), &dataplane.Configuration{Version: 1})).To(BeEmpty()) + Expect(fakeGenerator.GenerateCallCount()).To(Equal(1)) Expect(fakeNginxFileMgr.ReplaceFilesCallCount()).To(Equal(1)) Expect(fakeNginxRuntimeMgr.GetUpstreamsCallCount()).To(Equal(1)) @@ -319,6 +341,8 @@ var _ = Describe("eventHandler", func() { When("not running NGINX Plus", func() { It("should not call the NGINX Plus API", func() { handler.HandleEventBatch(context.Background(), ctlrZap.New(), batch) + Expect(helpers.Diff(handler.GetLatestConfiguration(), &dataplane.Configuration{Version: 1})).To(BeEmpty()) + Expect(fakeGenerator.GenerateCallCount()).To(Equal(1)) Expect(fakeNginxFileMgr.ReplaceFilesCallCount()).To(Equal(1)) Expect(fakeNginxRuntimeMgr.GetUpstreamsCallCount()).To(Equal(0)) @@ -410,6 +434,9 @@ var _ = Describe("eventHandler", func() { Expect(handler.cfg.healthChecker.readyCheck(nil)).ToNot(Succeed()) handler.HandleEventBatch(context.Background(), ctlrZap.New(), batch) + + Expect(helpers.Diff(handler.GetLatestConfiguration(), &dataplane.Configuration{Version: 1})).To(BeEmpty()) + Expect(handler.cfg.healthChecker.readyCheck(nil)).To(Succeed()) }) @@ -419,6 +446,9 @@ var _ = Describe("eventHandler", func() { Expect(handler.cfg.healthChecker.readyCheck(nil)).ToNot(Succeed()) handler.HandleEventBatch(context.Background(), ctlrZap.New(), batch) + + Expect(handler.GetLatestConfiguration()).To(BeNil()) + Expect(handler.cfg.healthChecker.readyCheck(nil)).To(Succeed()) }) @@ -446,6 +476,8 @@ var _ = Describe("eventHandler", func() { handler.HandleEventBatch(context.Background(), ctlrZap.New(), batch) + Expect(helpers.Diff(handler.GetLatestConfiguration(), &dataplane.Configuration{Version: 2})).To(BeEmpty()) + Expect(handler.cfg.healthChecker.readyCheck(nil)).To(Succeed()) }) @@ -458,6 +490,8 @@ var _ = Describe("eventHandler", func() { } Expect(handle).Should(Panic()) + + Expect(handler.GetLatestConfiguration()).To(BeNil()) }) }) From 73e390dc7ddc0e161cc9a1ccf7287d4959896348 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Wed, 31 Jan 2024 10:55:20 -0800 Subject: [PATCH 23/43] Revert ReferencedServices to no longer store services --- .../static/state/change_processor_test.go | 4 +- internal/mode/static/state/graph/graph.go | 6 +-- .../mode/static/state/graph/graph_test.go | 8 +-- internal/mode/static/state/graph/service.go | 8 ++- .../mode/static/state/graph/service_test.go | 51 
++++++++----------- internal/mode/static/telemetry/collector.go | 19 +------ .../mode/static/telemetry/collector_test.go | 16 +++--- 7 files changed, 43 insertions(+), 69 deletions(-) diff --git a/internal/mode/static/state/change_processor_test.go b/internal/mode/static/state/change_processor_test.go index 0caf207e93..dba175af51 100644 --- a/internal/mode/static/state/change_processor_test.go +++ b/internal/mode/static/state/change_processor_test.go @@ -507,11 +507,11 @@ var _ = Describe("ChangeProcessor", func() { {Namespace: "test", Name: "hr-1"}: expRouteHR1, }, ReferencedSecrets: map[types.NamespacedName]*graph.Secret{}, - ReferencedServices: map[types.NamespacedName]*apiv1.Service{ + ReferencedServices: map[types.NamespacedName]struct{}{ { Namespace: "service-ns", Name: "service", - }: nil, + }: {}, }, } }) diff --git a/internal/mode/static/state/graph/graph.go b/internal/mode/static/state/graph/graph.go index f8a2f93bd0..556c38496b 100644 --- a/internal/mode/static/state/graph/graph.go +++ b/internal/mode/static/state/graph/graph.go @@ -49,8 +49,8 @@ type Graph struct { // ReferencedNamespaces includes Namespaces with labels that match the Gateway Listener's label selector. ReferencedNamespaces map[types.NamespacedName]*v1.Namespace // ReferencedServices includes the NamespacedNames of all the Services that are referenced by at least one HTTPRoute. - // Non-existing Services will have their NamespacedName mapped to nil. - ReferencedServices map[types.NamespacedName]*v1.Service + // Storing the whole resource is not necessary, compared to the similar maps above. + ReferencedServices map[types.NamespacedName]struct{} } // ProtectedPorts are the ports that may not be configured by a listener with a descriptive name of each port. @@ -123,7 +123,7 @@ func BuildGraph( referencedNamespaces := buildReferencedNamespaces(state.Namespaces, gw) - referencedServices := buildReferencedServices(routes, state.Services) + referencedServices := buildReferencedServices(routes) g := &Graph{ GatewayClass: gc, diff --git a/internal/mode/static/state/graph/graph_test.go b/internal/mode/static/state/graph/graph_test.go index fa5c75a942..ed10c01d51 100644 --- a/internal/mode/static/state/graph/graph_test.go +++ b/internal/mode/static/state/graph/graph_test.go @@ -347,8 +347,8 @@ func TestBuildGraph(t *testing.T) { ReferencedNamespaces: map[types.NamespacedName]*v1.Namespace{ client.ObjectKeyFromObject(ns): ns, }, - ReferencedServices: map[types.NamespacedName]*v1.Service{ - client.ObjectKeyFromObject(svc): svc, + ReferencedServices: map[types.NamespacedName]struct{}{ + client.ObjectKeyFromObject(svc): {}, }, } } @@ -504,8 +504,8 @@ func TestIsReferenced(t *testing.T) { ReferencedNamespaces: map[types.NamespacedName]*v1.Namespace{ client.ObjectKeyFromObject(nsInGraph): nsInGraph, }, - ReferencedServices: map[types.NamespacedName]*v1.Service{ - client.ObjectKeyFromObject(serviceInGraph): serviceInGraph, + ReferencedServices: map[types.NamespacedName]struct{}{ + client.ObjectKeyFromObject(serviceInGraph): {}, }, } diff --git a/internal/mode/static/state/graph/service.go b/internal/mode/static/state/graph/service.go index 3f14bf098f..9568b59df1 100644 --- a/internal/mode/static/state/graph/service.go +++ b/internal/mode/static/state/graph/service.go @@ -1,15 +1,13 @@ package graph import ( - v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" ) func buildReferencedServices( routes map[types.NamespacedName]*Route, - services map[types.NamespacedName]*v1.Service, -) map[types.NamespacedName]*v1.Service { - 
svcNames := make(map[types.NamespacedName]*v1.Service) +) map[types.NamespacedName]struct{} { + svcNames := make(map[types.NamespacedName]struct{}) // routes all have populated ParentRefs from when they were created. // @@ -36,7 +34,7 @@ func buildReferencedServices( // Processes both valid and invalid BackendRefs as invalid ones still have referenced services // we may want to track. if ref.SvcNsName != (types.NamespacedName{}) { - svcNames[ref.SvcNsName] = services[ref.SvcNsName] + svcNames[ref.SvcNsName] = struct{}{} } } } diff --git a/internal/mode/static/state/graph/service_test.go b/internal/mode/static/state/graph/service_test.go index b69219fa8d..2eb0aaf646 100644 --- a/internal/mode/static/state/graph/service_test.go +++ b/internal/mode/static/state/graph/service_test.go @@ -4,9 +4,6 @@ import ( "testing" . "github.com/onsi/gomega" - v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" ) @@ -194,15 +191,9 @@ func TestBuildReferencedServices(t *testing.T) { }, } - services := map[types.NamespacedName]*v1.Service{ - {Namespace: "banana-ns", Name: "service"}: {ObjectMeta: metav1.ObjectMeta{Name: "banana-service"}}, - {Namespace: "service-ns", Name: "service"}: {ObjectMeta: metav1.ObjectMeta{Name: "service"}}, - {Namespace: "service-ns2", Name: "service2"}: {ObjectMeta: metav1.ObjectMeta{Name: "service2"}}, - } - tests := []struct { routes map[types.NamespacedName]*Route - exp map[types.NamespacedName]*v1.Service + exp map[types.NamespacedName]struct{} name string }{ { @@ -210,8 +201,8 @@ func TestBuildReferencedServices(t *testing.T) { routes: map[types.NamespacedName]*Route{ {Name: "normal-route"}: normalRoute, }, - exp: map[types.NamespacedName]*v1.Service{ - {Namespace: "banana-ns", Name: "service"}: {ObjectMeta: metav1.ObjectMeta{Name: "banana-service"}}, + exp: map[types.NamespacedName]struct{}{ + {Namespace: "banana-ns", Name: "service"}: {}, }, }, { @@ -219,9 +210,9 @@ func TestBuildReferencedServices(t *testing.T) { routes: map[types.NamespacedName]*Route{ {Name: "two-svc-one-rule"}: validRouteTwoServicesOneRule, }, - exp: map[types.NamespacedName]*v1.Service{ - {Namespace: "service-ns", Name: "service"}: {ObjectMeta: metav1.ObjectMeta{Name: "service"}}, - {Namespace: "service-ns2", Name: "service2"}: {ObjectMeta: metav1.ObjectMeta{Name: "service2"}}, + exp: map[types.NamespacedName]struct{}{ + {Namespace: "service-ns", Name: "service"}: {}, + {Namespace: "service-ns2", Name: "service2"}: {}, }, }, { @@ -229,9 +220,9 @@ func TestBuildReferencedServices(t *testing.T) { routes: map[types.NamespacedName]*Route{ {Name: "one-svc-per-rule"}: validRouteTwoServicesTwoRules, }, - exp: map[types.NamespacedName]*v1.Service{ - {Namespace: "service-ns", Name: "service"}: {ObjectMeta: metav1.ObjectMeta{Name: "service"}}, - {Namespace: "service-ns2", Name: "service2"}: {ObjectMeta: metav1.ObjectMeta{Name: "service2"}}, + exp: map[types.NamespacedName]struct{}{ + {Namespace: "service-ns", Name: "service"}: {}, + {Namespace: "service-ns2", Name: "service2"}: {}, }, }, { @@ -240,9 +231,9 @@ func TestBuildReferencedServices(t *testing.T) { {Name: "one-svc-per-rule"}: validRouteTwoServicesTwoRules, {Name: "two-svc-one-rule"}: validRouteTwoServicesOneRule, }, - exp: map[types.NamespacedName]*v1.Service{ - {Namespace: "service-ns", Name: "service"}: {ObjectMeta: metav1.ObjectMeta{Name: "service"}}, - {Namespace: "service-ns2", Name: "service2"}: {ObjectMeta: metav1.ObjectMeta{Name: "service2"}}, + exp: 
map[types.NamespacedName]struct{}{ + {Namespace: "service-ns", Name: "service"}: {}, + {Namespace: "service-ns2", Name: "service2"}: {}, }, }, { @@ -251,10 +242,10 @@ func TestBuildReferencedServices(t *testing.T) { {Name: "one-svc-per-rule"}: validRouteTwoServicesTwoRules, {Name: "normal-route"}: normalRoute, }, - exp: map[types.NamespacedName]*v1.Service{ - {Namespace: "service-ns", Name: "service"}: {ObjectMeta: metav1.ObjectMeta{Name: "service"}}, - {Namespace: "service-ns2", Name: "service2"}: {ObjectMeta: metav1.ObjectMeta{Name: "service2"}}, - {Namespace: "banana-ns", Name: "service"}: {ObjectMeta: metav1.ObjectMeta{Name: "banana-service"}}, + exp: map[types.NamespacedName]struct{}{ + {Namespace: "service-ns", Name: "service"}: {}, + {Namespace: "service-ns2", Name: "service2"}: {}, + {Namespace: "banana-ns", Name: "service"}: {}, }, }, { @@ -277,8 +268,8 @@ func TestBuildReferencedServices(t *testing.T) { {Name: "normal-route"}: normalRoute, {Name: "invalid-route"}: invalidRoute, }, - exp: map[types.NamespacedName]*v1.Service{ - {Namespace: "banana-ns", Name: "service"}: {ObjectMeta: metav1.ObjectMeta{Name: "banana-service"}}, + exp: map[types.NamespacedName]struct{}{ + {Namespace: "banana-ns", Name: "service"}: {}, }, }, { @@ -286,8 +277,8 @@ func TestBuildReferencedServices(t *testing.T) { routes: map[types.NamespacedName]*Route{ {Name: "multiple-parent-ref-route"}: attachedRouteWithManyParentRefs, }, - exp: map[types.NamespacedName]*v1.Service{ - {Namespace: "service-ns", Name: "service"}: {ObjectMeta: metav1.ObjectMeta{Name: "service"}}, + exp: map[types.NamespacedName]struct{}{ + {Namespace: "service-ns", Name: "service"}: {}, }, }, { @@ -302,7 +293,7 @@ func TestBuildReferencedServices(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { g := NewWithT(t) - g.Expect(buildReferencedServices(test.routes, services)).To(Equal(test.exp)) + g.Expect(buildReferencedServices(test.routes)).To(Equal(test.exp)) }) } } diff --git a/internal/mode/static/telemetry/collector.go b/internal/mode/static/telemetry/collector.go index de4587b967..cb80b401c0 100644 --- a/internal/mode/static/telemetry/collector.go +++ b/internal/mode/static/telemetry/collector.go @@ -6,7 +6,6 @@ import ( "fmt" v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/state/dataplane" @@ -132,8 +131,8 @@ func collectGraphResourceCount( ngfResourceCounts.Gateways = 1 } ngfResourceCounts.HTTPRoutes = len(g.Routes) - ngfResourceCounts.Secrets = countReferencedResources(g.ReferencedSecrets) - ngfResourceCounts.Services = countReferencedResources(g.ReferencedServices) + ngfResourceCounts.Secrets = len(g.ReferencedSecrets) + ngfResourceCounts.Services = len(g.ReferencedServices) for _, upstream := range cfg.Upstreams { if upstream.ErrorMsg == "" { @@ -143,17 +142,3 @@ func collectGraphResourceCount( return ngfResourceCounts, nil } - -// countReferencedResources counts the amount of non-nil resources. 
-func countReferencedResources[T comparable](referencedMap map[types.NamespacedName]T) int { - var count int - // because we can't compare T to nil, we need to use the zeroValue of T - var zeroValue T - - for name := range referencedMap { - if referencedMap[name] != zeroValue { - count++ - } - } - return count -} diff --git a/internal/mode/static/telemetry/collector_test.go b/internal/mode/static/telemetry/collector_test.go index a3ec8a28e7..511bb469c7 100644 --- a/internal/mode/static/telemetry/collector_test.go +++ b/internal/mode/static/telemetry/collector_test.go @@ -69,8 +69,8 @@ var _ = Describe("Collector", Ordered, func() { Source: secret1, }, }, - ReferencedServices: map[types.NamespacedName]*v1.Service{ - client.ObjectKeyFromObject(svc1): svc1, + ReferencedServices: map[types.NamespacedName]struct{}{ + client.ObjectKeyFromObject(svc1): {}, }, } graph2 = &graph.Graph{ @@ -90,10 +90,10 @@ var _ = Describe("Collector", Ordered, func() { }, client.ObjectKeyFromObject(nilsecret): nil, }, - ReferencedServices: map[types.NamespacedName]*v1.Service{ - client.ObjectKeyFromObject(svc1): svc1, - client.ObjectKeyFromObject(svc2): svc2, - client.ObjectKeyFromObject(nilsvc): nil, + ReferencedServices: map[types.NamespacedName]struct{}{ + client.ObjectKeyFromObject(svc1): {}, + client.ObjectKeyFromObject(svc2): {}, + client.ObjectKeyFromObject(nilsvc): {}, }, } @@ -285,8 +285,8 @@ var _ = Describe("Collector", Ordered, func() { Gateways: 1, GatewayClasses: 1, HTTPRoutes: 3, - Secrets: 2, - Services: 2, + Secrets: 3, + Services: 3, Endpoints: 4, }, } From 19916435518631e2799576301b37bdcfad1a1e3e Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Wed, 31 Jan 2024 18:14:30 -0800 Subject: [PATCH 24/43] Add feedback for collector tests --- internal/mode/static/handler.go | 8 + .../mode/static/state/graph/service_test.go | 1 + internal/mode/static/telemetry/collector.go | 12 +- .../mode/static/telemetry/collector_test.go | 478 +++++++++--------- internal/mode/static/telemetry/job.go | 3 + 5 files changed, 264 insertions(+), 238 deletions(-) diff --git a/internal/mode/static/handler.go b/internal/mode/static/handler.go index 50cfb2afcb..3a2ebcd4f0 100644 --- a/internal/mode/static/handler.go +++ b/internal/mode/static/handler.go @@ -116,7 +116,11 @@ func (h *eventHandlerImpl) HandleEventBatch(ctx context.Context, logger logr.Log case state.EndpointsOnlyChange: h.cfg.version++ cfg := dataplane.BuildConfiguration(ctx, graph, h.cfg.serviceResolver, h.cfg.version) + + h.lock.Lock() h.latestConfiguration = &cfg + h.lock.Unlock() + err = h.updateUpstreamServers( ctx, logger, @@ -125,7 +129,11 @@ func (h *eventHandlerImpl) HandleEventBatch(ctx context.Context, logger logr.Log case state.ClusterStateChange: h.cfg.version++ cfg := dataplane.BuildConfiguration(ctx, graph, h.cfg.serviceResolver, h.cfg.version) + + h.lock.Lock() h.latestConfiguration = &cfg + h.lock.Unlock() + err = h.updateNginxConf( ctx, cfg, diff --git a/internal/mode/static/state/graph/service_test.go b/internal/mode/static/state/graph/service_test.go index 2eb0aaf646..82cf2960e0 100644 --- a/internal/mode/static/state/graph/service_test.go +++ b/internal/mode/static/state/graph/service_test.go @@ -4,6 +4,7 @@ import ( "testing" . 
"github.com/onsi/gomega" + "k8s.io/apimachinery/pkg/types" ) diff --git a/internal/mode/static/telemetry/collector.go b/internal/mode/static/telemetry/collector.go index cb80b401c0..36275a1587 100644 --- a/internal/mode/static/telemetry/collector.go +++ b/internal/mode/static/telemetry/collector.go @@ -118,18 +118,22 @@ func collectGraphResourceCount( cfg := configurationGetter.GetLatestConfiguration() if g == nil { - return NGFResourceCounts{}, errors.New("latest graph cannot be nil") + return ngfResourceCounts, errors.New("latest graph cannot be nil") } if cfg == nil { - return NGFResourceCounts{}, errors.New("latest configuration cannot be nil") + return ngfResourceCounts, errors.New("latest configuration cannot be nil") } + ngfResourceCounts.GatewayClasses = len(g.IgnoredGatewayClasses) if g.GatewayClass != nil { - ngfResourceCounts.GatewayClasses = 1 + ngfResourceCounts.GatewayClasses++ } + + ngfResourceCounts.Gateways = len(g.IgnoredGateways) if g.Gateway != nil { - ngfResourceCounts.Gateways = 1 + ngfResourceCounts.Gateways++ } + ngfResourceCounts.HTTPRoutes = len(g.Routes) ngfResourceCounts.Secrets = len(g.ReferencedSecrets) ngfResourceCounts.Services = len(g.ReferencedServices) diff --git a/internal/mode/static/telemetry/collector_test.go b/internal/mode/static/telemetry/collector_test.go index 511bb469c7..28580b6cd7 100644 --- a/internal/mode/static/telemetry/collector_test.go +++ b/internal/mode/static/telemetry/collector_test.go @@ -11,6 +11,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" + gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" "github.com/nginxinc/nginx-gateway-fabric/internal/framework/events/eventsfakes" "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/state/dataplane" @@ -27,20 +28,27 @@ var _ = Describe("Collector", Ordered, func() { fakeConfigurationGetter *telemetryfakes.FakeConfigurationGetter dataCollector telemetry.DataCollector version string - graph1, graph2 *graph.Graph + expData telemetry.Data ctx context.Context - config1, config2 *dataplane.Configuration ) BeforeAll(func() { ctx = context.Background() version = "1.1" + }) + + BeforeEach(func() { + expData = telemetry.Data{ + ProjectMetadata: telemetry.ProjectMetadata{Name: "NGF", Version: version}, + NodeCount: 0, + NGFResourceCounts: telemetry.NGFResourceCounts{}, + } k8sClientReader = &eventsfakes.FakeReader{} fakeGraphGetter = &telemetryfakes.FakeGraphGetter{} - fakeGraphGetter.GetLatestGraphReturns(&graph.Graph{}) - fakeConfigurationGetter = &telemetryfakes.FakeConfigurationGetter{} + + fakeGraphGetter.GetLatestGraphReturns(&graph.Graph{}) fakeConfigurationGetter.GetLatestConfigurationReturns(&dataplane.Configuration{}) dataCollector = telemetry.NewDataCollectorImpl(telemetry.DataCollectorConfig{ @@ -49,281 +57,283 @@ var _ = Describe("Collector", Ordered, func() { ConfigurationGetter: fakeConfigurationGetter, Version: version, }) + }) - secret1 := &v1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "secret1"}} - secret2 := &v1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "secret2"}} - nilsecret := &v1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "nilsecret"}} - - svc1 := &v1.Service{ObjectMeta: metav1.ObjectMeta{Name: "svc1"}} - svc2 := &v1.Service{ObjectMeta: metav1.ObjectMeta{Name: "svc2"}} - nilsvc := &v1.Service{ObjectMeta: metav1.ObjectMeta{Name: "nilsvc"}} - - graph1 = &graph.Graph{ - GatewayClass: &graph.GatewayClass{}, - Gateway: &graph.Gateway{}, - Routes: 
map[types.NamespacedName]*graph.Route{ - {Namespace: "test", Name: "hr-1"}: {}, - }, - ReferencedSecrets: map[types.NamespacedName]*graph.Secret{ - client.ObjectKeyFromObject(secret1): { - Source: secret1, - }, - }, - ReferencedServices: map[types.NamespacedName]struct{}{ - client.ObjectKeyFromObject(svc1): {}, - }, - } - graph2 = &graph.Graph{ - GatewayClass: &graph.GatewayClass{}, - Gateway: &graph.Gateway{}, - Routes: map[types.NamespacedName]*graph.Route{ - {Namespace: "test", Name: "hr-1"}: {}, - {Namespace: "test", Name: "hr-2"}: {}, - {Namespace: "test", Name: "hr-3"}: {}, - }, - ReferencedSecrets: map[types.NamespacedName]*graph.Secret{ - client.ObjectKeyFromObject(secret1): { - Source: secret1, - }, - client.ObjectKeyFromObject(secret2): { - Source: secret2, - }, - client.ObjectKeyFromObject(nilsecret): nil, - }, - ReferencedServices: map[types.NamespacedName]struct{}{ - client.ObjectKeyFromObject(svc1): {}, - client.ObjectKeyFromObject(svc2): {}, - client.ObjectKeyFromObject(nilsvc): {}, - }, - } + Describe("Normal case", func() { + When("collecting telemetry data", func() { + It("collects all fields", func() { + node := v1.Node{ + ObjectMeta: metav1.ObjectMeta{Name: "node1"}, + } + node2 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{Name: "node2"}, + } + node3 := v1.Node{ + ObjectMeta: metav1.ObjectMeta{Name: "node3"}, + } - config1 = &dataplane.Configuration{ - Upstreams: []dataplane.Upstream{ - { - Name: "upstream1", - ErrorMsg: "", - Endpoints: []resolver.Endpoint{ - { - Address: "endpoint1", - Port: 80, + k8sClientReader.ListCalls(func(ctx context.Context, list client.ObjectList, option ...client.ListOption) error { + Expect(option).To(BeEmpty()) + + switch typedList := list.(type) { + case *v1.NodeList: + typedList.Items = append(typedList.Items, node, node2, node3) + default: + Fail(fmt.Sprintf("unknown type: %T", typedList)) + } + return nil + }) + + secret1 := &v1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "secret1"}} + secret2 := &v1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "secret2"}} + nilsecret := &v1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "nilsecret"}} + + svc1 := &v1.Service{ObjectMeta: metav1.ObjectMeta{Name: "svc1"}} + svc2 := &v1.Service{ObjectMeta: metav1.ObjectMeta{Name: "svc2"}} + nilsvc := &v1.Service{ObjectMeta: metav1.ObjectMeta{Name: "nilsvc"}} + + graph := &graph.Graph{ + GatewayClass: &graph.GatewayClass{}, + Gateway: &graph.Gateway{}, + IgnoredGatewayClasses: map[types.NamespacedName]*gatewayv1.GatewayClass{}, + Routes: map[types.NamespacedName]*graph.Route{ + {Namespace: "test", Name: "hr-1"}: {}, + {Namespace: "test", Name: "hr-2"}: {}, + {Namespace: "test", Name: "hr-3"}: {}, + }, + ReferencedSecrets: map[types.NamespacedName]*graph.Secret{ + client.ObjectKeyFromObject(secret1): { + Source: secret1, + }, + client.ObjectKeyFromObject(secret2): { + Source: secret2, }, + client.ObjectKeyFromObject(nilsecret): nil, }, - }, - }, - } - config2 = &dataplane.Configuration{ - Upstreams: []dataplane.Upstream{ - { - Name: "upstream1", - ErrorMsg: "", - Endpoints: []resolver.Endpoint{ + ReferencedServices: map[types.NamespacedName]struct{}{ + client.ObjectKeyFromObject(svc1): {}, + client.ObjectKeyFromObject(svc2): {}, + client.ObjectKeyFromObject(nilsvc): {}, + }, + } + + config := &dataplane.Configuration{ + Upstreams: []dataplane.Upstream{ { - Address: "endpoint1", - Port: 80, - }, { - Address: "endpoint2", - Port: 80, - }, { - Address: "endpoint3", - Port: 80, + Name: "upstream1", + ErrorMsg: "", + Endpoints: []resolver.Endpoint{ + { + Address: 
"endpoint1", + Port: 80, + }, { + Address: "endpoint2", + Port: 80, + }, { + Address: "endpoint3", + Port: 80, + }, + }, }, - }, - }, - { - Name: "upstream2", - ErrorMsg: "", - Endpoints: []resolver.Endpoint{ { - Address: "endpoint1", - Port: 80, + Name: "upstream2", + ErrorMsg: "", + Endpoints: []resolver.Endpoint{ + { + Address: "endpoint1", + Port: 80, + }, + }, }, - }, - }, - { - Name: "upstream3", - ErrorMsg: "there is an error here", - Endpoints: []resolver.Endpoint{ { - Address: "endpoint1", - Port: 80, - }, { - Address: "endpoint2", - Port: 80, - }, { - Address: "endpoint3", - Port: 80, + Name: "upstream3", + ErrorMsg: "there is an error here", + Endpoints: []resolver.Endpoint{ + { + Address: "endpoint1", + Port: 80, + }, { + Address: "endpoint2", + Port: 80, + }, { + Address: "endpoint3", + Port: 80, + }, + }, }, }, - }, - }, - } - }) + } - When("retrieving node count data", func() { - It("generates correct data for no nodes", func() { - k8sClientReader.ListCalls(func(ctx context.Context, list client.ObjectList, option ...client.ListOption) error { - Expect(option).To(BeEmpty()) + fakeGraphGetter.GetLatestGraphReturns(graph) + fakeConfigurationGetter.GetLatestConfigurationReturns(config) - switch typedList := list.(type) { - case *v1.NodeList: - typedList.Items = []v1.Node{} - default: - Fail(fmt.Sprintf("unknown type: %T", typedList)) + expData.NodeCount = 3 + expData.NGFResourceCounts = telemetry.NGFResourceCounts{ + Gateways: 1, + GatewayClasses: 1, + HTTPRoutes: 3, + Secrets: 3, + Services: 3, + Endpoints: 4, } - return nil - }) - - expData := telemetry.Data{ - ProjectMetadata: telemetry.ProjectMetadata{Name: "NGF", Version: version}, - NodeCount: 0, - NGFResourceCounts: telemetry.NGFResourceCounts{}, - } - data, err := dataCollector.Collect(ctx) + data, err := dataCollector.Collect(ctx) - Expect(err).To(BeNil()) - Expect(expData).To(Equal(data)) + Expect(err).To(BeNil()) + Expect(expData).To(Equal(data)) + }) }) + }) - It("generates correct data for one node", func() { - node := v1.Node{ - ObjectMeta: metav1.ObjectMeta{Name: "node1"}, - } - - k8sClientReader.ListCalls(func(ctx context.Context, list client.ObjectList, option ...client.ListOption) error { - Expect(option).To(BeEmpty()) - - switch typedList := list.(type) { - case *v1.NodeList: - typedList.Items = append(typedList.Items, node) - default: - Fail(fmt.Sprintf("unknown type: %T", typedList)) - } - return nil + Describe("node count collector", func() { + When("collecting node count data", func() { + It("collects correct data for no nodes", func() { + k8sClientReader.ListCalls(func(ctx context.Context, list client.ObjectList, option ...client.ListOption) error { + Expect(option).To(BeEmpty()) + + switch typedList := list.(type) { + case *v1.NodeList: + typedList.Items = []v1.Node{} + default: + Fail(fmt.Sprintf("unknown type: %T", typedList)) + } + return nil + }) + + data, err := dataCollector.Collect(ctx) + + Expect(err).To(BeNil()) + Expect(expData).To(Equal(data)) }) - expData := telemetry.Data{ - ProjectMetadata: telemetry.ProjectMetadata{Name: "NGF", Version: version}, - NodeCount: 1, - NGFResourceCounts: telemetry.NGFResourceCounts{}, - } + It("collects correct data for one node", func() { + node := v1.Node{ + ObjectMeta: metav1.ObjectMeta{Name: "node1"}, + } - data, err := dataCollector.Collect(ctx) + k8sClientReader.ListCalls(func(ctx context.Context, list client.ObjectList, option ...client.ListOption) error { + Expect(option).To(BeEmpty()) - Expect(err).To(BeNil()) - Expect(expData).To(Equal(data)) - }) + 
switch typedList := list.(type) { + case *v1.NodeList: + typedList.Items = append(typedList.Items, node) + default: + Fail(fmt.Sprintf("unknown type: %T", typedList)) + } + return nil + }) - It("generates correct data for multiple nodes", func() { - node := v1.Node{ - ObjectMeta: metav1.ObjectMeta{Name: "node1"}, - } - node2 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{Name: "node2"}, - } - node3 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{Name: "node3"}, - } + expData.NodeCount = 1 - k8sClientReader.ListCalls(func(ctx context.Context, list client.ObjectList, option ...client.ListOption) error { - Expect(option).To(BeEmpty()) + data, err := dataCollector.Collect(ctx) - switch typedList := list.(type) { - case *v1.NodeList: - typedList.Items = append(typedList.Items, node, node2, node3) - default: - Fail(fmt.Sprintf("unknown type: %T", typedList)) - } - return nil + Expect(err).To(BeNil()) + Expect(expData).To(Equal(data)) }) + }) + When("it encounters an error while collecting data", func() { + It("should error on kubernetes client api errors", func() { + k8sClientReader.ListReturns(errors.New("there was an error")) - expData := telemetry.Data{ - ProjectMetadata: telemetry.ProjectMetadata{Name: "NGF", Version: version}, - NodeCount: 3, - NGFResourceCounts: telemetry.NGFResourceCounts{}, - } - - data, err := dataCollector.Collect(ctx) - - Expect(err).To(BeNil()) - Expect(expData).To(Equal(data)) + _, err := dataCollector.Collect(ctx) + Expect(err).To(HaveOccurred()) + }) }) }) - When("retrieving NGF resource counts", func() { - It("generates correct data for graph with one of each resource", func() { - fakeGraphGetter.GetLatestGraphReturns(graph1) - fakeConfigurationGetter.GetLatestConfigurationReturns(config1) - - expData := telemetry.Data{ - ProjectMetadata: telemetry.ProjectMetadata{Name: "NGF", Version: version}, - NodeCount: 3, - NGFResourceCounts: telemetry.NGFResourceCounts{ - Gateways: 1, - GatewayClasses: 1, - HTTPRoutes: 1, - Secrets: 1, - Services: 1, - Endpoints: 1, + Describe("NGF resource count collector", func() { + var ( + graph1 *graph.Graph + config1 *dataplane.Configuration + ) + + BeforeAll(func() { + secret := &v1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "secret1"}} + svc := &v1.Service{ObjectMeta: metav1.ObjectMeta{Name: "svc1"}} + + graph1 = &graph.Graph{ + GatewayClass: &graph.GatewayClass{}, + Gateway: &graph.Gateway{}, + Routes: map[types.NamespacedName]*graph.Route{ + {Namespace: "test", Name: "hr-1"}: {}, + }, + ReferencedSecrets: map[types.NamespacedName]*graph.Secret{ + client.ObjectKeyFromObject(secret): { + Source: secret, + }, + }, + ReferencedServices: map[types.NamespacedName]struct{}{ + client.ObjectKeyFromObject(svc): {}, }, } - data, err := dataCollector.Collect(ctx) - - Expect(err).To(BeNil()) - Expect(expData).To(Equal(data)) + config1 = &dataplane.Configuration{ + Upstreams: []dataplane.Upstream{ + { + Name: "upstream1", + ErrorMsg: "", + Endpoints: []resolver.Endpoint{ + { + Address: "endpoint1", + Port: 80, + }, + }, + }, + }, + } }) - It("generates correct data for graph with multiple of each resource", func() { - fakeGraphGetter.GetLatestGraphReturns(graph2) - fakeConfigurationGetter.GetLatestConfigurationReturns(config2) + When("collecting NGF resource counts", func() { + It("collects correct data for graph with no resources", func() { + fakeGraphGetter.GetLatestGraphReturns(&graph.Graph{}) + fakeConfigurationGetter.GetLatestConfigurationReturns(&dataplane.Configuration{}) - expData := telemetry.Data{ - ProjectMetadata: 
telemetry.ProjectMetadata{Name: "NGF", Version: version}, - NodeCount: 3, - NGFResourceCounts: telemetry.NGFResourceCounts{ - Gateways: 1, - GatewayClasses: 1, - HTTPRoutes: 3, - Secrets: 3, - Services: 3, - Endpoints: 4, - }, - } + expData.NGFResourceCounts = telemetry.NGFResourceCounts{} - data, err := dataCollector.Collect(ctx) + data, err := dataCollector.Collect(ctx) - Expect(err).To(BeNil()) - Expect(expData).To(Equal(data)) - }) - }) + Expect(err).To(BeNil()) + Expect(expData).To(Equal(data)) + }) - When("it encounters an error while collecting data", func() { - BeforeEach(func() { - k8sClientReader.ListReturns(nil) - fakeGraphGetter.GetLatestGraphReturns(&graph.Graph{}) - fakeConfigurationGetter.GetLatestConfigurationReturns(&dataplane.Configuration{}) - }) + It("collects correct data for graph with one of each resource", func() { + fakeGraphGetter.GetLatestGraphReturns(graph1) + fakeConfigurationGetter.GetLatestConfigurationReturns(config1) - It("should error on client errors", func() { - k8sClientReader.ListReturns(errors.New("there was an error")) + expData.NGFResourceCounts = telemetry.NGFResourceCounts{ + Gateways: 1, + GatewayClasses: 1, + HTTPRoutes: 1, + Secrets: 1, + Services: 1, + Endpoints: 1, + } - _, err := dataCollector.Collect(ctx) - Expect(err).To(HaveOccurred()) - }) + data, err := dataCollector.Collect(ctx) + + Expect(err).To(BeNil()) + Expect(expData).To(Equal(data)) + }) - It("should error on nil latest graph", func() { - fakeGraphGetter.GetLatestGraphReturns(nil) + When("it encounters an error while collecting data", func() { + BeforeEach(func() { + fakeGraphGetter.GetLatestGraphReturns(&graph.Graph{}) + fakeConfigurationGetter.GetLatestConfigurationReturns(&dataplane.Configuration{}) + }) + It("should error on nil latest graph", func() { + fakeGraphGetter.GetLatestGraphReturns(nil) - _, err := dataCollector.Collect(ctx) - Expect(err).To(HaveOccurred()) - }) + _, err := dataCollector.Collect(ctx) + Expect(err).To(HaveOccurred()) + }) - It("should error on nil latest configuration", func() { - fakeConfigurationGetter.GetLatestConfigurationReturns(nil) + It("should error on nil latest configuration", func() { + fakeConfigurationGetter.GetLatestConfigurationReturns(nil) - _, err := dataCollector.Collect(ctx) - Expect(err).To(HaveOccurred()) + _, err := dataCollector.Collect(ctx) + Expect(err).To(HaveOccurred()) + }) + }) }) }) }) diff --git a/internal/mode/static/telemetry/job.go b/internal/mode/static/telemetry/job.go index 6888b5deb9..ad8c295899 100644 --- a/internal/mode/static/telemetry/job.go +++ b/internal/mode/static/telemetry/job.go @@ -44,6 +44,9 @@ func NewJob(cfg JobConfig) *Job { // Start starts the telemetry job. 
// Implements controller-runtime manager.Runnable func (j *Job) Start(ctx context.Context) error { + // wait here until pod is ready, have to propagate down health checker for pod + // make sure I can gracefully terminate if the context is canceled + j.cfg.Logger.Info("Starting telemetry job") report := func(ctx context.Context) { From 89eb5ce488dde52cc7bb943e7b5007094d8e21e5 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Wed, 31 Jan 2024 18:17:51 -0800 Subject: [PATCH 25/43] Add count for ignored gateway classes and gateways --- .../mode/static/telemetry/collector_test.go | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/internal/mode/static/telemetry/collector_test.go b/internal/mode/static/telemetry/collector_test.go index 28580b6cd7..b364b86725 100644 --- a/internal/mode/static/telemetry/collector_test.go +++ b/internal/mode/static/telemetry/collector_test.go @@ -93,9 +93,16 @@ var _ = Describe("Collector", Ordered, func() { nilsvc := &v1.Service{ObjectMeta: metav1.ObjectMeta{Name: "nilsvc"}} graph := &graph.Graph{ - GatewayClass: &graph.GatewayClass{}, - Gateway: &graph.Gateway{}, - IgnoredGatewayClasses: map[types.NamespacedName]*gatewayv1.GatewayClass{}, + GatewayClass: &graph.GatewayClass{}, + Gateway: &graph.Gateway{}, + IgnoredGatewayClasses: map[types.NamespacedName]*gatewayv1.GatewayClass{ + {Name: "ignoredGC1"}: {}, + {Name: "ignoredGC2"}: {}, + }, + IgnoredGateways: map[types.NamespacedName]*gatewayv1.Gateway{ + {Name: "ignoredGw1"}: {}, + {Name: "ignoredGw2"}: {}, + }, Routes: map[types.NamespacedName]*graph.Route{ {Namespace: "test", Name: "hr-1"}: {}, {Namespace: "test", Name: "hr-2"}: {}, @@ -169,8 +176,8 @@ var _ = Describe("Collector", Ordered, func() { expData.NodeCount = 3 expData.NGFResourceCounts = telemetry.NGFResourceCounts{ - Gateways: 1, - GatewayClasses: 1, + Gateways: 3, + GatewayClasses: 3, HTTPRoutes: 3, Secrets: 3, Services: 3, From 7213348e1e23c40a54c331635371f1c647ee1b0b Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Thu, 1 Feb 2024 11:14:23 -0800 Subject: [PATCH 26/43] Add HealthChecker and blocking on job until NGF is ready --- internal/mode/static/handler.go | 4 +- internal/mode/static/handler_test.go | 14 ++- internal/mode/static/health.go | 26 ++++- internal/mode/static/health_test.go | 2 +- internal/mode/static/manager.go | 4 +- .../mode/static/state/graph/service_test.go | 1 - internal/mode/static/telemetry/job.go | 18 ++++ internal/mode/static/telemetry/job_test.go | 17 ++- .../telemetryfakes/fake_health_checker.go | 102 ++++++++++++++++++ 9 files changed, 172 insertions(+), 16 deletions(-) create mode 100644 internal/mode/static/telemetry/telemetryfakes/fake_health_checker.go diff --git a/internal/mode/static/handler.go b/internal/mode/static/handler.go index 3a2ebcd4f0..603f3f91ec 100644 --- a/internal/mode/static/handler.go +++ b/internal/mode/static/handler.go @@ -59,8 +59,8 @@ type eventHandlerConfig struct { logLevelSetter logLevelSetter // metricsCollector collects metrics for this controller. metricsCollector handlerMetricsCollector - // healthChecker sets the health of the Pod to Ready once we've written out our initial config - healthChecker *healthChecker + // healthChecker sets the health of the Pod to Ready once we've written out our initial config. + healthChecker *healthCheckerImpl // controlConfigNSName is the NamespacedName of the NginxGateway config for this controller. controlConfigNSName types.NamespacedName // version is the current version number of the nginx config. 
diff --git a/internal/mode/static/handler_test.go b/internal/mode/static/handler_test.go index a2fd0b3a21..0f8e9b8070 100644 --- a/internal/mode/static/handler_test.go +++ b/internal/mode/static/handler_test.go @@ -84,7 +84,7 @@ var _ = Describe("eventHandler", func() { nginxRuntimeMgr: fakeNginxRuntimeMgr, statusUpdater: fakeStatusUpdater, eventRecorder: fakeEventRecorder, - healthChecker: &healthChecker{}, + healthChecker: newHealthCheckerImpl(), controlConfigNSName: types.NamespacedName{Namespace: namespace, Name: configName}, gatewayPodConfig: config.GatewayPodConfig{ ServiceName: "nginx-gateway", @@ -429,6 +429,7 @@ var _ = Describe("eventHandler", func() { It("should set the health checker status properly when there are changes", func() { e := &events.UpsertEvent{Resource: &gatewayv1.HTTPRoute{}} batch := []interface{}{e} + readyChannel := handler.cfg.healthChecker.GetReadyIfClosedChannel() fakeProcessor.ProcessReturns(state.ClusterStateChange, &graph.Graph{}) @@ -437,24 +438,32 @@ var _ = Describe("eventHandler", func() { Expect(helpers.Diff(handler.GetLatestConfiguration(), &dataplane.Configuration{Version: 1})).To(BeEmpty()) + _, ok := <-readyChannel + Expect(ok).To(BeFalse()) + Expect(handler.cfg.healthChecker.readyCheck(nil)).To(Succeed()) }) It("should set the health checker status properly when there are no changes or errors", func() { e := &events.UpsertEvent{Resource: &gatewayv1.HTTPRoute{}} batch := []interface{}{e} + readyChannel := handler.cfg.healthChecker.GetReadyIfClosedChannel() Expect(handler.cfg.healthChecker.readyCheck(nil)).ToNot(Succeed()) handler.HandleEventBatch(context.Background(), ctlrZap.New(), batch) Expect(handler.GetLatestConfiguration()).To(BeNil()) + _, ok := <-readyChannel + Expect(ok).To(BeFalse()) + Expect(handler.cfg.healthChecker.readyCheck(nil)).To(Succeed()) }) It("should set the health checker status properly when there is an error", func() { e := &events.UpsertEvent{Resource: &gatewayv1.HTTPRoute{}} batch := []interface{}{e} + readyChannel := handler.cfg.healthChecker.GetReadyIfClosedChannel() fakeProcessor.ProcessReturns(state.ClusterStateChange, &graph.Graph{}) fakeNginxRuntimeMgr.ReloadReturns(errors.New("reload error")) @@ -478,6 +487,9 @@ var _ = Describe("eventHandler", func() { Expect(helpers.Diff(handler.GetLatestConfiguration(), &dataplane.Configuration{Version: 2})).To(BeEmpty()) + _, ok := <-readyChannel + Expect(ok).To(BeFalse()) + Expect(handler.cfg.healthChecker.readyCheck(nil)).To(Succeed()) }) diff --git a/internal/mode/static/health.go b/internal/mode/static/health.go index 3a764c347b..9373970ffb 100644 --- a/internal/mode/static/health.go +++ b/internal/mode/static/health.go @@ -6,18 +6,28 @@ import ( "sync" ) -type healthChecker struct { +// newHealthCheckerImpl creates a new healthCheckerImpl. +func newHealthCheckerImpl() *healthCheckerImpl { + return &healthCheckerImpl{ + readyIfClosedChan: make(chan struct{}), + } +} + +// healthCheckerImpl implements HealthChecker. +type healthCheckerImpl struct { // firstBatchError is set when the first batch fails to configure nginx // and we don't want to set ourselves as ready on the next batch if nothing changes firstBatchError error - lock sync.RWMutex - ready bool + // readyIfClosedChan is a channel that is initialized in NewHealthCheckerImpl and represents if the NGF Pod is ready. + readyIfClosedChan chan struct{} + lock sync.RWMutex + ready bool } // readyCheck returns the ready-state of the Pod. It satisfies the controller-runtime Checker type. 
// We are considered ready after the handler processed the first batch. In case there is NGINX configuration // to write, it must be written and NGINX must be reloaded successfully. -func (h *healthChecker) readyCheck(_ *http.Request) error { +func (h *healthCheckerImpl) readyCheck(_ *http.Request) error { h.lock.RLock() defer h.lock.RUnlock() @@ -29,10 +39,16 @@ func (h *healthChecker) readyCheck(_ *http.Request) error { } // setAsReady marks the health check as ready. -func (h *healthChecker) setAsReady() { +func (h *healthCheckerImpl) setAsReady() { h.lock.Lock() defer h.lock.Unlock() h.ready = true h.firstBatchError = nil + close(h.readyIfClosedChan) +} + +// GetReadyIfClosedChannel returns a channel which determines if the NGF Pod is ready. +func (h *healthCheckerImpl) GetReadyIfClosedChannel() chan struct{} { + return h.readyIfClosedChan } diff --git a/internal/mode/static/health_test.go b/internal/mode/static/health_test.go index 77dfa30ed9..5515160302 100644 --- a/internal/mode/static/health_test.go +++ b/internal/mode/static/health_test.go @@ -8,7 +8,7 @@ import ( func TestReadyCheck(t *testing.T) { g := NewWithT(t) - hc := healthChecker{} + hc := newHealthCheckerImpl() g.Expect(hc.readyCheck(nil)).ToNot(Succeed()) hc.ready = true diff --git a/internal/mode/static/manager.go b/internal/mode/static/manager.go index 445db3b289..a70bba2c24 100644 --- a/internal/mode/static/manager.go +++ b/internal/mode/static/manager.go @@ -69,7 +69,7 @@ func init() { // nolint:gocyclo func StartManager(cfg config.Config) error { - hc := &healthChecker{} + hc := newHealthCheckerImpl() mgr, err := createManager(cfg, hc) if err != nil { return fmt.Errorf("cannot build runtime manager: %w", err) @@ -221,7 +221,7 @@ func StartManager(cfg config.Config) error { return mgr.Start(ctx) } -func createManager(cfg config.Config, hc *healthChecker) (manager.Manager, error) { +func createManager(cfg config.Config, hc *healthCheckerImpl) (manager.Manager, error) { options := manager.Options{ Scheme: scheme, Logger: cfg.Logger, diff --git a/internal/mode/static/state/graph/service_test.go b/internal/mode/static/state/graph/service_test.go index 82cf2960e0..2eb0aaf646 100644 --- a/internal/mode/static/state/graph/service_test.go +++ b/internal/mode/static/state/graph/service_test.go @@ -4,7 +4,6 @@ import ( "testing" . "github.com/onsi/gomega" - "k8s.io/apimachinery/pkg/types" ) diff --git a/internal/mode/static/telemetry/job.go b/internal/mode/static/telemetry/job.go index ad8c295899..fa04164ca9 100644 --- a/internal/mode/static/telemetry/job.go +++ b/internal/mode/static/telemetry/job.go @@ -17,12 +17,22 @@ type DataCollector interface { Collect(ctx context.Context) (Data, error) } +//go:generate go run github.com/maxbrunsfeld/counterfeiter/v6 . HealthChecker + +// HealthChecker checks if the NGF Pod is ready. +type HealthChecker interface { + // GetReadyIfClosedChannel returns a channel which determines if the NGF Pod is ready. + GetReadyIfClosedChannel() chan struct{} +} + // JobConfig is the configuration for the telemetry job. type JobConfig struct { // Exporter is the exporter to use for exporting telemetry data. Exporter Exporter // DataCollector is the collector to use for collecting telemetry data. DataCollector DataCollector + // HealthChecker lets us check if the NGF Pod is ready. + HealthChecker HealthChecker // Logger is the logger. Logger logr.Logger // Period defines the period of the telemetry job. The job will run every Period. 
@@ -47,6 +57,14 @@ func (j *Job) Start(ctx context.Context) error { // wait here until pod is ready, have to propagate down health checker for pod // make sure I can gracefully terminate if the context is canceled + readyChannel := j.cfg.HealthChecker.GetReadyIfClosedChannel() + select { + case <-readyChannel: + case <-ctx.Done(): + j.cfg.Logger.V(1).Info("Failed to start telemetry job") + return ctx.Err() + } + j.cfg.Logger.Info("Starting telemetry job") report := func(ctx context.Context) { diff --git a/internal/mode/static/telemetry/job_test.go b/internal/mode/static/telemetry/job_test.go index a54cccedcf..2c4339ede8 100644 --- a/internal/mode/static/telemetry/job_test.go +++ b/internal/mode/static/telemetry/job_test.go @@ -15,21 +15,26 @@ import ( var _ = Describe("Job", func() { var ( - job *telemetry.Job - exporter *telemetryfakes.FakeExporter - dataCollector *telemetryfakes.FakeDataCollector - expData telemetry.Data + job *telemetry.Job + exporter *telemetryfakes.FakeExporter + dataCollector *telemetryfakes.FakeDataCollector + healthCollector *telemetryfakes.FakeHealthChecker + expData telemetry.Data + readyChannel chan struct{} ) const timeout = 10 * time.Second BeforeEach(func() { exporter = &telemetryfakes.FakeExporter{} dataCollector = &telemetryfakes.FakeDataCollector{} + healthCollector = &telemetryfakes.FakeHealthChecker{} + job = telemetry.NewJob(telemetry.JobConfig{ Exporter: exporter, Logger: zap.New(), Period: 1 * time.Millisecond, // 1ms is much smaller than timeout so the Job should report a few times DataCollector: dataCollector, + HealthChecker: healthCollector, }) expData = telemetry.Data{ @@ -55,6 +60,10 @@ var _ = Describe("Job", func() { ctx, cancel := context.WithTimeout(context.Background(), timeout) + readyChannel = make(chan struct{}) + healthCollector.GetReadyIfClosedChannelReturns(readyChannel) + close(readyChannel) + errCh := make(chan error) go func() { errCh <- job.Start(ctx) diff --git a/internal/mode/static/telemetry/telemetryfakes/fake_health_checker.go b/internal/mode/static/telemetry/telemetryfakes/fake_health_checker.go new file mode 100644 index 0000000000..e7915a3d9c --- /dev/null +++ b/internal/mode/static/telemetry/telemetryfakes/fake_health_checker.go @@ -0,0 +1,102 @@ +// Code generated by counterfeiter. DO NOT EDIT. 
+package telemetryfakes + +import ( + "sync" + + "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/telemetry" +) + +type FakeHealthChecker struct { + GetReadyIfClosedChannelStub func() chan struct{} + getReadyIfClosedChannelMutex sync.RWMutex + getReadyIfClosedChannelArgsForCall []struct { + } + getReadyIfClosedChannelReturns struct { + result1 chan struct{} + } + getReadyIfClosedChannelReturnsOnCall map[int]struct { + result1 chan struct{} + } + invocations map[string][][]interface{} + invocationsMutex sync.RWMutex +} + +func (fake *FakeHealthChecker) GetReadyIfClosedChannel() chan struct{} { + fake.getReadyIfClosedChannelMutex.Lock() + ret, specificReturn := fake.getReadyIfClosedChannelReturnsOnCall[len(fake.getReadyIfClosedChannelArgsForCall)] + fake.getReadyIfClosedChannelArgsForCall = append(fake.getReadyIfClosedChannelArgsForCall, struct { + }{}) + stub := fake.GetReadyIfClosedChannelStub + fakeReturns := fake.getReadyIfClosedChannelReturns + fake.recordInvocation("GetReadyIfClosedChannel", []interface{}{}) + fake.getReadyIfClosedChannelMutex.Unlock() + if stub != nil { + return stub() + } + if specificReturn { + return ret.result1 + } + return fakeReturns.result1 +} + +func (fake *FakeHealthChecker) GetReadyIfClosedChannelCallCount() int { + fake.getReadyIfClosedChannelMutex.RLock() + defer fake.getReadyIfClosedChannelMutex.RUnlock() + return len(fake.getReadyIfClosedChannelArgsForCall) +} + +func (fake *FakeHealthChecker) GetReadyIfClosedChannelCalls(stub func() chan struct{}) { + fake.getReadyIfClosedChannelMutex.Lock() + defer fake.getReadyIfClosedChannelMutex.Unlock() + fake.GetReadyIfClosedChannelStub = stub +} + +func (fake *FakeHealthChecker) GetReadyIfClosedChannelReturns(result1 chan struct{}) { + fake.getReadyIfClosedChannelMutex.Lock() + defer fake.getReadyIfClosedChannelMutex.Unlock() + fake.GetReadyIfClosedChannelStub = nil + fake.getReadyIfClosedChannelReturns = struct { + result1 chan struct{} + }{result1} +} + +func (fake *FakeHealthChecker) GetReadyIfClosedChannelReturnsOnCall(i int, result1 chan struct{}) { + fake.getReadyIfClosedChannelMutex.Lock() + defer fake.getReadyIfClosedChannelMutex.Unlock() + fake.GetReadyIfClosedChannelStub = nil + if fake.getReadyIfClosedChannelReturnsOnCall == nil { + fake.getReadyIfClosedChannelReturnsOnCall = make(map[int]struct { + result1 chan struct{} + }) + } + fake.getReadyIfClosedChannelReturnsOnCall[i] = struct { + result1 chan struct{} + }{result1} +} + +func (fake *FakeHealthChecker) Invocations() map[string][][]interface{} { + fake.invocationsMutex.RLock() + defer fake.invocationsMutex.RUnlock() + fake.getReadyIfClosedChannelMutex.RLock() + defer fake.getReadyIfClosedChannelMutex.RUnlock() + copiedInvocations := map[string][][]interface{}{} + for key, value := range fake.invocations { + copiedInvocations[key] = value + } + return copiedInvocations +} + +func (fake *FakeHealthChecker) recordInvocation(key string, args []interface{}) { + fake.invocationsMutex.Lock() + defer fake.invocationsMutex.Unlock() + if fake.invocations == nil { + fake.invocations = map[string][][]interface{}{} + } + if fake.invocations[key] == nil { + fake.invocations[key] = [][]interface{}{} + } + fake.invocations[key] = append(fake.invocations[key], args) +} + +var _ telemetry.HealthChecker = new(FakeHealthChecker) From 32be3b73158c459dc859bf281b95668f906adafc Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Thu, 1 Feb 2024 12:21:49 -0800 Subject: [PATCH 27/43] Add job tests for waiting on health checker --- 
internal/mode/static/health.go | 2 +- internal/mode/static/telemetry/job.go | 4 +-- internal/mode/static/telemetry/job_test.go | 35 +++++++++++++++---- .../telemetryfakes/fake_health_checker.go | 20 +++++------ 4 files changed, 41 insertions(+), 20 deletions(-) diff --git a/internal/mode/static/health.go b/internal/mode/static/health.go index 9373970ffb..86766a9c82 100644 --- a/internal/mode/static/health.go +++ b/internal/mode/static/health.go @@ -49,6 +49,6 @@ func (h *healthCheckerImpl) setAsReady() { } // GetReadyIfClosedChannel returns a channel which determines if the NGF Pod is ready. -func (h *healthCheckerImpl) GetReadyIfClosedChannel() chan struct{} { +func (h *healthCheckerImpl) GetReadyIfClosedChannel() <-chan struct{} { return h.readyIfClosedChan } diff --git a/internal/mode/static/telemetry/job.go b/internal/mode/static/telemetry/job.go index fa04164ca9..50785aabeb 100644 --- a/internal/mode/static/telemetry/job.go +++ b/internal/mode/static/telemetry/job.go @@ -22,7 +22,7 @@ type DataCollector interface { // HealthChecker checks if the NGF Pod is ready. type HealthChecker interface { // GetReadyIfClosedChannel returns a channel which determines if the NGF Pod is ready. - GetReadyIfClosedChannel() chan struct{} + GetReadyIfClosedChannel() <-chan struct{} } // JobConfig is the configuration for the telemetry job. @@ -61,7 +61,7 @@ func (j *Job) Start(ctx context.Context) error { select { case <-readyChannel: case <-ctx.Done(): - j.cfg.Logger.V(1).Info("Failed to start telemetry job") + j.cfg.Logger.Info("Context canceled, failed to start telemetry job") return ctx.Err() } diff --git a/internal/mode/static/telemetry/job_test.go b/internal/mode/static/telemetry/job_test.go index 2c4339ede8..cbf455d904 100644 --- a/internal/mode/static/telemetry/job_test.go +++ b/internal/mode/static/telemetry/job_test.go @@ -29,6 +29,9 @@ var _ = Describe("Job", func() { dataCollector = &telemetryfakes.FakeDataCollector{} healthCollector = &telemetryfakes.FakeHealthChecker{} + readyChannel = make(chan struct{}) + healthCollector.GetReadyIfClosedChannelReturns(readyChannel) + job = telemetry.NewJob(telemetry.JobConfig{ Exporter: exporter, Logger: zap.New(), @@ -54,21 +57,19 @@ var _ = Describe("Job", func() { DescribeTable( "Job runs with a few reports without any errors", - func(exporterError error) { + func(exporterError error, sleep time.Duration) { // The fact that exporter return an error must not affect how many times the Job makes a report. 
exporter.ExportReturns(exporterError) ctx, cancel := context.WithTimeout(context.Background(), timeout) - readyChannel = make(chan struct{}) - healthCollector.GetReadyIfClosedChannelReturns(readyChannel) - close(readyChannel) - errCh := make(chan error) go func() { errCh <- job.Start(ctx) close(errCh) }() + time.Sleep(sleep) + close(readyChannel) const minReports = 2 // ensure that the Job reports more than once: it doesn't exit after the first report @@ -82,7 +83,27 @@ var _ = Describe("Job", func() { Eventually(errCh).Should(Receive(BeNil())) Eventually(errCh).Should(BeClosed()) }, - Entry("Job runs with Exporter not returning errors", nil), - Entry("Job runs with Exporter returning an error", errors.New("some error")), + Entry("Job runs with Exporter not returning errors", nil, time.Duration(0)), + Entry("Job runs with Exporter returning an error", errors.New("some error"), time.Duration(0)), + Entry("Job runs with extended time waiting for NGF Pod to be ready", nil, 500*time.Millisecond), ) + + When("job context is canceled", func() { + It("should gracefully exist", func() { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + + errCh := make(chan error) + go func() { + errCh <- job.Start(ctx) + close(errCh) + }() + + sleep := 500 * time.Millisecond + time.Sleep(sleep) + cancel() + + Eventually(errCh).Should(Receive()) + Eventually(errCh).Should(BeClosed()) + }) + }) }) diff --git a/internal/mode/static/telemetry/telemetryfakes/fake_health_checker.go b/internal/mode/static/telemetry/telemetryfakes/fake_health_checker.go index e7915a3d9c..ead2601381 100644 --- a/internal/mode/static/telemetry/telemetryfakes/fake_health_checker.go +++ b/internal/mode/static/telemetry/telemetryfakes/fake_health_checker.go @@ -8,21 +8,21 @@ import ( ) type FakeHealthChecker struct { - GetReadyIfClosedChannelStub func() chan struct{} + GetReadyIfClosedChannelStub func() <-chan struct{} getReadyIfClosedChannelMutex sync.RWMutex getReadyIfClosedChannelArgsForCall []struct { } getReadyIfClosedChannelReturns struct { - result1 chan struct{} + result1 <-chan struct{} } getReadyIfClosedChannelReturnsOnCall map[int]struct { - result1 chan struct{} + result1 <-chan struct{} } invocations map[string][][]interface{} invocationsMutex sync.RWMutex } -func (fake *FakeHealthChecker) GetReadyIfClosedChannel() chan struct{} { +func (fake *FakeHealthChecker) GetReadyIfClosedChannel() <-chan struct{} { fake.getReadyIfClosedChannelMutex.Lock() ret, specificReturn := fake.getReadyIfClosedChannelReturnsOnCall[len(fake.getReadyIfClosedChannelArgsForCall)] fake.getReadyIfClosedChannelArgsForCall = append(fake.getReadyIfClosedChannelArgsForCall, struct { @@ -46,32 +46,32 @@ func (fake *FakeHealthChecker) GetReadyIfClosedChannelCallCount() int { return len(fake.getReadyIfClosedChannelArgsForCall) } -func (fake *FakeHealthChecker) GetReadyIfClosedChannelCalls(stub func() chan struct{}) { +func (fake *FakeHealthChecker) GetReadyIfClosedChannelCalls(stub func() <-chan struct{}) { fake.getReadyIfClosedChannelMutex.Lock() defer fake.getReadyIfClosedChannelMutex.Unlock() fake.GetReadyIfClosedChannelStub = stub } -func (fake *FakeHealthChecker) GetReadyIfClosedChannelReturns(result1 chan struct{}) { +func (fake *FakeHealthChecker) GetReadyIfClosedChannelReturns(result1 <-chan struct{}) { fake.getReadyIfClosedChannelMutex.Lock() defer fake.getReadyIfClosedChannelMutex.Unlock() fake.GetReadyIfClosedChannelStub = nil fake.getReadyIfClosedChannelReturns = struct { - result1 chan struct{} + result1 <-chan struct{} }{result1} } 
-func (fake *FakeHealthChecker) GetReadyIfClosedChannelReturnsOnCall(i int, result1 chan struct{}) { +func (fake *FakeHealthChecker) GetReadyIfClosedChannelReturnsOnCall(i int, result1 <-chan struct{}) { fake.getReadyIfClosedChannelMutex.Lock() defer fake.getReadyIfClosedChannelMutex.Unlock() fake.GetReadyIfClosedChannelStub = nil if fake.getReadyIfClosedChannelReturnsOnCall == nil { fake.getReadyIfClosedChannelReturnsOnCall = make(map[int]struct { - result1 chan struct{} + result1 <-chan struct{} }) } fake.getReadyIfClosedChannelReturnsOnCall[i] = struct { - result1 chan struct{} + result1 <-chan struct{} }{result1} } From c07cb17bcbf83686eab0a037c921d9ea8545c423 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Fri, 2 Feb 2024 08:45:43 -0800 Subject: [PATCH 28/43] Add review feedback --- internal/mode/static/handler_test.go | 9 +++------ internal/mode/static/health.go | 2 +- internal/mode/static/telemetry/job.go | 3 --- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/internal/mode/static/handler_test.go b/internal/mode/static/handler_test.go index 0f8e9b8070..449d82f614 100644 --- a/internal/mode/static/handler_test.go +++ b/internal/mode/static/handler_test.go @@ -438,8 +438,7 @@ var _ = Describe("eventHandler", func() { Expect(helpers.Diff(handler.GetLatestConfiguration(), &dataplane.Configuration{Version: 1})).To(BeEmpty()) - _, ok := <-readyChannel - Expect(ok).To(BeFalse()) + Expect(readyChannel).To(BeClosed()) Expect(handler.cfg.healthChecker.readyCheck(nil)).To(Succeed()) }) @@ -454,8 +453,7 @@ var _ = Describe("eventHandler", func() { Expect(handler.GetLatestConfiguration()).To(BeNil()) - _, ok := <-readyChannel - Expect(ok).To(BeFalse()) + Expect(readyChannel).To(BeClosed()) Expect(handler.cfg.healthChecker.readyCheck(nil)).To(Succeed()) }) @@ -487,8 +485,7 @@ var _ = Describe("eventHandler", func() { Expect(helpers.Diff(handler.GetLatestConfiguration(), &dataplane.Configuration{Version: 2})).To(BeEmpty()) - _, ok := <-readyChannel - Expect(ok).To(BeFalse()) + Expect(readyChannel).To(BeClosed()) Expect(handler.cfg.healthChecker.readyCheck(nil)).To(Succeed()) }) diff --git a/internal/mode/static/health.go b/internal/mode/static/health.go index 86766a9c82..0efc5d4676 100644 --- a/internal/mode/static/health.go +++ b/internal/mode/static/health.go @@ -48,7 +48,7 @@ func (h *healthCheckerImpl) setAsReady() { close(h.readyIfClosedChan) } -// GetReadyIfClosedChannel returns a channel which determines if the NGF Pod is ready. +// GetReadyIfClosedChannel returns a read-only channel, which determines if the NGF Pod is ready. func (h *healthCheckerImpl) GetReadyIfClosedChannel() <-chan struct{} { return h.readyIfClosedChan } diff --git a/internal/mode/static/telemetry/job.go b/internal/mode/static/telemetry/job.go index 50785aabeb..56c5202004 100644 --- a/internal/mode/static/telemetry/job.go +++ b/internal/mode/static/telemetry/job.go @@ -54,9 +54,6 @@ func NewJob(cfg JobConfig) *Job { // Start starts the telemetry job. 
// Implements controller-runtime manager.Runnable func (j *Job) Start(ctx context.Context) error { - // wait here until pod is ready, have to propagate down health checker for pod - // make sure I can gracefully terminate if the context is canceled - readyChannel := j.cfg.HealthChecker.GetReadyIfClosedChannel() select { case <-readyChannel: From 57805ad1fdee7ced3d1437e858e644d0b13eb7c8 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Fri, 2 Feb 2024 10:51:13 -0800 Subject: [PATCH 29/43] Refactor CreateTelemetryJobWorker and job tests --- internal/mode/static/manager.go | 62 +++++++--------- internal/mode/static/telemetry/job.go | 82 ++++++---------------- internal/mode/static/telemetry/job_test.go | 21 +++--- 3 files changed, 59 insertions(+), 106 deletions(-) diff --git a/internal/mode/static/manager.go b/internal/mode/static/manager.go index a70bba2c24..1909ee3dcc 100644 --- a/internal/mode/static/manager.go +++ b/internal/mode/static/manager.go @@ -213,7 +213,33 @@ func StartManager(cfg config.Config) error { return fmt.Errorf("cannot register status updater: %w", err) } - if err = mgr.Add(createTelemetryJob(cfg)); err != nil { + logger := cfg.Logger.WithName("telemetryJob") + exporter := telemetry.NewLoggingExporter(cfg.Logger.WithName("telemetryExporter").V(1 /* debug */)) + + // 10 min jitter is enough per telemetry destination recommendation + // For the default period of 24 hours, jitter will be 10min /(24*60)min = 0.0069 + jitterFactor := 10.0 / (24 * 60) // added jitter is bound by jitterFactor * period + + dataCollector := telemetry.NewDataCollectorImpl(telemetry.DataCollectorConfig{ + K8sClientReader: mgr.GetClient(), + GraphGetter: processor, + ConfigurationGetter: eventHandler, + Version: cfg.Version, + }) + + worker := telemetry.CreateTelemetryJobWorker(logger, exporter, dataCollector, hc) + + telemetryJob := &runnables.Leader{ + Runnable: runnables.NewCronJob( + runnables.CronJobConfig{ + Worker: worker, + Logger: logger, + Period: cfg.TelemetryReportPeriod, + JitterFactor: jitterFactor, + }, + ), + } + if err = mgr.Add(telemetryJob); err != nil { return fmt.Errorf("cannot register telemetry job: %w", err) } @@ -438,40 +464,6 @@ func prepareFirstEventBatchPreparerArgs( return objects, objectLists } -func createTelemetryJob(cfg config.Config) *runnables.Leader { - logger := cfg.Logger.WithName("telemetryJob") - exporter := telemetry.NewLoggingExporter(cfg.Logger.WithName("telemetryExporter").V(1 /* debug */)) - - worker := func(ctx context.Context) { - // Gather telemetry - logger.V(1).Info("Gathering telemetry") - - // We will need to gather data as defined in https://github.com/nginxinc/nginx-gateway-fabric/issues/793 - data := telemetry.Data{} - - // Export telemetry - logger.V(1).Info("Exporting telemetry") - - if err := exporter.Export(ctx, data); err != nil { - logger.Error(err, "Failed to export telemetry") - } - } - // 10 min jitter is enough per telemetry destination recommendation - // For the default period of 24 hours, jitter will be 10min /(24*60)min = 0.0069 - jitterFactor := 10.0 / (24 * 60) // added jitter is bound by jitterFactor * period - - return &runnables.Leader{ - Runnable: runnables.NewCronJob( - runnables.CronJobConfig{ - Worker: worker, - Logger: logger, - Period: cfg.TelemetryReportPeriod, - JitterFactor: jitterFactor, - }, - ), - } -} - func setInitialConfig( reader client.Reader, logger logr.Logger, diff --git a/internal/mode/static/telemetry/job.go b/internal/mode/static/telemetry/job.go index 56c5202004..2ef39631a5 100644 --- 
a/internal/mode/static/telemetry/job.go +++ b/internal/mode/static/telemetry/job.go @@ -2,11 +2,8 @@ package telemetry import ( "context" - "time" "github.com/go-logr/logr" - "k8s.io/apimachinery/pkg/util/wait" - "sigs.k8s.io/controller-runtime/pkg/manager" ) //go:generate go run github.com/maxbrunsfeld/counterfeiter/v6 . DataCollector @@ -25,74 +22,35 @@ type HealthChecker interface { GetReadyIfClosedChannel() <-chan struct{} } -// JobConfig is the configuration for the telemetry job. -type JobConfig struct { - // Exporter is the exporter to use for exporting telemetry data. - Exporter Exporter - // DataCollector is the collector to use for collecting telemetry data. - DataCollector DataCollector - // HealthChecker lets us check if the NGF Pod is ready. - HealthChecker HealthChecker - // Logger is the logger. - Logger logr.Logger - // Period defines the period of the telemetry job. The job will run every Period. - Period time.Duration -} - -// Job periodically exports telemetry data using the provided exporter. -type Job struct { - cfg JobConfig -} - -// NewJob creates a new telemetry job. -func NewJob(cfg JobConfig) *Job { - return &Job{ - cfg: cfg, - } -} - -// Start starts the telemetry job. -// Implements controller-runtime manager.Runnable -func (j *Job) Start(ctx context.Context) error { - readyChannel := j.cfg.HealthChecker.GetReadyIfClosedChannel() - select { - case <-readyChannel: - case <-ctx.Done(): - j.cfg.Logger.Info("Context canceled, failed to start telemetry job") - return ctx.Err() - } - - j.cfg.Logger.Info("Starting telemetry job") +func CreateTelemetryJobWorker( + logger logr.Logger, + exporter Exporter, + dataCollector DataCollector, + healthChecker HealthChecker, +) func(ctx context.Context) { + return func(ctx context.Context) { + readyChannel := healthChecker.GetReadyIfClosedChannel() + select { + case <-readyChannel: + case <-ctx.Done(): + logger.Info("Context canceled, failed to start telemetry job") + return + } - report := func(ctx context.Context) { // Gather telemetry - j.cfg.Logger.V(1).Info("Gathering telemetry data") + logger.V(1).Info("Gathering telemetry data") // We will need to gather data as defined in https://github.com/nginxinc/nginx-gateway-fabric/issues/793 - data, err := j.cfg.DataCollector.Collect(ctx) + data, err := dataCollector.Collect(ctx) if err != nil { - j.cfg.Logger.Error(err, "Failed to collect telemetry data") + logger.Error(err, "Failed to collect telemetry data") } // Export telemetry - j.cfg.Logger.V(1).Info("Exporting telemetry data") + logger.V(1).Info("Exporting telemetry data") - if err := j.cfg.Exporter.Export(ctx, data); err != nil { - j.cfg.Logger.Error(err, "Failed to export telemetry data") + if err := exporter.Export(ctx, data); err != nil { + logger.Error(err, "Failed to export telemetry data") } } - - const ( - // 10 min jitter is enough per telemetry destination recommendation - // For the default period of 24 hours, jitter will be 10min /(24*60)min = 0.0069 - jitterFactor = 10.0 / (24 * 60) // added jitter is bound by jitterFactor * period - sliding = true // This means the period with jitter will be calculated after each report() call. 
- ) - - wait.JitterUntilWithContext(ctx, report, j.cfg.Period, jitterFactor, sliding) - - j.cfg.Logger.Info("Stopping telemetry job") - return nil } - -var _ manager.Runnable = &Job{} diff --git a/internal/mode/static/telemetry/job_test.go b/internal/mode/static/telemetry/job_test.go index cbf455d904..5f726b9aa5 100644 --- a/internal/mode/static/telemetry/job_test.go +++ b/internal/mode/static/telemetry/job_test.go @@ -9,17 +9,19 @@ import ( . "github.com/onsi/gomega" "sigs.k8s.io/controller-runtime/pkg/log/zap" + "github.com/nginxinc/nginx-gateway-fabric/internal/framework/runnables" "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/telemetry" "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/telemetry/telemetryfakes" ) var _ = Describe("Job", func() { var ( - job *telemetry.Job + cronjob *runnables.CronJob exporter *telemetryfakes.FakeExporter dataCollector *telemetryfakes.FakeDataCollector healthCollector *telemetryfakes.FakeHealthChecker expData telemetry.Data + worker func(context.Context) readyChannel chan struct{} ) const timeout = 10 * time.Second @@ -32,12 +34,13 @@ var _ = Describe("Job", func() { readyChannel = make(chan struct{}) healthCollector.GetReadyIfClosedChannelReturns(readyChannel) - job = telemetry.NewJob(telemetry.JobConfig{ - Exporter: exporter, - Logger: zap.New(), - Period: 1 * time.Millisecond, // 1ms is much smaller than timeout so the Job should report a few times - DataCollector: dataCollector, - HealthChecker: healthCollector, + worker = telemetry.CreateTelemetryJobWorker(zap.New(), exporter, dataCollector, healthCollector) + + cronjob = runnables.NewCronJob(runnables.CronJobConfig{ + Worker: worker, + Logger: zap.New(), + Period: 1 * time.Millisecond, // 1ms is much smaller than timeout so the Job should report a few times, + JitterFactor: 10.0 / (24 * 60), // added jitter is bound by jitterFactor * period }) expData = telemetry.Data{ @@ -65,7 +68,7 @@ var _ = Describe("Job", func() { errCh := make(chan error) go func() { - errCh <- job.Start(ctx) + errCh <- cronjob.Start(ctx) close(errCh) }() time.Sleep(sleep) @@ -94,7 +97,7 @@ var _ = Describe("Job", func() { errCh := make(chan error) go func() { - errCh <- job.Start(ctx) + errCh <- cronjob.Start(ctx) close(errCh) }() From 4ed99487e66e28275397dffc8d9f4bf78565a1f7 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Fri, 2 Feb 2024 11:09:44 -0800 Subject: [PATCH 30/43] Add FIXME and and small comment --- deploy/helm-chart/templates/rbac.yaml | 2 ++ internal/mode/static/telemetry/collector.go | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/deploy/helm-chart/templates/rbac.yaml b/deploy/helm-chart/templates/rbac.yaml index 90414f7975..c69a995ad9 100644 --- a/deploy/helm-chart/templates/rbac.yaml +++ b/deploy/helm-chart/templates/rbac.yaml @@ -21,6 +21,8 @@ rules: - namespaces - services - secrets +# FIXME(bjee19): make nodes permission dependent on telemetry being enabled. +# https://github.com/nginxinc/nginx-gateway-fabric/issues/1317. - nodes verbs: - list diff --git a/internal/mode/static/telemetry/collector.go b/internal/mode/static/telemetry/collector.go index 36275a1587..416b004eef 100644 --- a/internal/mode/static/telemetry/collector.go +++ b/internal/mode/static/telemetry/collector.go @@ -33,7 +33,8 @@ type NGFResourceCounts struct { HTTPRoutes int Secrets int Services int - Endpoints int + // Endpoints include the total count of Endpoints(IP:port) across all referenced services. 
+ Endpoints int } // ProjectMetadata stores the name of the project and the current version. From 49da1d3ff728c303211039a90a0e9ade79b4c07e Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Fri, 2 Feb 2024 11:17:07 -0800 Subject: [PATCH 31/43] Update manifests --- deploy/manifests/nginx-gateway.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/deploy/manifests/nginx-gateway.yaml b/deploy/manifests/nginx-gateway.yaml index 84636e4026..de2c2c1b11 100644 --- a/deploy/manifests/nginx-gateway.yaml +++ b/deploy/manifests/nginx-gateway.yaml @@ -32,6 +32,8 @@ rules: - namespaces - services - secrets + # FIXME(bjee19): make nodes permission dependent on telemetry being enabled. + # https://github.com/nginxinc/nginx-gateway-fabric/issues/1317. - nodes verbs: - list From 63f3a7644a8f9618acc096d0d04c795fe52938bc Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Fri, 2 Feb 2024 11:35:47 -0800 Subject: [PATCH 32/43] Add back createTelemetryJob --- deploy/helm-chart/templates/rbac.yaml | 4 +-- internal/mode/static/manager.go | 48 +++++++++++++++------------ 2 files changed, 29 insertions(+), 23 deletions(-) diff --git a/deploy/helm-chart/templates/rbac.yaml b/deploy/helm-chart/templates/rbac.yaml index c69a995ad9..97963ed1bd 100644 --- a/deploy/helm-chart/templates/rbac.yaml +++ b/deploy/helm-chart/templates/rbac.yaml @@ -21,8 +21,8 @@ rules: - namespaces - services - secrets -# FIXME(bjee19): make nodes permission dependent on telemetry being enabled. -# https://github.com/nginxinc/nginx-gateway-fabric/issues/1317. + # FIXME(bjee19): make nodes permission dependent on telemetry being enabled. + # https://github.com/nginxinc/nginx-gateway-fabric/issues/1317. - nodes verbs: - list diff --git a/internal/mode/static/manager.go b/internal/mode/static/manager.go index 1909ee3dcc..53edc3960d 100644 --- a/internal/mode/static/manager.go +++ b/internal/mode/static/manager.go @@ -213,33 +213,13 @@ func StartManager(cfg config.Config) error { return fmt.Errorf("cannot register status updater: %w", err) } - logger := cfg.Logger.WithName("telemetryJob") - exporter := telemetry.NewLoggingExporter(cfg.Logger.WithName("telemetryExporter").V(1 /* debug */)) - - // 10 min jitter is enough per telemetry destination recommendation - // For the default period of 24 hours, jitter will be 10min /(24*60)min = 0.0069 - jitterFactor := 10.0 / (24 * 60) // added jitter is bound by jitterFactor * period - dataCollector := telemetry.NewDataCollectorImpl(telemetry.DataCollectorConfig{ K8sClientReader: mgr.GetClient(), GraphGetter: processor, ConfigurationGetter: eventHandler, Version: cfg.Version, }) - - worker := telemetry.CreateTelemetryJobWorker(logger, exporter, dataCollector, hc) - - telemetryJob := &runnables.Leader{ - Runnable: runnables.NewCronJob( - runnables.CronJobConfig{ - Worker: worker, - Logger: logger, - Period: cfg.TelemetryReportPeriod, - JitterFactor: jitterFactor, - }, - ), - } - if err = mgr.Add(telemetryJob); err != nil { + if err = mgr.Add(createTelemetryJob(cfg, dataCollector, hc)); err != nil { return fmt.Errorf("cannot register telemetry job: %w", err) } @@ -425,6 +405,32 @@ func registerControllers( return nil } +func createTelemetryJob( + cfg config.Config, + dataCollector telemetry.DataCollector, + hc telemetry.HealthChecker, +) *runnables.Leader { + logger := cfg.Logger.WithName("telemetryJob") + exporter := telemetry.NewLoggingExporter(cfg.Logger.WithName("telemetryExporter").V(1 /* debug */)) + + worker := telemetry.CreateTelemetryJobWorker(logger, exporter, 
dataCollector, hc) + + // 10 min jitter is enough per telemetry destination recommendation + // For the default period of 24 hours, jitter will be 10min /(24*60)min = 0.0069 + jitterFactor := 10.0 / (24 * 60) // added jitter is bound by jitterFactor * period + + return &runnables.Leader{ + Runnable: runnables.NewCronJob( + runnables.CronJobConfig{ + Worker: worker, + Logger: logger, + Period: cfg.TelemetryReportPeriod, + JitterFactor: jitterFactor, + }, + ), + } +} + func prepareFirstEventBatchPreparerArgs( gcName string, gwNsName *types.NamespacedName, From a091bde3e87da30367b5a3f22b902dab02b34b5f Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Fri, 2 Feb 2024 15:19:41 -0800 Subject: [PATCH 33/43] Add job worker name change and test changes --- .../telemetry/{job.go => job_worker.go} | 0 .../{job_test.go => job_worker_test.go} | 54 +++++-------------- 2 files changed, 14 insertions(+), 40 deletions(-) rename internal/mode/static/telemetry/{job.go => job_worker.go} (100%) rename internal/mode/static/telemetry/{job_test.go => job_worker_test.go} (50%) diff --git a/internal/mode/static/telemetry/job.go b/internal/mode/static/telemetry/job_worker.go similarity index 100% rename from internal/mode/static/telemetry/job.go rename to internal/mode/static/telemetry/job_worker.go diff --git a/internal/mode/static/telemetry/job_test.go b/internal/mode/static/telemetry/job_worker_test.go similarity index 50% rename from internal/mode/static/telemetry/job_test.go rename to internal/mode/static/telemetry/job_worker_test.go index 5f726b9aa5..0190731b49 100644 --- a/internal/mode/static/telemetry/job_test.go +++ b/internal/mode/static/telemetry/job_worker_test.go @@ -2,21 +2,18 @@ package telemetry_test import ( "context" - "errors" "time" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" "sigs.k8s.io/controller-runtime/pkg/log/zap" - "github.com/nginxinc/nginx-gateway-fabric/internal/framework/runnables" "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/telemetry" "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/telemetry/telemetryfakes" ) -var _ = Describe("Job", func() { +var _ = Describe("Job Worker", func() { var ( - cronjob *runnables.CronJob exporter *telemetryfakes.FakeExporter dataCollector *telemetryfakes.FakeDataCollector healthCollector *telemetryfakes.FakeHealthChecker @@ -36,13 +33,6 @@ var _ = Describe("Job", func() { worker = telemetry.CreateTelemetryJobWorker(zap.New(), exporter, dataCollector, healthCollector) - cronjob = runnables.NewCronJob(runnables.CronJobConfig{ - Worker: worker, - Logger: zap.New(), - Period: 1 * time.Millisecond, // 1ms is much smaller than timeout so the Job should report a few times, - JitterFactor: 10.0 / (24 * 60), // added jitter is bound by jitterFactor * period - }) - expData = telemetry.Data{ ProjectMetadata: telemetry.ProjectMetadata{Name: "NGF", Version: "1.1"}, NodeCount: 3, @@ -59,54 +49,38 @@ var _ = Describe("Job", func() { }) DescribeTable( - "Job runs with a few reports without any errors", - func(exporterError error, sleep time.Duration) { - // The fact that exporter return an error must not affect how many times the Job makes a report. 
- exporter.ExportReturns(exporterError) - + "Job worker runs without any errors", + func(sleep time.Duration) { ctx, cancel := context.WithTimeout(context.Background(), timeout) - errCh := make(chan error) go func() { - errCh <- cronjob.Start(ctx) - close(errCh) + time.Sleep(sleep) + close(readyChannel) }() - time.Sleep(sleep) - close(readyChannel) - - const minReports = 2 // ensure that the Job reports more than once: it doesn't exit after the first report - - Eventually(exporter.ExportCallCount).Should(BeNumerically(">=", minReports)) - for i := 0; i < minReports; i++ { - _, data := exporter.ExportArgsForCall(i) - Expect(data).To(Equal(expData)) - } + worker(ctx) + _, data := exporter.ExportArgsForCall(0) + Expect(data).To(Equal(expData)) cancel() - Eventually(errCh).Should(Receive(BeNil())) - Eventually(errCh).Should(BeClosed()) }, - Entry("Job runs with Exporter not returning errors", nil, time.Duration(0)), - Entry("Job runs with Exporter returning an error", errors.New("some error"), time.Duration(0)), - Entry("Job runs with extended time waiting for NGF Pod to be ready", nil, 500*time.Millisecond), + Entry("Job worker runs with NGF Pod ready", time.Duration(0)), + Entry("Job worker runs with extended time waiting for NGF Pod to be ready", 1*time.Second), ) - When("job context is canceled", func() { + When("job worker context is canceled", func() { It("should gracefully exist", func() { ctx, cancel := context.WithTimeout(context.Background(), timeout) - errCh := make(chan error) go func() { - errCh <- cronjob.Start(ctx) - close(errCh) + worker(ctx) }() sleep := 500 * time.Millisecond time.Sleep(sleep) cancel() - Eventually(errCh).Should(Receive()) - Eventually(errCh).Should(BeClosed()) + Expect(dataCollector.CollectCallCount()).To(BeZero()) + Expect(exporter.ExportCallCount()).To(BeZero()) }) }) }) From 6caf02317fc205f0e1aea2a57a0ed9aa967b9b5e Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Fri, 2 Feb 2024 15:59:10 -0800 Subject: [PATCH 34/43] Change job worker tests to be unit tests --- .../mode/static/telemetry/job_worker_test.go | 136 +++++++++--------- 1 file changed, 64 insertions(+), 72 deletions(-) diff --git a/internal/mode/static/telemetry/job_worker_test.go b/internal/mode/static/telemetry/job_worker_test.go index 0190731b49..5209c0055d 100644 --- a/internal/mode/static/telemetry/job_worker_test.go +++ b/internal/mode/static/telemetry/job_worker_test.go @@ -2,9 +2,9 @@ package telemetry_test import ( "context" + "testing" "time" - . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" "sigs.k8s.io/controller-runtime/pkg/log/zap" @@ -12,75 +12,67 @@ import ( "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/telemetry/telemetryfakes" ) -var _ = Describe("Job Worker", func() { - var ( - exporter *telemetryfakes.FakeExporter - dataCollector *telemetryfakes.FakeDataCollector - healthCollector *telemetryfakes.FakeHealthChecker - expData telemetry.Data - worker func(context.Context) - readyChannel chan struct{} - ) - const timeout = 10 * time.Second - - BeforeEach(func() { - exporter = &telemetryfakes.FakeExporter{} - dataCollector = &telemetryfakes.FakeDataCollector{} - healthCollector = &telemetryfakes.FakeHealthChecker{} - - readyChannel = make(chan struct{}) - healthCollector.GetReadyIfClosedChannelReturns(readyChannel) - - worker = telemetry.CreateTelemetryJobWorker(zap.New(), exporter, dataCollector, healthCollector) - - expData = telemetry.Data{ - ProjectMetadata: telemetry.ProjectMetadata{Name: "NGF", Version: "1.1"}, - NodeCount: 3, - NGFResourceCounts: telemetry.NGFResourceCounts{ - Gateways: 1, - GatewayClasses: 1, - HTTPRoutes: 1, - Secrets: 1, - Services: 1, - Endpoints: 1, - }, - } - dataCollector.CollectReturns(expData, nil) - }) - - DescribeTable( - "Job worker runs without any errors", - func(sleep time.Duration) { - ctx, cancel := context.WithTimeout(context.Background(), timeout) - - go func() { - time.Sleep(sleep) - close(readyChannel) - }() - - worker(ctx) - _, data := exporter.ExportArgsForCall(0) - Expect(data).To(Equal(expData)) - cancel() +func TestCreateTelemetryJobWorker(t *testing.T) { + g := NewWithT(t) + + exporter := &telemetryfakes.FakeExporter{} + dataCollector := &telemetryfakes.FakeDataCollector{} + healthCollector := &telemetryfakes.FakeHealthChecker{} + + readyChannel := make(chan struct{}) + healthCollector.GetReadyIfClosedChannelReturns(readyChannel) + + worker := telemetry.CreateTelemetryJobWorker(zap.New(), exporter, dataCollector, healthCollector) + + expData := telemetry.Data{ + ProjectMetadata: telemetry.ProjectMetadata{Name: "NGF", Version: "1.1"}, + NodeCount: 3, + NGFResourceCounts: telemetry.NGFResourceCounts{ + Gateways: 1, + GatewayClasses: 1, + HTTPRoutes: 1, + Secrets: 1, + Services: 1, + Endpoints: 1, }, - Entry("Job worker runs with NGF Pod ready", time.Duration(0)), - Entry("Job worker runs with extended time waiting for NGF Pod to be ready", 1*time.Second), - ) - - When("job worker context is canceled", func() { - It("should gracefully exist", func() { - ctx, cancel := context.WithTimeout(context.Background(), timeout) - - go func() { - worker(ctx) - }() - - sleep := 500 * time.Millisecond - time.Sleep(sleep) - cancel() - - Expect(dataCollector.CollectCallCount()).To(BeZero()) - Expect(exporter.ExportCallCount()).To(BeZero()) - }) - }) -}) + } + dataCollector.CollectReturns(expData, nil) + + timeout := 10 * time.Second + ctx, cancel := context.WithTimeout(context.Background(), timeout) + + go func() { + time.Sleep(1 * time.Second) + close(readyChannel) + }() + + worker(ctx) + _, data := exporter.ExportArgsForCall(0) + g.Expect(data).To(Equal(expData)) + cancel() +} + +func TestCreateTelemetryJobWorker_ContextCanceled(t *testing.T) { + g := NewWithT(t) + + exporter := &telemetryfakes.FakeExporter{} + dataCollector := &telemetryfakes.FakeDataCollector{} + healthCollector := &telemetryfakes.FakeHealthChecker{} + + readyChannel := make(chan struct{}) + healthCollector.GetReadyIfClosedChannelReturns(readyChannel) + + worker := 
telemetry.CreateTelemetryJobWorker(zap.New(), exporter, dataCollector, healthCollector) + + timeout := 10 * time.Second + ctx, cancel := context.WithTimeout(context.Background(), timeout) + + go func() { + worker(ctx) + }() + + cancel() + + g.Expect(dataCollector.CollectCallCount()).To(BeZero()) + g.Expect(exporter.ExportCallCount()).To(BeZero()) +} From 9a1f99beabbbd6dc3214aea6daf64dd84380547e Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Fri, 2 Feb 2024 18:36:55 -0800 Subject: [PATCH 35/43] Add generic ReadyChannel to cronjob --- internal/framework/runnables/cronjob.go | 9 +++ internal/framework/runnables/cronjob_test.go | 53 +++++++++++++- internal/mode/static/handler_test.go | 6 +- internal/mode/static/health.go | 18 ++--- internal/mode/static/manager.go | 3 +- internal/mode/static/telemetry/job_worker.go | 13 +--- .../mode/static/telemetry/job_worker_test.go | 29 +------- .../telemetryfakes/fake_health_checker.go | 70 +++++++++---------- 8 files changed, 112 insertions(+), 89 deletions(-) diff --git a/internal/framework/runnables/cronjob.go b/internal/framework/runnables/cronjob.go index cc48bda91b..eed2ad4d07 100644 --- a/internal/framework/runnables/cronjob.go +++ b/internal/framework/runnables/cronjob.go @@ -13,6 +13,8 @@ import ( type CronJobConfig struct { // Worker is the function that will be run for every cronjob iteration. Worker func(context.Context) + // ReadyCh represents if the cronjob is ready to start. + ReadyCh <-chan struct{} // Logger is the logger. Logger logr.Logger // Period defines the period of the cronjob. The cronjob will run every Period. @@ -37,6 +39,13 @@ func NewCronJob(cfg CronJobConfig) *CronJob { // Start starts the cronjob. // Implements controller-runtime manager.Runnable func (j *CronJob) Start(ctx context.Context) error { + select { + case <-j.cfg.ReadyCh: + case <-ctx.Done(): + j.cfg.Logger.Info("Context canceled, failed to start cronjob") + return ctx.Err() + } + j.cfg.Logger.Info("Starting cronjob") sliding := true // This means the period with jitter will be calculated after each worker call. diff --git a/internal/framework/runnables/cronjob_test.go b/internal/framework/runnables/cronjob_test.go index 3faca2e160..e6382cf3f7 100644 --- a/internal/framework/runnables/cronjob_test.go +++ b/internal/framework/runnables/cronjob_test.go @@ -7,11 +7,18 @@ import ( . 
"github.com/onsi/gomega" "sigs.k8s.io/controller-runtime/pkg/log/zap" + + "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/telemetry/telemetryfakes" ) func TestCronJob(t *testing.T) { g := NewWithT(t) + healthCollector := &telemetryfakes.FakeHealthChecker{} + + readyChannel := make(chan struct{}) + healthCollector.GetReadyChReturns(readyChannel) + timeout := 10 * time.Second var callCount int @@ -22,9 +29,10 @@ func TestCronJob(t *testing.T) { } cfg := CronJobConfig{ - Worker: worker, - Logger: zap.New(), - Period: 1 * time.Millisecond, // 1ms is much smaller than timeout so the CronJob should run a few times + Worker: worker, + Logger: zap.New(), + Period: 1 * time.Millisecond, // 1ms is much smaller than timeout so the CronJob should run a few times + ReadyCh: healthCollector.GetReadyCh(), } job := NewCronJob(cfg) @@ -35,6 +43,7 @@ func TestCronJob(t *testing.T) { errCh <- job.Start(ctx) close(errCh) }() + close(readyChannel) minReports := 2 // ensure that the CronJob reports more than once: it doesn't exit after the first run @@ -44,3 +53,41 @@ func TestCronJob(t *testing.T) { g.Eventually(errCh).Should(Receive(BeNil())) g.Eventually(errCh).Should(BeClosed()) } + +func TestCronJob_ContextCanceled(t *testing.T) { + g := NewWithT(t) + + healthCollector := &telemetryfakes.FakeHealthChecker{} + + readyChannel := make(chan struct{}) + healthCollector.GetReadyChReturns(readyChannel) + + timeout := 10 * time.Second + var callCount int + + valCh := make(chan int, 128) + worker := func(context.Context) { + callCount++ + valCh <- callCount + } + + cfg := CronJobConfig{ + Worker: worker, + Logger: zap.New(), + Period: 1 * time.Millisecond, // 1ms is much smaller than timeout so the CronJob should run a few times + ReadyCh: healthCollector.GetReadyCh(), + } + job := NewCronJob(cfg) + + ctx, cancel := context.WithTimeout(context.Background(), timeout) + + errCh := make(chan error) + go func() { + errCh <- job.Start(ctx) + close(errCh) + }() + cancel() + + g.Eventually(errCh).Should(Receive()) + g.Eventually(errCh).Should(BeClosed()) +} diff --git a/internal/mode/static/handler_test.go b/internal/mode/static/handler_test.go index 449d82f614..e756b5f356 100644 --- a/internal/mode/static/handler_test.go +++ b/internal/mode/static/handler_test.go @@ -429,7 +429,7 @@ var _ = Describe("eventHandler", func() { It("should set the health checker status properly when there are changes", func() { e := &events.UpsertEvent{Resource: &gatewayv1.HTTPRoute{}} batch := []interface{}{e} - readyChannel := handler.cfg.healthChecker.GetReadyIfClosedChannel() + readyChannel := handler.cfg.healthChecker.GetReadyCh() fakeProcessor.ProcessReturns(state.ClusterStateChange, &graph.Graph{}) @@ -446,7 +446,7 @@ var _ = Describe("eventHandler", func() { It("should set the health checker status properly when there are no changes or errors", func() { e := &events.UpsertEvent{Resource: &gatewayv1.HTTPRoute{}} batch := []interface{}{e} - readyChannel := handler.cfg.healthChecker.GetReadyIfClosedChannel() + readyChannel := handler.cfg.healthChecker.GetReadyCh() Expect(handler.cfg.healthChecker.readyCheck(nil)).ToNot(Succeed()) handler.HandleEventBatch(context.Background(), ctlrZap.New(), batch) @@ -461,7 +461,7 @@ var _ = Describe("eventHandler", func() { It("should set the health checker status properly when there is an error", func() { e := &events.UpsertEvent{Resource: &gatewayv1.HTTPRoute{}} batch := []interface{}{e} - readyChannel := handler.cfg.healthChecker.GetReadyIfClosedChannel() + 
readyChannel := handler.cfg.healthChecker.GetReadyCh() fakeProcessor.ProcessReturns(state.ClusterStateChange, &graph.Graph{}) fakeNginxRuntimeMgr.ReloadReturns(errors.New("reload error")) diff --git a/internal/mode/static/health.go b/internal/mode/static/health.go index 0efc5d4676..762915afbe 100644 --- a/internal/mode/static/health.go +++ b/internal/mode/static/health.go @@ -9,7 +9,7 @@ import ( // newHealthCheckerImpl creates a new healthCheckerImpl. func newHealthCheckerImpl() *healthCheckerImpl { return &healthCheckerImpl{ - readyIfClosedChan: make(chan struct{}), + readyCh: make(chan struct{}), } } @@ -18,10 +18,10 @@ type healthCheckerImpl struct { // firstBatchError is set when the first batch fails to configure nginx // and we don't want to set ourselves as ready on the next batch if nothing changes firstBatchError error - // readyIfClosedChan is a channel that is initialized in NewHealthCheckerImpl and represents if the NGF Pod is ready. - readyIfClosedChan chan struct{} - lock sync.RWMutex - ready bool + // readyCh is a channel that is initialized in NewHealthCheckerImpl and represents if the NGF Pod is ready. + readyCh chan struct{} + lock sync.RWMutex + ready bool } // readyCheck returns the ready-state of the Pod. It satisfies the controller-runtime Checker type. @@ -45,10 +45,10 @@ func (h *healthCheckerImpl) setAsReady() { h.ready = true h.firstBatchError = nil - close(h.readyIfClosedChan) + close(h.readyCh) } -// GetReadyIfClosedChannel returns a read-only channel, which determines if the NGF Pod is ready. -func (h *healthCheckerImpl) GetReadyIfClosedChannel() <-chan struct{} { - return h.readyIfClosedChan +// GetReadyCh returns a read-only channel, which determines if the NGF Pod is ready. +func (h *healthCheckerImpl) GetReadyCh() <-chan struct{} { + return h.readyCh } diff --git a/internal/mode/static/manager.go b/internal/mode/static/manager.go index 53edc3960d..6334cecd8b 100644 --- a/internal/mode/static/manager.go +++ b/internal/mode/static/manager.go @@ -413,7 +413,7 @@ func createTelemetryJob( logger := cfg.Logger.WithName("telemetryJob") exporter := telemetry.NewLoggingExporter(cfg.Logger.WithName("telemetryExporter").V(1 /* debug */)) - worker := telemetry.CreateTelemetryJobWorker(logger, exporter, dataCollector, hc) + worker := telemetry.CreateTelemetryJobWorker(logger, exporter, dataCollector) // 10 min jitter is enough per telemetry destination recommendation // For the default period of 24 hours, jitter will be 10min /(24*60)min = 0.0069 @@ -426,6 +426,7 @@ func createTelemetryJob( Logger: logger, Period: cfg.TelemetryReportPeriod, JitterFactor: jitterFactor, + ReadyCh: hc.GetReadyCh(), }, ), } diff --git a/internal/mode/static/telemetry/job_worker.go b/internal/mode/static/telemetry/job_worker.go index 2ef39631a5..640603b624 100644 --- a/internal/mode/static/telemetry/job_worker.go +++ b/internal/mode/static/telemetry/job_worker.go @@ -18,25 +18,16 @@ type DataCollector interface { // HealthChecker checks if the NGF Pod is ready. type HealthChecker interface { - // GetReadyIfClosedChannel returns a channel which determines if the NGF Pod is ready. - GetReadyIfClosedChannel() <-chan struct{} + // GetReadyCh returns a channel which determines if the NGF Pod is ready. 
+ GetReadyCh() <-chan struct{} } func CreateTelemetryJobWorker( logger logr.Logger, exporter Exporter, dataCollector DataCollector, - healthChecker HealthChecker, ) func(ctx context.Context) { return func(ctx context.Context) { - readyChannel := healthChecker.GetReadyIfClosedChannel() - select { - case <-readyChannel: - case <-ctx.Done(): - logger.Info("Context canceled, failed to start telemetry job") - return - } - // Gather telemetry logger.V(1).Info("Gathering telemetry data") diff --git a/internal/mode/static/telemetry/job_worker_test.go b/internal/mode/static/telemetry/job_worker_test.go index 5209c0055d..3c8932a8d4 100644 --- a/internal/mode/static/telemetry/job_worker_test.go +++ b/internal/mode/static/telemetry/job_worker_test.go @@ -20,9 +20,9 @@ func TestCreateTelemetryJobWorker(t *testing.T) { healthCollector := &telemetryfakes.FakeHealthChecker{} readyChannel := make(chan struct{}) - healthCollector.GetReadyIfClosedChannelReturns(readyChannel) + healthCollector.GetReadyChReturns(readyChannel) - worker := telemetry.CreateTelemetryJobWorker(zap.New(), exporter, dataCollector, healthCollector) + worker := telemetry.CreateTelemetryJobWorker(zap.New(), exporter, dataCollector) expData := telemetry.Data{ ProjectMetadata: telemetry.ProjectMetadata{Name: "NGF", Version: "1.1"}, @@ -51,28 +51,3 @@ func TestCreateTelemetryJobWorker(t *testing.T) { g.Expect(data).To(Equal(expData)) cancel() } - -func TestCreateTelemetryJobWorker_ContextCanceled(t *testing.T) { - g := NewWithT(t) - - exporter := &telemetryfakes.FakeExporter{} - dataCollector := &telemetryfakes.FakeDataCollector{} - healthCollector := &telemetryfakes.FakeHealthChecker{} - - readyChannel := make(chan struct{}) - healthCollector.GetReadyIfClosedChannelReturns(readyChannel) - - worker := telemetry.CreateTelemetryJobWorker(zap.New(), exporter, dataCollector, healthCollector) - - timeout := 10 * time.Second - ctx, cancel := context.WithTimeout(context.Background(), timeout) - - go func() { - worker(ctx) - }() - - cancel() - - g.Expect(dataCollector.CollectCallCount()).To(BeZero()) - g.Expect(exporter.ExportCallCount()).To(BeZero()) -} diff --git a/internal/mode/static/telemetry/telemetryfakes/fake_health_checker.go b/internal/mode/static/telemetry/telemetryfakes/fake_health_checker.go index ead2601381..09325e529b 100644 --- a/internal/mode/static/telemetry/telemetryfakes/fake_health_checker.go +++ b/internal/mode/static/telemetry/telemetryfakes/fake_health_checker.go @@ -8,29 +8,29 @@ import ( ) type FakeHealthChecker struct { - GetReadyIfClosedChannelStub func() <-chan struct{} - getReadyIfClosedChannelMutex sync.RWMutex - getReadyIfClosedChannelArgsForCall []struct { + GetReadyChStub func() <-chan struct{} + getReadyChMutex sync.RWMutex + getReadyChArgsForCall []struct { } - getReadyIfClosedChannelReturns struct { + getReadyChReturns struct { result1 <-chan struct{} } - getReadyIfClosedChannelReturnsOnCall map[int]struct { + getReadyChReturnsOnCall map[int]struct { result1 <-chan struct{} } invocations map[string][][]interface{} invocationsMutex sync.RWMutex } -func (fake *FakeHealthChecker) GetReadyIfClosedChannel() <-chan struct{} { - fake.getReadyIfClosedChannelMutex.Lock() - ret, specificReturn := fake.getReadyIfClosedChannelReturnsOnCall[len(fake.getReadyIfClosedChannelArgsForCall)] - fake.getReadyIfClosedChannelArgsForCall = append(fake.getReadyIfClosedChannelArgsForCall, struct { +func (fake *FakeHealthChecker) GetReadyCh() <-chan struct{} { + fake.getReadyChMutex.Lock() + ret, specificReturn := 
fake.getReadyChReturnsOnCall[len(fake.getReadyChArgsForCall)] + fake.getReadyChArgsForCall = append(fake.getReadyChArgsForCall, struct { }{}) - stub := fake.GetReadyIfClosedChannelStub - fakeReturns := fake.getReadyIfClosedChannelReturns - fake.recordInvocation("GetReadyIfClosedChannel", []interface{}{}) - fake.getReadyIfClosedChannelMutex.Unlock() + stub := fake.GetReadyChStub + fakeReturns := fake.getReadyChReturns + fake.recordInvocation("GetReadyCh", []interface{}{}) + fake.getReadyChMutex.Unlock() if stub != nil { return stub() } @@ -40,37 +40,37 @@ func (fake *FakeHealthChecker) GetReadyIfClosedChannel() <-chan struct{} { return fakeReturns.result1 } -func (fake *FakeHealthChecker) GetReadyIfClosedChannelCallCount() int { - fake.getReadyIfClosedChannelMutex.RLock() - defer fake.getReadyIfClosedChannelMutex.RUnlock() - return len(fake.getReadyIfClosedChannelArgsForCall) +func (fake *FakeHealthChecker) GetReadyChCallCount() int { + fake.getReadyChMutex.RLock() + defer fake.getReadyChMutex.RUnlock() + return len(fake.getReadyChArgsForCall) } -func (fake *FakeHealthChecker) GetReadyIfClosedChannelCalls(stub func() <-chan struct{}) { - fake.getReadyIfClosedChannelMutex.Lock() - defer fake.getReadyIfClosedChannelMutex.Unlock() - fake.GetReadyIfClosedChannelStub = stub +func (fake *FakeHealthChecker) GetReadyChCalls(stub func() <-chan struct{}) { + fake.getReadyChMutex.Lock() + defer fake.getReadyChMutex.Unlock() + fake.GetReadyChStub = stub } -func (fake *FakeHealthChecker) GetReadyIfClosedChannelReturns(result1 <-chan struct{}) { - fake.getReadyIfClosedChannelMutex.Lock() - defer fake.getReadyIfClosedChannelMutex.Unlock() - fake.GetReadyIfClosedChannelStub = nil - fake.getReadyIfClosedChannelReturns = struct { +func (fake *FakeHealthChecker) GetReadyChReturns(result1 <-chan struct{}) { + fake.getReadyChMutex.Lock() + defer fake.getReadyChMutex.Unlock() + fake.GetReadyChStub = nil + fake.getReadyChReturns = struct { result1 <-chan struct{} }{result1} } -func (fake *FakeHealthChecker) GetReadyIfClosedChannelReturnsOnCall(i int, result1 <-chan struct{}) { - fake.getReadyIfClosedChannelMutex.Lock() - defer fake.getReadyIfClosedChannelMutex.Unlock() - fake.GetReadyIfClosedChannelStub = nil - if fake.getReadyIfClosedChannelReturnsOnCall == nil { - fake.getReadyIfClosedChannelReturnsOnCall = make(map[int]struct { +func (fake *FakeHealthChecker) GetReadyChReturnsOnCall(i int, result1 <-chan struct{}) { + fake.getReadyChMutex.Lock() + defer fake.getReadyChMutex.Unlock() + fake.GetReadyChStub = nil + if fake.getReadyChReturnsOnCall == nil { + fake.getReadyChReturnsOnCall = make(map[int]struct { result1 <-chan struct{} }) } - fake.getReadyIfClosedChannelReturnsOnCall[i] = struct { + fake.getReadyChReturnsOnCall[i] = struct { result1 <-chan struct{} }{result1} } @@ -78,8 +78,8 @@ func (fake *FakeHealthChecker) GetReadyIfClosedChannelReturnsOnCall(i int, resul func (fake *FakeHealthChecker) Invocations() map[string][][]interface{} { fake.invocationsMutex.RLock() defer fake.invocationsMutex.RUnlock() - fake.getReadyIfClosedChannelMutex.RLock() - defer fake.getReadyIfClosedChannelMutex.RUnlock() + fake.getReadyChMutex.RLock() + defer fake.getReadyChMutex.RUnlock() copiedInvocations := map[string][][]interface{}{} for key, value := range fake.invocations { copiedInvocations[key] = value From 8119c21bbe56822ff6342be80bbf81f7d4c35901 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Sat, 3 Feb 2024 13:21:44 -0800 Subject: [PATCH 36/43] Refactor collector tests to add list calls helper function --- 
.../mode/static/telemetry/collector_test.go | 78 +++++++++---------- 1 file changed, 35 insertions(+), 43 deletions(-) diff --git a/internal/mode/static/telemetry/collector_test.go b/internal/mode/static/telemetry/collector_test.go index b364b86725..26e5f069e8 100644 --- a/internal/mode/static/telemetry/collector_test.go +++ b/internal/mode/static/telemetry/collector_test.go @@ -21,6 +21,24 @@ import ( "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/telemetry/telemetryfakes" ) +func createListCallsFunc(nodes []v1.Node) func( + ctx context.Context, + list client.ObjectList, + option ...client.ListOption, +) error { + return func(ctx context.Context, list client.ObjectList, option ...client.ListOption) error { + Expect(option).To(BeEmpty()) + + switch typedList := list.(type) { + case *v1.NodeList: + typedList.Items = append(typedList.Items, nodes...) + default: + Fail(fmt.Sprintf("unknown type: %T", typedList)) + } + return nil + } +} + var _ = Describe("Collector", Ordered, func() { var ( k8sClientReader *eventsfakes.FakeReader @@ -62,27 +80,19 @@ var _ = Describe("Collector", Ordered, func() { Describe("Normal case", func() { When("collecting telemetry data", func() { It("collects all fields", func() { - node := v1.Node{ - ObjectMeta: metav1.ObjectMeta{Name: "node1"}, - } - node2 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{Name: "node2"}, - } - node3 := v1.Node{ - ObjectMeta: metav1.ObjectMeta{Name: "node3"}, + nodes := []v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{Name: "node1"}, + }, + { + ObjectMeta: metav1.ObjectMeta{Name: "node2"}, + }, + { + ObjectMeta: metav1.ObjectMeta{Name: "node3"}, + }, } - k8sClientReader.ListCalls(func(ctx context.Context, list client.ObjectList, option ...client.ListOption) error { - Expect(option).To(BeEmpty()) - - switch typedList := list.(type) { - case *v1.NodeList: - typedList.Items = append(typedList.Items, node, node2, node3) - default: - Fail(fmt.Sprintf("unknown type: %T", typedList)) - } - return nil - }) + k8sClientReader.ListCalls(createListCallsFunc(nodes)) secret1 := &v1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "secret1"}} secret2 := &v1.Secret{ObjectMeta: metav1.ObjectMeta{Name: "secret2"}} @@ -195,17 +205,7 @@ var _ = Describe("Collector", Ordered, func() { Describe("node count collector", func() { When("collecting node count data", func() { It("collects correct data for no nodes", func() { - k8sClientReader.ListCalls(func(ctx context.Context, list client.ObjectList, option ...client.ListOption) error { - Expect(option).To(BeEmpty()) - - switch typedList := list.(type) { - case *v1.NodeList: - typedList.Items = []v1.Node{} - default: - Fail(fmt.Sprintf("unknown type: %T", typedList)) - } - return nil - }) + k8sClientReader.ListCalls(createListCallsFunc(nil)) data, err := dataCollector.Collect(ctx) @@ -214,21 +214,13 @@ var _ = Describe("Collector", Ordered, func() { }) It("collects correct data for one node", func() { - node := v1.Node{ - ObjectMeta: metav1.ObjectMeta{Name: "node1"}, + nodes := []v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{Name: "node1"}, + }, } - k8sClientReader.ListCalls(func(ctx context.Context, list client.ObjectList, option ...client.ListOption) error { - Expect(option).To(BeEmpty()) - - switch typedList := list.(type) { - case *v1.NodeList: - typedList.Items = append(typedList.Items, node) - default: - Fail(fmt.Sprintf("unknown type: %T", typedList)) - } - return nil - }) + k8sClientReader.ListCalls(createListCallsFunc(nodes)) expData.NodeCount = 1 From 
455512a22af8107e071b54fd0ae0621d5656ed31 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Sun, 4 Feb 2024 13:29:16 -0800 Subject: [PATCH 37/43] Remove health checker interface --- internal/framework/runnables/cronjob_test.go | 12 +-- internal/mode/static/handler.go | 2 +- internal/mode/static/handler_test.go | 8 +- internal/mode/static/health.go | 20 ++-- internal/mode/static/health_test.go | 2 +- internal/mode/static/manager.go | 8 +- internal/mode/static/telemetry/job_worker.go | 8 -- .../mode/static/telemetry/job_worker_test.go | 9 -- .../telemetryfakes/fake_health_checker.go | 102 ------------------ 9 files changed, 22 insertions(+), 149 deletions(-) delete mode 100644 internal/mode/static/telemetry/telemetryfakes/fake_health_checker.go diff --git a/internal/framework/runnables/cronjob_test.go b/internal/framework/runnables/cronjob_test.go index e6382cf3f7..d330652705 100644 --- a/internal/framework/runnables/cronjob_test.go +++ b/internal/framework/runnables/cronjob_test.go @@ -7,17 +7,12 @@ import ( . "github.com/onsi/gomega" "sigs.k8s.io/controller-runtime/pkg/log/zap" - - "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/telemetry/telemetryfakes" ) func TestCronJob(t *testing.T) { g := NewWithT(t) - healthCollector := &telemetryfakes.FakeHealthChecker{} - readyChannel := make(chan struct{}) - healthCollector.GetReadyChReturns(readyChannel) timeout := 10 * time.Second var callCount int @@ -32,7 +27,7 @@ func TestCronJob(t *testing.T) { Worker: worker, Logger: zap.New(), Period: 1 * time.Millisecond, // 1ms is much smaller than timeout so the CronJob should run a few times - ReadyCh: healthCollector.GetReadyCh(), + ReadyCh: readyChannel, } job := NewCronJob(cfg) @@ -57,10 +52,7 @@ func TestCronJob(t *testing.T) { func TestCronJob_ContextCanceled(t *testing.T) { g := NewWithT(t) - healthCollector := &telemetryfakes.FakeHealthChecker{} - readyChannel := make(chan struct{}) - healthCollector.GetReadyChReturns(readyChannel) timeout := 10 * time.Second var callCount int @@ -75,7 +67,7 @@ func TestCronJob_ContextCanceled(t *testing.T) { Worker: worker, Logger: zap.New(), Period: 1 * time.Millisecond, // 1ms is much smaller than timeout so the CronJob should run a few times - ReadyCh: healthCollector.GetReadyCh(), + ReadyCh: readyChannel, } job := NewCronJob(cfg) diff --git a/internal/mode/static/handler.go b/internal/mode/static/handler.go index 603f3f91ec..f7448c646c 100644 --- a/internal/mode/static/handler.go +++ b/internal/mode/static/handler.go @@ -60,7 +60,7 @@ type eventHandlerConfig struct { // metricsCollector collects metrics for this controller. metricsCollector handlerMetricsCollector // healthChecker sets the health of the Pod to Ready once we've written out our initial config. - healthChecker *healthCheckerImpl + healthChecker *nginxConfiguredOnStartChecker // controlConfigNSName is the NamespacedName of the NginxGateway config for this controller. controlConfigNSName types.NamespacedName // version is the current version number of the nginx config. 
diff --git a/internal/mode/static/handler_test.go b/internal/mode/static/handler_test.go index e756b5f356..424fc315d2 100644 --- a/internal/mode/static/handler_test.go +++ b/internal/mode/static/handler_test.go @@ -84,7 +84,7 @@ var _ = Describe("eventHandler", func() { nginxRuntimeMgr: fakeNginxRuntimeMgr, statusUpdater: fakeStatusUpdater, eventRecorder: fakeEventRecorder, - healthChecker: newHealthCheckerImpl(), + healthChecker: newNginxConfiguredOnStartChecker(), controlConfigNSName: types.NamespacedName{Namespace: namespace, Name: configName}, gatewayPodConfig: config.GatewayPodConfig{ ServiceName: "nginx-gateway", @@ -429,7 +429,7 @@ var _ = Describe("eventHandler", func() { It("should set the health checker status properly when there are changes", func() { e := &events.UpsertEvent{Resource: &gatewayv1.HTTPRoute{}} batch := []interface{}{e} - readyChannel := handler.cfg.healthChecker.GetReadyCh() + readyChannel := handler.cfg.healthChecker.getReadyCh() fakeProcessor.ProcessReturns(state.ClusterStateChange, &graph.Graph{}) @@ -446,7 +446,7 @@ var _ = Describe("eventHandler", func() { It("should set the health checker status properly when there are no changes or errors", func() { e := &events.UpsertEvent{Resource: &gatewayv1.HTTPRoute{}} batch := []interface{}{e} - readyChannel := handler.cfg.healthChecker.GetReadyCh() + readyChannel := handler.cfg.healthChecker.getReadyCh() Expect(handler.cfg.healthChecker.readyCheck(nil)).ToNot(Succeed()) handler.HandleEventBatch(context.Background(), ctlrZap.New(), batch) @@ -461,7 +461,7 @@ var _ = Describe("eventHandler", func() { It("should set the health checker status properly when there is an error", func() { e := &events.UpsertEvent{Resource: &gatewayv1.HTTPRoute{}} batch := []interface{}{e} - readyChannel := handler.cfg.healthChecker.GetReadyCh() + readyChannel := handler.cfg.healthChecker.getReadyCh() fakeProcessor.ProcessReturns(state.ClusterStateChange, &graph.Graph{}) fakeNginxRuntimeMgr.ReloadReturns(errors.New("reload error")) diff --git a/internal/mode/static/health.go b/internal/mode/static/health.go index 762915afbe..180c67d643 100644 --- a/internal/mode/static/health.go +++ b/internal/mode/static/health.go @@ -6,19 +6,19 @@ import ( "sync" ) -// newHealthCheckerImpl creates a new healthCheckerImpl. -func newHealthCheckerImpl() *healthCheckerImpl { - return &healthCheckerImpl{ +// newNginxConfiguredOnStartChecker creates a new nginxConfiguredOnStartChecker. +func newNginxConfiguredOnStartChecker() *nginxConfiguredOnStartChecker { + return &nginxConfiguredOnStartChecker{ readyCh: make(chan struct{}), } } -// healthCheckerImpl implements HealthChecker. -type healthCheckerImpl struct { +// nginxConfiguredOnStartChecker is used to check if nginx is successfully configured and if the NGF Pod is ready. +type nginxConfiguredOnStartChecker struct { // firstBatchError is set when the first batch fails to configure nginx // and we don't want to set ourselves as ready on the next batch if nothing changes firstBatchError error - // readyCh is a channel that is initialized in NewHealthCheckerImpl and represents if the NGF Pod is ready. + // readyCh is a channel that is initialized in newNginxConfiguredOnStartChecker and represents if the NGF Pod is ready. readyCh chan struct{} lock sync.RWMutex ready bool @@ -27,7 +27,7 @@ type healthCheckerImpl struct { // readyCheck returns the ready-state of the Pod. It satisfies the controller-runtime Checker type. // We are considered ready after the handler processed the first batch. 
In case there is NGINX configuration // to write, it must be written and NGINX must be reloaded successfully. -func (h *healthCheckerImpl) readyCheck(_ *http.Request) error { +func (h *nginxConfiguredOnStartChecker) readyCheck(_ *http.Request) error { h.lock.RLock() defer h.lock.RUnlock() @@ -39,7 +39,7 @@ func (h *healthCheckerImpl) readyCheck(_ *http.Request) error { } // setAsReady marks the health check as ready. -func (h *healthCheckerImpl) setAsReady() { +func (h *nginxConfiguredOnStartChecker) setAsReady() { h.lock.Lock() defer h.lock.Unlock() @@ -48,7 +48,7 @@ func (h *healthCheckerImpl) setAsReady() { close(h.readyCh) } -// GetReadyCh returns a read-only channel, which determines if the NGF Pod is ready. -func (h *healthCheckerImpl) GetReadyCh() <-chan struct{} { +// getReadyCh returns a read-only channel, which determines if the NGF Pod is ready. +func (h *nginxConfiguredOnStartChecker) getReadyCh() <-chan struct{} { return h.readyCh } diff --git a/internal/mode/static/health_test.go b/internal/mode/static/health_test.go index 5515160302..7f9f63d47d 100644 --- a/internal/mode/static/health_test.go +++ b/internal/mode/static/health_test.go @@ -8,7 +8,7 @@ import ( func TestReadyCheck(t *testing.T) { g := NewWithT(t) - hc := newHealthCheckerImpl() + hc := newNginxConfiguredOnStartChecker() g.Expect(hc.readyCheck(nil)).ToNot(Succeed()) hc.ready = true diff --git a/internal/mode/static/manager.go b/internal/mode/static/manager.go index 6334cecd8b..255ecf3a19 100644 --- a/internal/mode/static/manager.go +++ b/internal/mode/static/manager.go @@ -69,7 +69,7 @@ func init() { // nolint:gocyclo func StartManager(cfg config.Config) error { - hc := newHealthCheckerImpl() + hc := newNginxConfiguredOnStartChecker() mgr, err := createManager(cfg, hc) if err != nil { return fmt.Errorf("cannot build runtime manager: %w", err) @@ -227,7 +227,7 @@ func StartManager(cfg config.Config) error { return mgr.Start(ctx) } -func createManager(cfg config.Config, hc *healthCheckerImpl) (manager.Manager, error) { +func createManager(cfg config.Config, hc *nginxConfiguredOnStartChecker) (manager.Manager, error) { options := manager.Options{ Scheme: scheme, Logger: cfg.Logger, @@ -408,7 +408,7 @@ func registerControllers( func createTelemetryJob( cfg config.Config, dataCollector telemetry.DataCollector, - hc telemetry.HealthChecker, + hc *nginxConfiguredOnStartChecker, ) *runnables.Leader { logger := cfg.Logger.WithName("telemetryJob") exporter := telemetry.NewLoggingExporter(cfg.Logger.WithName("telemetryExporter").V(1 /* debug */)) @@ -426,7 +426,7 @@ func createTelemetryJob( Logger: logger, Period: cfg.TelemetryReportPeriod, JitterFactor: jitterFactor, - ReadyCh: hc.GetReadyCh(), + ReadyCh: hc.getReadyCh(), }, ), } diff --git a/internal/mode/static/telemetry/job_worker.go b/internal/mode/static/telemetry/job_worker.go index 640603b624..a4dc81932a 100644 --- a/internal/mode/static/telemetry/job_worker.go +++ b/internal/mode/static/telemetry/job_worker.go @@ -14,14 +14,6 @@ type DataCollector interface { Collect(ctx context.Context) (Data, error) } -//go:generate go run github.com/maxbrunsfeld/counterfeiter/v6 . HealthChecker - -// HealthChecker checks if the NGF Pod is ready. -type HealthChecker interface { - // GetReadyCh returns a channel which determines if the NGF Pod is ready. 
- GetReadyCh() <-chan struct{} -} - func CreateTelemetryJobWorker( logger logr.Logger, exporter Exporter, diff --git a/internal/mode/static/telemetry/job_worker_test.go b/internal/mode/static/telemetry/job_worker_test.go index 3c8932a8d4..b1c119e13c 100644 --- a/internal/mode/static/telemetry/job_worker_test.go +++ b/internal/mode/static/telemetry/job_worker_test.go @@ -17,10 +17,6 @@ func TestCreateTelemetryJobWorker(t *testing.T) { exporter := &telemetryfakes.FakeExporter{} dataCollector := &telemetryfakes.FakeDataCollector{} - healthCollector := &telemetryfakes.FakeHealthChecker{} - - readyChannel := make(chan struct{}) - healthCollector.GetReadyChReturns(readyChannel) worker := telemetry.CreateTelemetryJobWorker(zap.New(), exporter, dataCollector) @@ -41,11 +37,6 @@ func TestCreateTelemetryJobWorker(t *testing.T) { timeout := 10 * time.Second ctx, cancel := context.WithTimeout(context.Background(), timeout) - go func() { - time.Sleep(1 * time.Second) - close(readyChannel) - }() - worker(ctx) _, data := exporter.ExportArgsForCall(0) g.Expect(data).To(Equal(expData)) diff --git a/internal/mode/static/telemetry/telemetryfakes/fake_health_checker.go b/internal/mode/static/telemetry/telemetryfakes/fake_health_checker.go deleted file mode 100644 index 09325e529b..0000000000 --- a/internal/mode/static/telemetry/telemetryfakes/fake_health_checker.go +++ /dev/null @@ -1,102 +0,0 @@ -// Code generated by counterfeiter. DO NOT EDIT. -package telemetryfakes - -import ( - "sync" - - "github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/telemetry" -) - -type FakeHealthChecker struct { - GetReadyChStub func() <-chan struct{} - getReadyChMutex sync.RWMutex - getReadyChArgsForCall []struct { - } - getReadyChReturns struct { - result1 <-chan struct{} - } - getReadyChReturnsOnCall map[int]struct { - result1 <-chan struct{} - } - invocations map[string][][]interface{} - invocationsMutex sync.RWMutex -} - -func (fake *FakeHealthChecker) GetReadyCh() <-chan struct{} { - fake.getReadyChMutex.Lock() - ret, specificReturn := fake.getReadyChReturnsOnCall[len(fake.getReadyChArgsForCall)] - fake.getReadyChArgsForCall = append(fake.getReadyChArgsForCall, struct { - }{}) - stub := fake.GetReadyChStub - fakeReturns := fake.getReadyChReturns - fake.recordInvocation("GetReadyCh", []interface{}{}) - fake.getReadyChMutex.Unlock() - if stub != nil { - return stub() - } - if specificReturn { - return ret.result1 - } - return fakeReturns.result1 -} - -func (fake *FakeHealthChecker) GetReadyChCallCount() int { - fake.getReadyChMutex.RLock() - defer fake.getReadyChMutex.RUnlock() - return len(fake.getReadyChArgsForCall) -} - -func (fake *FakeHealthChecker) GetReadyChCalls(stub func() <-chan struct{}) { - fake.getReadyChMutex.Lock() - defer fake.getReadyChMutex.Unlock() - fake.GetReadyChStub = stub -} - -func (fake *FakeHealthChecker) GetReadyChReturns(result1 <-chan struct{}) { - fake.getReadyChMutex.Lock() - defer fake.getReadyChMutex.Unlock() - fake.GetReadyChStub = nil - fake.getReadyChReturns = struct { - result1 <-chan struct{} - }{result1} -} - -func (fake *FakeHealthChecker) GetReadyChReturnsOnCall(i int, result1 <-chan struct{}) { - fake.getReadyChMutex.Lock() - defer fake.getReadyChMutex.Unlock() - fake.GetReadyChStub = nil - if fake.getReadyChReturnsOnCall == nil { - fake.getReadyChReturnsOnCall = make(map[int]struct { - result1 <-chan struct{} - }) - } - fake.getReadyChReturnsOnCall[i] = struct { - result1 <-chan struct{} - }{result1} -} - -func (fake *FakeHealthChecker) Invocations() 
map[string][][]interface{} { - fake.invocationsMutex.RLock() - defer fake.invocationsMutex.RUnlock() - fake.getReadyChMutex.RLock() - defer fake.getReadyChMutex.RUnlock() - copiedInvocations := map[string][][]interface{}{} - for key, value := range fake.invocations { - copiedInvocations[key] = value - } - return copiedInvocations -} - -func (fake *FakeHealthChecker) recordInvocation(key string, args []interface{}) { - fake.invocationsMutex.Lock() - defer fake.invocationsMutex.Unlock() - if fake.invocations == nil { - fake.invocations = map[string][][]interface{}{} - } - if fake.invocations[key] == nil { - fake.invocations[key] = [][]interface{}{} - } - fake.invocations[key] = append(fake.invocations[key], args) -} - -var _ telemetry.HealthChecker = new(FakeHealthChecker) From 0025558ebef8bfe341f4b0e1ff393b13517ff167 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Sun, 4 Feb 2024 15:02:42 -0800 Subject: [PATCH 38/43] Add setLatestConfiguration --- internal/mode/static/handler.go | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/internal/mode/static/handler.go b/internal/mode/static/handler.go index f7448c646c..4bebd7f75f 100644 --- a/internal/mode/static/handler.go +++ b/internal/mode/static/handler.go @@ -117,9 +117,7 @@ func (h *eventHandlerImpl) HandleEventBatch(ctx context.Context, logger logr.Log h.cfg.version++ cfg := dataplane.BuildConfiguration(ctx, graph, h.cfg.serviceResolver, h.cfg.version) - h.lock.Lock() - h.latestConfiguration = &cfg - h.lock.Unlock() + h.setLatestConfiguration(&cfg) err = h.updateUpstreamServers( ctx, @@ -130,9 +128,7 @@ func (h *eventHandlerImpl) HandleEventBatch(ctx context.Context, logger logr.Log h.cfg.version++ cfg := dataplane.BuildConfiguration(ctx, graph, h.cfg.serviceResolver, h.cfg.version) - h.lock.Lock() - h.latestConfiguration = &cfg - h.lock.Unlock() + h.setLatestConfiguration(&cfg) err = h.updateNginxConf( ctx, @@ -401,9 +397,18 @@ func getGatewayAddresses( return gwAddresses, nil } +// GetLatestConfiguration gets the latest configuration. func (h *eventHandlerImpl) GetLatestConfiguration() *dataplane.Configuration { h.lock.Lock() defer h.lock.Unlock() return h.latestConfiguration } + +// setLatestConfiguration sets the latest configuration. +func (h *eventHandlerImpl) setLatestConfiguration(cfg *dataplane.Configuration) { + h.lock.Lock() + defer h.lock.Unlock() + + h.latestConfiguration = cfg +} From 36fae13f69d8ce8c80224aba5ad6e2be5111aab0 Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Sun, 4 Feb 2024 15:37:12 -0800 Subject: [PATCH 39/43] Add small fixes --- internal/mode/static/handler.go | 16 +++++----- internal/mode/static/handler_test.go | 44 ++++++++++++++-------------- internal/mode/static/health_test.go | 8 ++--- internal/mode/static/manager.go | 26 ++++++++-------- 4 files changed, 47 insertions(+), 47 deletions(-) diff --git a/internal/mode/static/handler.go b/internal/mode/static/handler.go index 4bebd7f75f..74828760b3 100644 --- a/internal/mode/static/handler.go +++ b/internal/mode/static/handler.go @@ -59,8 +59,8 @@ type eventHandlerConfig struct { logLevelSetter logLevelSetter // metricsCollector collects metrics for this controller. metricsCollector handlerMetricsCollector - // healthChecker sets the health of the Pod to Ready once we've written out our initial config. - healthChecker *nginxConfiguredOnStartChecker + // nginxConfiguredOnStartChecker sets the health of the Pod to Ready once we've written out our initial config. 
+ nginxConfiguredOnStartChecker *nginxConfiguredOnStartChecker // controlConfigNSName is the NamespacedName of the NginxGateway config for this controller. controlConfigNSName types.NamespacedName // version is the current version number of the nginx config. @@ -109,8 +109,8 @@ func (h *eventHandlerImpl) HandleEventBatch(ctx context.Context, logger logr.Log switch changeType { case state.NoChange: logger.Info("Handling events didn't result into NGINX configuration changes") - if !h.cfg.healthChecker.ready && h.cfg.healthChecker.firstBatchError == nil { - h.cfg.healthChecker.setAsReady() + if !h.cfg.nginxConfiguredOnStartChecker.ready && h.cfg.nginxConfiguredOnStartChecker.firstBatchError == nil { + h.cfg.nginxConfiguredOnStartChecker.setAsReady() } return case state.EndpointsOnlyChange: @@ -140,13 +140,13 @@ func (h *eventHandlerImpl) HandleEventBatch(ctx context.Context, logger logr.Log if err != nil { logger.Error(err, "Failed to update NGINX configuration") nginxReloadRes.error = err - if !h.cfg.healthChecker.ready { - h.cfg.healthChecker.firstBatchError = err + if !h.cfg.nginxConfiguredOnStartChecker.ready { + h.cfg.nginxConfiguredOnStartChecker.firstBatchError = err } } else { logger.Info("NGINX configuration was successfully updated") - if !h.cfg.healthChecker.ready { - h.cfg.healthChecker.setAsReady() + if !h.cfg.nginxConfiguredOnStartChecker.ready { + h.cfg.nginxConfiguredOnStartChecker.setAsReady() } } diff --git a/internal/mode/static/handler_test.go b/internal/mode/static/handler_test.go index 424fc315d2..682c2bc48c 100644 --- a/internal/mode/static/handler_test.go +++ b/internal/mode/static/handler_test.go @@ -76,23 +76,23 @@ var _ = Describe("eventHandler", func() { zapLogLevelSetter = newZapLogLevelSetter(zap.NewAtomicLevel()) handler = newEventHandlerImpl(eventHandlerConfig{ - k8sClient: fake.NewFakeClient(), - processor: fakeProcessor, - generator: fakeGenerator, - logLevelSetter: zapLogLevelSetter, - nginxFileMgr: fakeNginxFileMgr, - nginxRuntimeMgr: fakeNginxRuntimeMgr, - statusUpdater: fakeStatusUpdater, - eventRecorder: fakeEventRecorder, - healthChecker: newNginxConfiguredOnStartChecker(), - controlConfigNSName: types.NamespacedName{Namespace: namespace, Name: configName}, + k8sClient: fake.NewFakeClient(), + processor: fakeProcessor, + generator: fakeGenerator, + logLevelSetter: zapLogLevelSetter, + nginxFileMgr: fakeNginxFileMgr, + nginxRuntimeMgr: fakeNginxRuntimeMgr, + statusUpdater: fakeStatusUpdater, + eventRecorder: fakeEventRecorder, + nginxConfiguredOnStartChecker: newNginxConfiguredOnStartChecker(), + controlConfigNSName: types.NamespacedName{Namespace: namespace, Name: configName}, gatewayPodConfig: config.GatewayPodConfig{ ServiceName: "nginx-gateway", Namespace: "nginx-gateway", }, metricsCollector: collectors.NewControllerNoopCollector(), }) - Expect(handler.cfg.healthChecker.ready).To(BeFalse()) + Expect(handler.cfg.nginxConfiguredOnStartChecker.ready).To(BeFalse()) }) Describe("Process the Gateway API resources events", func() { @@ -122,7 +122,7 @@ var _ = Describe("eventHandler", func() { }) AfterEach(func() { - Expect(handler.cfg.healthChecker.ready).To(BeTrue()) + Expect(handler.cfg.nginxConfiguredOnStartChecker.ready).To(BeTrue()) }) When("a batch has one event", func() { @@ -429,53 +429,53 @@ var _ = Describe("eventHandler", func() { It("should set the health checker status properly when there are changes", func() { e := &events.UpsertEvent{Resource: &gatewayv1.HTTPRoute{}} batch := []interface{}{e} - readyChannel := 
handler.cfg.healthChecker.getReadyCh() + readyChannel := handler.cfg.nginxConfiguredOnStartChecker.getReadyCh() fakeProcessor.ProcessReturns(state.ClusterStateChange, &graph.Graph{}) - Expect(handler.cfg.healthChecker.readyCheck(nil)).ToNot(Succeed()) + Expect(handler.cfg.nginxConfiguredOnStartChecker.readyCheck(nil)).ToNot(Succeed()) handler.HandleEventBatch(context.Background(), ctlrZap.New(), batch) Expect(helpers.Diff(handler.GetLatestConfiguration(), &dataplane.Configuration{Version: 1})).To(BeEmpty()) Expect(readyChannel).To(BeClosed()) - Expect(handler.cfg.healthChecker.readyCheck(nil)).To(Succeed()) + Expect(handler.cfg.nginxConfiguredOnStartChecker.readyCheck(nil)).To(Succeed()) }) It("should set the health checker status properly when there are no changes or errors", func() { e := &events.UpsertEvent{Resource: &gatewayv1.HTTPRoute{}} batch := []interface{}{e} - readyChannel := handler.cfg.healthChecker.getReadyCh() + readyChannel := handler.cfg.nginxConfiguredOnStartChecker.getReadyCh() - Expect(handler.cfg.healthChecker.readyCheck(nil)).ToNot(Succeed()) + Expect(handler.cfg.nginxConfiguredOnStartChecker.readyCheck(nil)).ToNot(Succeed()) handler.HandleEventBatch(context.Background(), ctlrZap.New(), batch) Expect(handler.GetLatestConfiguration()).To(BeNil()) Expect(readyChannel).To(BeClosed()) - Expect(handler.cfg.healthChecker.readyCheck(nil)).To(Succeed()) + Expect(handler.cfg.nginxConfiguredOnStartChecker.readyCheck(nil)).To(Succeed()) }) It("should set the health checker status properly when there is an error", func() { e := &events.UpsertEvent{Resource: &gatewayv1.HTTPRoute{}} batch := []interface{}{e} - readyChannel := handler.cfg.healthChecker.getReadyCh() + readyChannel := handler.cfg.nginxConfiguredOnStartChecker.getReadyCh() fakeProcessor.ProcessReturns(state.ClusterStateChange, &graph.Graph{}) fakeNginxRuntimeMgr.ReloadReturns(errors.New("reload error")) handler.HandleEventBatch(context.Background(), ctlrZap.New(), batch) - Expect(handler.cfg.healthChecker.readyCheck(nil)).ToNot(Succeed()) + Expect(handler.cfg.nginxConfiguredOnStartChecker.readyCheck(nil)).ToNot(Succeed()) // now send an update with no changes; should still return an error fakeProcessor.ProcessReturns(state.NoChange, &graph.Graph{}) handler.HandleEventBatch(context.Background(), ctlrZap.New(), batch) - Expect(handler.cfg.healthChecker.readyCheck(nil)).ToNot(Succeed()) + Expect(handler.cfg.nginxConfiguredOnStartChecker.readyCheck(nil)).ToNot(Succeed()) // error goes away fakeProcessor.ProcessReturns(state.ClusterStateChange, &graph.Graph{}) @@ -487,7 +487,7 @@ var _ = Describe("eventHandler", func() { Expect(readyChannel).To(BeClosed()) - Expect(handler.cfg.healthChecker.readyCheck(nil)).To(Succeed()) + Expect(handler.cfg.nginxConfiguredOnStartChecker.readyCheck(nil)).To(Succeed()) }) It("should panic for an unknown event type", func() { diff --git a/internal/mode/static/health_test.go b/internal/mode/static/health_test.go index 7f9f63d47d..352cc50b3c 100644 --- a/internal/mode/static/health_test.go +++ b/internal/mode/static/health_test.go @@ -8,9 +8,9 @@ import ( func TestReadyCheck(t *testing.T) { g := NewWithT(t) - hc := newNginxConfiguredOnStartChecker() - g.Expect(hc.readyCheck(nil)).ToNot(Succeed()) + nginxChecker := newNginxConfiguredOnStartChecker() + g.Expect(nginxChecker.readyCheck(nil)).ToNot(Succeed()) - hc.ready = true - g.Expect(hc.readyCheck(nil)).To(Succeed()) + nginxChecker.ready = true + g.Expect(nginxChecker.readyCheck(nil)).To(Succeed()) } diff --git a/internal/mode/static/manager.go 
b/internal/mode/static/manager.go index 255ecf3a19..910a796bd7 100644 --- a/internal/mode/static/manager.go +++ b/internal/mode/static/manager.go @@ -69,8 +69,8 @@ func init() { // nolint:gocyclo func StartManager(cfg config.Config) error { - hc := newNginxConfiguredOnStartChecker() - mgr, err := createManager(cfg, hc) + nginxChecker := newNginxConfiguredOnStartChecker() + mgr, err := createManager(cfg, nginxChecker) if err != nil { return fmt.Errorf("cannot build runtime manager: %w", err) } @@ -188,12 +188,12 @@ func StartManager(cfg config.Config) error { ngxruntimeCollector, cfg.Logger.WithName("nginxRuntimeManager"), ), - statusUpdater: statusUpdater, - eventRecorder: recorder, - healthChecker: hc, - controlConfigNSName: controlConfigNSName, - gatewayPodConfig: cfg.GatewayPodConfig, - metricsCollector: handlerCollector, + statusUpdater: statusUpdater, + eventRecorder: recorder, + nginxConfiguredOnStartChecker: nginxChecker, + controlConfigNSName: controlConfigNSName, + gatewayPodConfig: cfg.GatewayPodConfig, + metricsCollector: handlerCollector, }) objects, objectLists := prepareFirstEventBatchPreparerArgs(cfg.GatewayClassName, cfg.GatewayNsName) @@ -219,7 +219,7 @@ func StartManager(cfg config.Config) error { ConfigurationGetter: eventHandler, Version: cfg.Version, }) - if err = mgr.Add(createTelemetryJob(cfg, dataCollector, hc)); err != nil { + if err = mgr.Add(createTelemetryJob(cfg, dataCollector, nginxChecker)); err != nil { return fmt.Errorf("cannot register telemetry job: %w", err) } @@ -227,7 +227,7 @@ func StartManager(cfg config.Config) error { return mgr.Start(ctx) } -func createManager(cfg config.Config, hc *nginxConfiguredOnStartChecker) (manager.Manager, error) { +func createManager(cfg config.Config, nginxChecker *nginxConfiguredOnStartChecker) (manager.Manager, error) { options := manager.Options{ Scheme: scheme, Logger: cfg.Logger, @@ -262,7 +262,7 @@ func createManager(cfg config.Config, hc *nginxConfiguredOnStartChecker) (manage } if cfg.HealthConfig.Enabled { - if err := mgr.AddReadyzCheck("readyz", hc.readyCheck); err != nil { + if err := mgr.AddReadyzCheck("readyz", nginxChecker.readyCheck); err != nil { return nil, fmt.Errorf("error adding ready check: %w", err) } } @@ -408,7 +408,7 @@ func registerControllers( func createTelemetryJob( cfg config.Config, dataCollector telemetry.DataCollector, - hc *nginxConfiguredOnStartChecker, + nginxChecker *nginxConfiguredOnStartChecker, ) *runnables.Leader { logger := cfg.Logger.WithName("telemetryJob") exporter := telemetry.NewLoggingExporter(cfg.Logger.WithName("telemetryExporter").V(1 /* debug */)) @@ -426,7 +426,7 @@ func createTelemetryJob( Logger: logger, Period: cfg.TelemetryReportPeriod, JitterFactor: jitterFactor, - ReadyCh: hc.getReadyCh(), + ReadyCh: nginxChecker.getReadyCh(), }, ), } From e3a71e91f44b92f547e4ea687a5e1ca9b777e09e Mon Sep 17 00:00:00 2001 From: Benjamin Jee Date: Mon, 5 Feb 2024 10:23:56 -0800 Subject: [PATCH 40/43] Add check for context canceled --- internal/framework/runnables/cronjob_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/internal/framework/runnables/cronjob_test.go b/internal/framework/runnables/cronjob_test.go index d330652705..ac005d841b 100644 --- a/internal/framework/runnables/cronjob_test.go +++ b/internal/framework/runnables/cronjob_test.go @@ -79,7 +79,6 @@ func TestCronJob_ContextCanceled(t *testing.T) { close(errCh) }() cancel() - - g.Eventually(errCh).Should(Receive()) + g.Eventually(errCh).Should(Receive(MatchError(context.Canceled))) 
From 9ca02365062ef4fd6e1911d4911785a8e1119184 Mon Sep 17 00:00:00 2001
From: Benjamin Jee
Date: Mon, 5 Feb 2024 10:24:51 -0800
Subject: [PATCH 41/43] Add small line fix

---
 internal/framework/runnables/cronjob_test.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/internal/framework/runnables/cronjob_test.go b/internal/framework/runnables/cronjob_test.go
index ac005d841b..89ddf09d53 100644
--- a/internal/framework/runnables/cronjob_test.go
+++ b/internal/framework/runnables/cronjob_test.go
@@ -78,6 +78,7 @@ func TestCronJob_ContextCanceled(t *testing.T) {
 		errCh <- job.Start(ctx)
 		close(errCh)
 	}()
+
 	cancel()
 	g.Eventually(errCh).Should(Receive(MatchError(context.Canceled)))
 	g.Eventually(errCh).Should(BeClosed())
 }

From de6e3bcf53bdd055e3dd5fc6235545033f3e2c96 Mon Sep 17 00:00:00 2001
From: Benjamin Jee
Date: Mon, 5 Feb 2024 14:06:50 -0800
Subject: [PATCH 42/43] Add review feedback

---
 internal/framework/runnables/cronjob_test.go      | 13 ++-----------
 internal/mode/static/manager.go                   |  6 +++---
 internal/mode/static/telemetry/job_worker_test.go |  2 +-
 3 files changed, 6 insertions(+), 15 deletions(-)

diff --git a/internal/framework/runnables/cronjob_test.go b/internal/framework/runnables/cronjob_test.go
index 89ddf09d53..9a3a6bbf72 100644
--- a/internal/framework/runnables/cronjob_test.go
+++ b/internal/framework/runnables/cronjob_test.go
@@ -54,24 +54,15 @@ func TestCronJob_ContextCanceled(t *testing.T) {
 	readyChannel := make(chan struct{})
-	timeout := 10 * time.Second
-	var callCount int
-
-	valCh := make(chan int, 128)
-	worker := func(context.Context) {
-		callCount++
-		valCh <- callCount
-	}
-
 	cfg := CronJobConfig{
-		Worker:  worker,
+		Worker:  func(ctx context.Context) {},
 		Logger:  zap.New(),
 		Period:  1 * time.Millisecond, // 1ms is much smaller than timeout so the CronJob should run a few times
 		ReadyCh: readyChannel,
 	}
 	job := NewCronJob(cfg)

-	ctx, cancel := context.WithTimeout(context.Background(), timeout)
+	ctx, cancel := context.WithCancel(context.Background())
 	errCh := make(chan error)

 	go func() {
diff --git a/internal/mode/static/manager.go b/internal/mode/static/manager.go
index 910a796bd7..712bb6c46f 100644
--- a/internal/mode/static/manager.go
+++ b/internal/mode/static/manager.go
@@ -219,7 +219,7 @@ func StartManager(cfg config.Config) error {
 		ConfigurationGetter: eventHandler,
 		Version:             cfg.Version,
 	})
-	if err = mgr.Add(createTelemetryJob(cfg, dataCollector, nginxChecker)); err != nil {
+	if err = mgr.Add(createTelemetryJob(cfg, dataCollector, nginxChecker.getReadyCh())); err != nil {
 		return fmt.Errorf("cannot register telemetry job: %w", err)
 	}
@@ -408,7 +408,7 @@ func registerControllers(
 func createTelemetryJob(
 	cfg config.Config,
 	dataCollector telemetry.DataCollector,
-	nginxChecker *nginxConfiguredOnStartChecker,
+	readyCh <-chan struct{},
 ) *runnables.Leader {
 	logger := cfg.Logger.WithName("telemetryJob")
 	exporter := telemetry.NewLoggingExporter(cfg.Logger.WithName("telemetryExporter").V(1 /* debug */))
@@ -426,7 +426,7 @@ func createTelemetryJob(
 			Logger:       logger,
 			Period:       cfg.TelemetryReportPeriod,
 			JitterFactor: jitterFactor,
-			ReadyCh:      nginxChecker.getReadyCh(),
+			ReadyCh:      readyCh,
 		},
 	),
 }
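The manager.go hunks above narrow createTelemetryJob's dependency from the whole checker down to its ready channel. A minimal sketch of how a periodic job can gate its first run on such a channel follows; runGated and its parameter list are assumptions made for illustration, and only wait.JitterUntilWithContext plus the ReadyCh, Period, and JitterFactor fields come from the patches.

// ready_gated_job_sketch.go: illustrative composition only, not repository API.
package main

import (
	"context"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

// runGated blocks until readyCh is closed (or the context ends), then runs
// work on a jittered period, mirroring how the telemetry job is wired.
func runGated(
	ctx context.Context,
	readyCh <-chan struct{},
	period time.Duration,
	jitterFactor float64,
	work func(context.Context),
) error {
	select {
	case <-ctx.Done():
		return ctx.Err()
	case <-readyCh:
	}

	// sliding=true: the next period is measured from the end of each work call.
	wait.JitterUntilWithContext(ctx, work, period, jitterFactor, true)
	return nil
}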
diff --git a/internal/mode/static/telemetry/job_worker_test.go b/internal/mode/static/telemetry/job_worker_test.go
index b1c119e13c..4e804ae26d 100644
--- a/internal/mode/static/telemetry/job_worker_test.go
+++ b/internal/mode/static/telemetry/job_worker_test.go
@@ -36,9 +36,9 @@ func TestCreateTelemetryJobWorker(t *testing.T) {
 	timeout := 10 * time.Second
 	ctx, cancel := context.WithTimeout(context.Background(), timeout)
+	defer cancel()

 	worker(ctx)

 	_, data := exporter.ExportArgsForCall(0)
 	g.Expect(data).To(Equal(expData))
-	cancel()
 }

From 89c41d5429ffd7486edb731b85b961e8d2dbea96 Mon Sep 17 00:00:00 2001
From: Benjamin Jee
Date: Mon, 5 Feb 2024 14:38:25 -0800
Subject: [PATCH 43/43] Move test case around in collector tests

---
 internal/framework/runnables/cronjob.go     |  2 +-
 .../mode/static/telemetry/collector_test.go | 64 +++++++++++++------
 2 files changed, 47 insertions(+), 19 deletions(-)

diff --git a/internal/framework/runnables/cronjob.go b/internal/framework/runnables/cronjob.go
index eed2ad4d07..3bd4f32eaf 100644
--- a/internal/framework/runnables/cronjob.go
+++ b/internal/framework/runnables/cronjob.go
@@ -13,7 +13,7 @@ import (
 type CronJobConfig struct {
 	// Worker is the function that will be run for every cronjob iteration.
 	Worker func(context.Context)
-	// ReadyCh represents if the cronjob is ready to start.
+	// ReadyCh delays the start of the job until the channel is closed.
 	ReadyCh <-chan struct{}
 	// Logger is the logger.
 	Logger logr.Logger
diff --git a/internal/mode/static/telemetry/collector_test.go b/internal/mode/static/telemetry/collector_test.go
index 26e5f069e8..54a7cf8539 100644
--- a/internal/mode/static/telemetry/collector_test.go
+++ b/internal/mode/static/telemetry/collector_test.go
@@ -162,22 +162,6 @@ var _ = Describe("Collector", Ordered, func() {
 					},
 				},
 			},
-			{
-				Name:     "upstream3",
-				ErrorMsg: "there is an error here",
-				Endpoints: []resolver.Endpoint{
-					{
-						Address: "endpoint1",
-						Port:    80,
-					}, {
-						Address: "endpoint2",
-						Port:    80,
-					}, {
-						Address: "endpoint3",
-						Port:    80,
-					},
-				},
-			},
 		},
 	}
@@ -242,8 +226,8 @@ var _ = Describe("Collector", Ordered, func() {
 	Describe("NGF resource count collector", func() {
 		var (
-			graph1  *graph.Graph
-			config1 *dataplane.Configuration
+			graph1                          *graph.Graph
+			config1, invalidUpstreamsConfig *dataplane.Configuration
 		)

 		BeforeAll(func() {
@@ -280,6 +264,32 @@
 					},
 				},
 			}
+
+			invalidUpstreamsConfig = &dataplane.Configuration{
+				Upstreams: []dataplane.Upstream{
+					{
+						Name:     "invalidUpstream",
+						ErrorMsg: "there is an error here",
+						Endpoints: []resolver.Endpoint{
+							{
+								Address: "endpoint1",
+								Port:    80,
+							}, {
+								Address: "endpoint2",
+								Port:    80,
+							}, {
+								Address: "endpoint3",
+								Port:    80,
+							},
+						},
+					},
+					{
+						Name:      "emptyUpstream",
+						ErrorMsg:  "",
+						Endpoints: []resolver.Endpoint{},
+					},
+				},
+			}
 		})

 		When("collecting NGF resource counts", func() {
@@ -314,6 +324,24 @@
 			Expect(expData).To(Equal(data))
 		})

+		It("ignores invalid and empty upstreams", func() {
+			fakeGraphGetter.GetLatestGraphReturns(&graph.Graph{})
+			fakeConfigurationGetter.GetLatestConfigurationReturns(invalidUpstreamsConfig)
+			expData.NGFResourceCounts = telemetry.NGFResourceCounts{
+				Gateways:       0,
+				GatewayClasses: 0,
+				HTTPRoutes:     0,
+				Secrets:        0,
+				Services:       0,
+				Endpoints:      0,
+			}
+
+			data, err := dataCollector.Collect(ctx)
+
+			Expect(err).To(BeNil())
+			Expect(expData).To(Equal(data))
+		})
+
 		When("it encounters an error while collecting data", func() {
 			BeforeEach(func() {
 				fakeGraphGetter.GetLatestGraphReturns(&graph.Graph{})