
Commit 95c8613

Add Telemetry collector structure with some collected resources (#1497)
Problem: We want a starting design for collecting telemetry data. Solution: Implemented a design that collects multiple types of telemetry data: cluster node count, counts of NGF resources (from the graph), and the project name and version.
1 parent 8c4fb19 commit 95c8613

23 files changed: +1258 -96 lines
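The collector itself lives in files not excerpted below; as a rough sketch of the data described in the commit message (all names here are hypothetical and not taken from this commit), a single telemetry report could be modeled like this:

package telemetry

// Data is an illustrative shape for one telemetry report: project identity
// and version, the cluster node count, and counts of NGF-related resources
// taken from the latest graph.
type Data struct {
	ProjectName    string         // e.g. "NGF"
	ProjectVersion string         // the running NGF version
	NodeCount      int            // number of nodes in the cluster
	ResourceCounts map[string]int // e.g. Gateways, HTTPRoutes, Secrets in the graph
}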

cmd/gateway/commands.go

Lines changed: 1 addition & 0 deletions
@@ -170,6 +170,7 @@ func createStaticModeCommand() *cobra.Command {
 			},
 			Plus:                  plus,
 			TelemetryReportPeriod: period,
+			Version:               version,
 		}
 
 		if err := static.StartManager(conf); err != nil {
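The version value passed into the static mode config above is the binary's own version string. A common way to supply it (an assumption; the build setup is not part of this diff) is a package-level variable populated at link time:

// In package main of cmd/gateway (assumed location), set via the linker:
//
//	go build -ldflags "-X main.version=<semver>" ./cmd/gateway
var version string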

deploy/helm-chart/templates/rbac.yaml

Lines changed: 3 additions & 0 deletions
@@ -21,6 +21,9 @@ rules:
   - namespaces
   - services
   - secrets
+  # FIXME(bjee19): make nodes permission dependent on telemetry being enabled.
+  # https://github.com/nginxinc/nginx-gateway-fabric/issues/1317.
+  - nodes
   verbs:
   - list
   - watch

deploy/manifests/nginx-gateway.yaml

Lines changed: 3 additions & 0 deletions
@@ -32,6 +32,9 @@ rules:
   - namespaces
   - services
   - secrets
+  # FIXME(bjee19): make nodes permission dependent on telemetry being enabled.
+  # https://github.com/nginxinc/nginx-gateway-fabric/issues/1317.
+  - nodes
   verbs:
   - list
   - watch
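Both RBAC changes above grant the same new permission: it exists so the control plane can count cluster nodes for telemetry. A minimal sketch of that lookup (not the code from this commit) using the controller-runtime client NGF already depends on:

package telemetry

import (
	"context"

	v1 "k8s.io/api/core/v1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// countNodes lists the cluster's Nodes and returns how many there are.
// It needs exactly the RBAC added above: list (and watch, for cached
// clients) on the "nodes" resource.
func countNodes(ctx context.Context, k8sClient client.Reader) (int, error) {
	var nodes v1.NodeList
	if err := k8sClient.List(ctx, &nodes); err != nil {
		return 0, err
	}

	return len(nodes.Items), nil
}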

internal/framework/runnables/cronjob.go

Lines changed: 9 additions & 0 deletions
@@ -13,6 +13,8 @@ import (
 type CronJobConfig struct {
 	// Worker is the function that will be run for every cronjob iteration.
 	Worker func(context.Context)
+	// ReadyCh delays the start of the job until the channel is closed.
+	ReadyCh <-chan struct{}
 	// Logger is the logger.
 	Logger logr.Logger
 	// Period defines the period of the cronjob. The cronjob will run every Period.
@@ -37,6 +39,13 @@ func NewCronJob(cfg CronJobConfig) *CronJob {
 // Start starts the cronjob.
 // Implements controller-runtime manager.Runnable
 func (j *CronJob) Start(ctx context.Context) error {
+	select {
+	case <-j.cfg.ReadyCh:
+	case <-ctx.Done():
+		j.cfg.Logger.Info("Context canceled, failed to start cronjob")
+		return ctx.Err()
+	}
+
 	j.cfg.Logger.Info("Starting cronjob")
 
 	sliding := true // This means the period with jitter will be calculated after each worker call.
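ReadyCh turns CronJob into a gated runnable: Start blocks until the channel is closed, so the first telemetry report can be held back until the controller is actually ready. A wiring sketch (assumed package paths and reporting period; not taken from this diff):

package telemetry

import (
	"context"
	"time"

	"github.com/go-logr/logr"
	"sigs.k8s.io/controller-runtime/pkg/manager"

	"github.com/nginxinc/nginx-gateway-fabric/internal/framework/runnables"
)

// registerTelemetryJob registers the cronjob with the controller-runtime
// manager. The manager calls Start, which now blocks on readyCh (or returns
// early with ctx.Err() if the context is canceled first), so the first
// report waits until readyCh is closed by whoever decides NGF is ready.
func registerTelemetryJob(mgr manager.Manager, logger logr.Logger, readyCh <-chan struct{}) error {
	job := runnables.NewCronJob(runnables.CronJobConfig{
		Worker:  func(ctx context.Context) { /* collect and report telemetry here */ },
		Logger:  logger.WithName("telemetryJob"),
		Period:  24 * time.Hour, // hypothetical reporting period
		ReadyCh: readyCh,
	})

	return mgr.Add(job)
}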

internal/framework/runnables/cronjob_test.go

Lines changed: 33 additions & 3 deletions
@@ -12,6 +12,8 @@ import (
 func TestCronJob(t *testing.T) {
 	g := NewWithT(t)
 
+	readyChannel := make(chan struct{})
+
 	timeout := 10 * time.Second
 	var callCount int
 
@@ -22,9 +24,10 @@ func TestCronJob(t *testing.T) {
 	}
 
 	cfg := CronJobConfig{
-		Worker: worker,
-		Logger: zap.New(),
-		Period: 1 * time.Millisecond, // 1ms is much smaller than timeout so the CronJob should run a few times
+		Worker:  worker,
+		Logger:  zap.New(),
+		Period:  1 * time.Millisecond, // 1ms is much smaller than timeout so the CronJob should run a few times
+		ReadyCh: readyChannel,
 	}
 	job := NewCronJob(cfg)
 
@@ -35,6 +38,7 @@ func TestCronJob(t *testing.T) {
 		errCh <- job.Start(ctx)
 		close(errCh)
 	}()
+	close(readyChannel)
 
 	minReports := 2 // ensure that the CronJob reports more than once: it doesn't exit after the first run
 
@@ -44,3 +48,29 @@ func TestCronJob(t *testing.T) {
 	g.Eventually(errCh).Should(Receive(BeNil()))
 	g.Eventually(errCh).Should(BeClosed())
 }
+
+func TestCronJob_ContextCanceled(t *testing.T) {
+	g := NewWithT(t)
+
+	readyChannel := make(chan struct{})
+
+	cfg := CronJobConfig{
+		Worker:  func(ctx context.Context) {},
+		Logger:  zap.New(),
+		Period:  1 * time.Millisecond, // 1ms is much smaller than timeout so the CronJob should run a few times
+		ReadyCh: readyChannel,
+	}
+	job := NewCronJob(cfg)
+
+	ctx, cancel := context.WithCancel(context.Background())
+
+	errCh := make(chan error)
+	go func() {
+		errCh <- job.Start(ctx)
+		close(errCh)
+	}()
+
+	cancel()
+	g.Eventually(errCh).Should(Receive(MatchError(context.Canceled)))
+	g.Eventually(errCh).Should(BeClosed())
+}

internal/mode/static/config/config.go

Lines changed: 2 additions & 0 deletions
@@ -9,6 +9,8 @@ import (
 )
 
 type Config struct {
+	// Version is the running NGF version.
+	Version string
 	// AtomicLevel is an atomically changeable, dynamic logging level.
 	AtomicLevel zap.AtomicLevel
 	// GatewayNsName is the namespaced name of a Gateway resource that the Gateway will use.

internal/mode/static/handler.go

Lines changed: 39 additions & 11 deletions
@@ -3,6 +3,7 @@ package static
 import (
 	"context"
 	"fmt"
+	"sync"
 	"time"
 
 	"github.com/go-logr/logr"
@@ -58,8 +59,8 @@ type eventHandlerConfig struct {
 	logLevelSetter logLevelSetter
 	// metricsCollector collects metrics for this controller.
 	metricsCollector handlerMetricsCollector
-	// healthChecker sets the health of the Pod to Ready once we've written out our initial config
-	healthChecker *healthChecker
+	// nginxConfiguredOnStartChecker sets the health of the Pod to Ready once we've written out our initial config.
+	nginxConfiguredOnStartChecker *nginxConfiguredOnStartChecker
 	// controlConfigNSName is the NamespacedName of the NginxGateway config for this controller.
 	controlConfigNSName types.NamespacedName
 	// version is the current version number of the nginx config.
@@ -72,7 +73,10 @@ type eventHandlerConfig struct {
 // (2) Keeping the statuses of the Gateway API resources updated.
 // (3) Updating control plane configuration.
 type eventHandlerImpl struct {
-	cfg eventHandlerConfig
+	// latestConfiguration is the latest Configuration generation.
+	latestConfiguration *dataplane.Configuration
+	cfg                 eventHandlerConfig
+	lock                sync.Mutex
 }
 
 // newEventHandlerImpl creates a new eventHandlerImpl.
@@ -105,36 +109,44 @@ func (h *eventHandlerImpl) HandleEventBatch(ctx context.Context, logger logr.Log
 	switch changeType {
 	case state.NoChange:
 		logger.Info("Handling events didn't result into NGINX configuration changes")
-		if !h.cfg.healthChecker.ready && h.cfg.healthChecker.firstBatchError == nil {
-			h.cfg.healthChecker.setAsReady()
+		if !h.cfg.nginxConfiguredOnStartChecker.ready && h.cfg.nginxConfiguredOnStartChecker.firstBatchError == nil {
+			h.cfg.nginxConfiguredOnStartChecker.setAsReady()
 		}
 		return
 	case state.EndpointsOnlyChange:
 		h.cfg.version++
+		cfg := dataplane.BuildConfiguration(ctx, graph, h.cfg.serviceResolver, h.cfg.version)
+
+		h.setLatestConfiguration(&cfg)
+
 		err = h.updateUpstreamServers(
 			ctx,
 			logger,
-			dataplane.BuildConfiguration(ctx, graph, h.cfg.serviceResolver, h.cfg.version),
+			cfg,
 		)
 	case state.ClusterStateChange:
 		h.cfg.version++
+		cfg := dataplane.BuildConfiguration(ctx, graph, h.cfg.serviceResolver, h.cfg.version)
+
+		h.setLatestConfiguration(&cfg)
+
 		err = h.updateNginxConf(
 			ctx,
-			dataplane.BuildConfiguration(ctx, graph, h.cfg.serviceResolver, h.cfg.version),
+			cfg,
 		)
 	}
 
 	var nginxReloadRes nginxReloadResult
 	if err != nil {
 		logger.Error(err, "Failed to update NGINX configuration")
		nginxReloadRes.error = err
-		if !h.cfg.healthChecker.ready {
-			h.cfg.healthChecker.firstBatchError = err
+		if !h.cfg.nginxConfiguredOnStartChecker.ready {
+			h.cfg.nginxConfiguredOnStartChecker.firstBatchError = err
 		}
 	} else {
 		logger.Info("NGINX configuration was successfully updated")
-		if !h.cfg.healthChecker.ready {
-			h.cfg.healthChecker.setAsReady()
+		if !h.cfg.nginxConfiguredOnStartChecker.ready {
+			h.cfg.nginxConfiguredOnStartChecker.setAsReady()
 		}
 	}
 
@@ -384,3 +396,19 @@ func getGatewayAddresses(
 
 	return gwAddresses, nil
 }
+
+// GetLatestConfiguration gets the latest configuration.
+func (h *eventHandlerImpl) GetLatestConfiguration() *dataplane.Configuration {
+	h.lock.Lock()
+	defer h.lock.Unlock()
+
+	return h.latestConfiguration
+}
+
+// setLatestConfiguration sets the latest configuration.
+func (h *eventHandlerImpl) setLatestConfiguration(cfg *dataplane.Configuration) {
+	h.lock.Lock()
+	defer h.lock.Unlock()
+
+	h.latestConfiguration = cfg
+}
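The new accessor pair means the telemetry worker and the event handler never touch latestConfiguration without holding the same mutex. One plausible consumer (hypothetical interface and counting logic, not part of this diff):

package telemetry

import (
	"github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/state/dataplane"
)

// configurationGetter is the minimal view of the event handler that a
// collector needs; eventHandlerImpl satisfies it via GetLatestConfiguration.
type configurationGetter interface {
	GetLatestConfiguration() *dataplane.Configuration
}

// countUpstreams is an illustrative metric: the number of upstreams in the
// most recently built configuration, or zero if nothing has been built yet.
func countUpstreams(getter configurationGetter) int {
	cfg := getter.GetLatestConfiguration()
	if cfg == nil {
		return 0
	}

	return len(cfg.Upstreams)
}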
