Skip to content

Commit c6c7086

Browse files
committed
rfac: outputAccesslog func to skip accesslog at conn establishment
Signed-off-by: Yash Patel <[email protected]>
1 parent 9ef41e7 commit c6c7086

File tree

5 files changed

+12
-12
lines changed

5 files changed

+12
-12
lines changed

docs/proposal/tcp_long_connection_metrics.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ We will update the functions of metric.go for periodic updating the workload and
171171

172172
#### Exposing long connection prometheus metrics
173173

174-
We will expose metrics for the connections whose duration exceeds 30 seconds. Not exposing metrics for short connection as it can lead to lot of metrics and they are also not suitable for prometheus metrics because prometheus itself has a scrape interval of maximum 15s, and short-lived connections may start and end between scrapes, resulting in incomplete or misleading data. By focusing only on longer-lived connections, we ensure the metrics are stable, meaningful, and better aligned with Prometheus’s time-series data model.
174+
We will expose metrics only for connections whose duration exceeds 5 seconds. We do not expose metrics for short-lived connections because doing so can produce a large number of metrics, and such connections are poorly suited to Prometheus metrics: Prometheus itself has a scrape interval of 5s, so short-lived connections may start and end between scrapes, resulting in incomplete or misleading data. By focusing only on longer-lived connections, we ensure the metrics are stable, meaningful, and better aligned with Prometheus’s time-series data model.
175175

176176
In the future, we can add another component, similar to Cilium Hubble, that reports real-time information about connections.
177177

pkg/controller/telemetry/accesslog.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ func (l *logInfo) withDestinationService(service *workloadapi.Service) *logInfo
7979

8080
func OutputAccesslog(data requestMetric, conn_metrics connMetric, accesslog logInfo) {
8181
// Skip output access log on connection establishment
82-
if data.state == TCP_ESTABLISHED && data.duration < LONG_CONN_METRIC_THRESHOLD {
82+
if data.state == TCP_ESTABLISHED && conn_metrics.totalReports == 1 {
8383
return
8484
}
8585
logStr := buildAccesslog(data, conn_metrics, accesslog)

pkg/controller/telemetry/metric.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ import (
3838

3939
const (
4040
TCP_ESTABLISHED = uint32(1)
41-
TCP_CLOSTED = uint32(7)
41+
TCP_CLOSED = uint32(7)
4242

4343
connection_success = uint32(1)
4444

@@ -512,7 +512,7 @@ func (m *MetricController) Run(ctx context.Context, mapOfTcpInfo *ebpf.Map) {
512512
connectionLabels = m.buildConnectionMetric(&data)
513513
}
514514
if m.EnableAccesslog.Load() {
515-
// accesslogs at interval of 5 sec during connection lifecycle and at close of connection
515+
// accesslogs at interval of 5 sec during connection lifecycle if connectionMetrics is enabled and at close of connection
516516
OutputAccesslog(data, tcpConns[data.conSrcDstInfo], accesslog)
517517
}
518518

@@ -526,7 +526,7 @@ func (m *MetricController) Run(ctx context.Context, mapOfTcpInfo *ebpf.Map) {
526526
}
527527
m.mutex.Unlock()
528528

529-
if data.state == TCP_CLOSTED {
529+
if data.state == TCP_CLOSED {
530530
delete(tcpConns, data.conSrcDstInfo)
531531
}
532532
}
@@ -848,7 +848,7 @@ func (m *MetricController) updateWorkloadMetricCache(data requestMetric, labels
848848
if data.state == TCP_ESTABLISHED && metric.totalReports == 1 {
849849
v.WorkloadConnOpened = v.WorkloadConnOpened + 1
850850
}
851-
if data.state == TCP_CLOSTED {
851+
if data.state == TCP_CLOSED {
852852
v.WorkloadConnClosed = v.WorkloadConnClosed + 1
853853
}
854854
if data.success != connection_success {
@@ -863,7 +863,7 @@ func (m *MetricController) updateWorkloadMetricCache(data requestMetric, labels
863863
if data.state == TCP_ESTABLISHED && metric.totalReports == 1 {
864864
newWorkloadMetricInfo.WorkloadConnOpened = 1
865865
}
866-
if data.state == TCP_CLOSTED {
866+
if data.state == TCP_CLOSED {
867867
newWorkloadMetricInfo.WorkloadConnClosed = 1
868868
}
869869
if data.success != connection_success {
@@ -883,7 +883,7 @@ func (m *MetricController) updateServiceMetricCache(data requestMetric, labels s
883883
if data.state == TCP_ESTABLISHED && metric.totalReports == 1 {
884884
v.ServiceConnOpened = v.ServiceConnOpened + 1
885885
}
886-
if data.state == TCP_CLOSTED {
886+
if data.state == TCP_CLOSED {
887887
v.ServiceConnClosed = v.ServiceConnClosed + 1
888888
}
889889
if data.success != connection_success {
@@ -896,7 +896,7 @@ func (m *MetricController) updateServiceMetricCache(data requestMetric, labels s
896896
if data.state == TCP_ESTABLISHED && metric.totalReports == 1 {
897897
newServiceMetricInfo.ServiceConnOpened = 1
898898
}
899-
if data.state == TCP_CLOSTED {
899+
if data.state == TCP_CLOSED {
900900
newServiceMetricInfo.ServiceConnClosed = 1
901901
}
902902
if data.success != connection_success {
@@ -923,7 +923,7 @@ func (m *MetricController) updateConnectionMetricCache(data requestMetric, connD
923923
newConnectionMetricInfo.ConnTotalRetrans = float64(connData.totalRetrans)
924924
m.connectionMetricCache[labels] = &newConnectionMetricInfo
925925
}
926-
if data.state == TCP_CLOSTED {
926+
if data.state == TCP_CLOSED {
927927
deleteLock.Lock()
928928
deleteConnection = append(deleteConnection, &labels)
929929
deleteLock.Unlock()

pkg/controller/telemetry/metric_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -535,7 +535,7 @@ func TestBuildConnectionMetricsToPrometheus(t *testing.T) {
535535
receivedBytes: 0x0000004,
536536
packetLost: 0x0000001,
537537
totalRetrans: 0x0000002,
538-
state: TCP_CLOSTED,
538+
state: TCP_CLOSED,
539539
},
540540
labels: connectionMetricLabels{
541541
reporter: "source",

pkg/controller/telemetry/utils.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ var (
245245
Help: "The total number of TCP connections failed to a service.",
246246
}, serviceLabels)
247247

248-
// Metrics to track the status of long lived TCP connections (duration > 30s)
248+
// Metrics to track the status of long lived TCP connections
249249
tcpConnectionTotalSendBytes = prometheus.NewGaugeVec(
250250
prometheus.GaugeOpts{
251251
Name: "kmesh_tcp_connection_sent_bytes_total",

0 commit comments

Comments
 (0)