Skip to content

Commit c1d858c

Browse files
committed
rfac: added connMetric in TestMetricController_updatePrometheusMetric ut
Signed-off-by: Yash Patel <[email protected]>
1 parent 584a664 commit c1d858c

File tree

4 files changed

+123
-22
lines changed

4 files changed

+123
-22
lines changed

ctl/monitoring/monitoring.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ func SetWorkloadMetricsPerKmeshDaemon(cli kube.CLIClient, podName, workloadMetri
291291
return
292292
}
293293
bodyString := string(bodyBytes)
294-
if resp.StatusCode == http.StatusBadRequest && bytes.Contains(bodyBytes, []byte("Kmesh monitoring is disable, cannot enable accesslog")) {
294+
if resp.StatusCode == http.StatusBadRequest && bytes.Contains(bodyBytes, []byte("Kmesh monitoring is disable, cannot enable workloadMetrics")) {
295295
log.Errorf("failed to enable workload metrics: %v. Need to start Kmesh's Monitoring. Please run `kmeshctl monitoring -h` for more help.", bodyString)
296296
return
297297
}
@@ -346,7 +346,7 @@ func SetConnectionMetricsPerKmeshDaemon(cli kube.CLIClient, podName, connectionM
346346
return
347347
}
348348
bodyString := string(bodyBytes)
349-
if resp.StatusCode == http.StatusBadRequest && bytes.Contains(bodyBytes, []byte("Kmesh monitoring is disable, cannot enable accesslog")) {
349+
if resp.StatusCode == http.StatusBadRequest && bytes.Contains(bodyBytes, []byte("Kmesh monitoring is disable, cannot enable connectionMetrics")) {
350350
log.Errorf("failed to enable connection metrics: %v. Need to start Kmesh's Monitoring. Please run `kmeshctl monitoring -h` for more help.", bodyString)
351351
return
352352
}

docs/proposal/tcp_long_connection_metrics.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -171,19 +171,19 @@ We will update the functions of metric.go for periodic updating the workload and
171171

172172
#### Exposing long connection prometheus metrics
173173

174-
We will expose metrics for the connections whose duration exceesds 30 seconds. Not exposing metrics for short connection as it can lead to lot of metrics and they are also not suitable for prometheus metrics because prometheus itself has a scrape interval of maximum 15s, and short-lived connections may start and end between scrapes, resulting in incomplete or misleading data. By focusing only on longer-lived connections, we ensure the metrics are stable, meaningful, and better aligned with Prometheus’s time-series data model.
174+
We will expose metrics only for connections whose duration exceeds 30 seconds. We do not expose metrics for short-lived connections because doing so can produce a very large number of metrics, and because such connections are poorly suited to Prometheus: with a commonly configured scrape interval of around 15s, a short-lived connection may start and end between scrapes, resulting in incomplete or misleading data. By focusing only on longer-lived connections, we ensure the metrics are stable, meaningful, and better aligned with Prometheus’s time-series data model.
175175

176176
We can add another component in the future that reports real-time information about connections, similar to Cilium Hubble.
177177

178178
Prometheus metrics exposed
179179

180-
- kmesh_tcp_connection_sent_bytes_total : The total number of bytes sent over established TCP connection
180+
- `kmesh_tcp_connection_sent_bytes_total` : The total number of bytes sent over an established TCP connection
181181

182-
- kmesh_tcp_connection_received_bytes_total : The total number of bytes received over established TCP connection
182+
- `kmesh_tcp_connection_received_bytes_total` : The total number of bytes received over an established TCP connection
183183

184-
- kmesh_tcp_connection_packet_lost_total : Total number of packets lost during transmission in a TCP connection
184+
- `kmesh_tcp_connection_packet_lost_total` : Total number of packets lost during transmission in a TCP connection
185185

186-
- kmesh_tcp_connection_retrans_total : The total number of retransmits over established TCP connection
186+
- `kmesh_tcp_connection_retrans_total` : The total number of retransmits over an established TCP connection
187187

188188
The above metrics have the following labels
189189

pkg/controller/telemetry/metric.go

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -526,7 +526,7 @@ func (m *MetricController) Run(ctx context.Context, mapOfTcpInfo *ebpf.Map) {
526526
}
527527
m.updateServiceMetricCache(data, serviceLabels, tcpConns)
528528
if m.EnableConnectionMetric.Load() && data.duration > LONG_CONN_METRIC_THRESHOLD {
529-
deleteConnection = m.updateConnectionMetricCache(data, tcpConns[data.conSrcDstInfo], connectionLabels, deleteConnection)
529+
m.updateConnectionMetricCache(data, tcpConns[data.conSrcDstInfo], connectionLabels)
530530
}
531531
m.mutex.Unlock()
532532
}
@@ -906,7 +906,7 @@ func (m *MetricController) updateServiceMetricCache(data requestMetric, labels s
906906
}
907907
}
908908

909-
func (m *MetricController) updateConnectionMetricCache(data requestMetric, connData connMetric, labels connectionMetricLabels, delConn []*connectionMetricLabels) []*connectionMetricLabels {
909+
func (m *MetricController) updateConnectionMetricCache(data requestMetric, connData connMetric, labels connectionMetricLabels) {
910910
v, ok := m.connectionMetricCache[labels]
911911
if ok {
912912
v.ConnSentBytes = v.ConnSentBytes + float64(data.sentBytes)
@@ -923,10 +923,9 @@ func (m *MetricController) updateConnectionMetricCache(data requestMetric, connD
923923
}
924924
if data.state == TCP_CLOSTED {
925925
deleteLock.Lock()
926-
delConn = append(delConn, &labels)
926+
deleteConnection = append(deleteConnection, &labels)
927927
deleteLock.Unlock()
928928
}
929-
return delConn
930929
}
931930

932931
func (m *MetricController) updatePrometheusMetric() {

pkg/controller/telemetry/metric_test.go

Lines changed: 113 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -618,15 +618,16 @@ func TestBuildConnectionMetricsToPrometheus(t *testing.T) {
618618
},
619619
}
620620

621-
delConn := m.updateConnectionMetricCache(tt.args.data, tt.args.tcpConns[tt.args.data.conSrcDstInfo], tt.args.labels, []*connectionMetricLabels{})
621+
deleteConnection = []*connectionMetricLabels{}
622+
m.updateConnectionMetricCache(tt.args.data, tt.args.tcpConns[tt.args.data.conSrcDstInfo], tt.args.labels)
622623
assert.Equal(t, m.connectionMetricCache[tt.args.labels].ConnSentBytes, tt.want[0])
623624
assert.Equal(t, m.connectionMetricCache[tt.args.labels].ConnReceivedBytes, tt.want[1])
624625
assert.Equal(t, m.connectionMetricCache[tt.args.labels].ConnPacketLost, tt.want[2])
625626
assert.Equal(t, m.connectionMetricCache[tt.args.labels].ConnTotalRetrans, tt.want[3])
626627
if tt.id == 1 {
627-
assert.Equal(t, len(delConn), 0)
628+
assert.Equal(t, len(deleteConnection), 0)
628629
} else {
629-
assert.Equal(t, len(delConn), 1)
630+
assert.Equal(t, len(deleteConnection), 1)
630631
}
631632
})
632633
}
@@ -1344,7 +1345,7 @@ func TestBuildConnectionMetric(t *testing.T) {
13441345
},
13451346
want: connectionMetricLabels{
13461347
reporter: "source",
1347-
startTime: "0001-01-01 00:00:00 +0000 UTC",
1348+
startTime: "",
13481349
sourceWorkload: "sleep",
13491350
sourceCanonicalService: "sleepCanonical",
13501351
sourceCanonicalRevision: "sleepVersion",
@@ -1386,6 +1387,7 @@ func TestBuildConnectionMetric(t *testing.T) {
13861387
serviceCache: serviceCache,
13871388
}
13881389
got := m.buildConnectionMetric(tt.args.data)
1390+
got.startTime = ""
13891391
if !reflect.DeepEqual(got, tt.want) {
13901392
t.Errorf("Metric.buildMetric() = %v, want %v", got, tt.want)
13911393
}
@@ -1491,20 +1493,96 @@ func TestMetricController_updatePrometheusMetric(t *testing.T) {
14911493
responseFlags: "-",
14921494
connectionSecurityPolicy: "mutual_tls",
14931495
}
1496+
1497+
testConnLabel1 := connectionMetricLabels{
1498+
reporter: "source",
1499+
startTime: "0001-01-01 00:00:00 +0000 UTC",
1500+
sourceWorkload: "sleep",
1501+
sourceCanonicalService: "sleepCanonical",
1502+
sourceCanonicalRevision: "sleepVersion",
1503+
sourceWorkloadNamespace: "default",
1504+
sourcePrincipal: "spiffe://cluster.local/ns/default/sa/default",
1505+
sourceApp: "sleepCanonical",
1506+
sourceVersion: "sleepVersion",
1507+
sourceCluster: "Kubernetes",
1508+
sourceAddress: "10.19.25.33:8000",
1509+
1510+
destinationAddress: "10.19.25.31:8000",
1511+
destinationPodAddress: "10.19.25.31",
1512+
destinationPodNamespace: "kmesh-system",
1513+
destinationPodName: "kmesh",
1514+
destinationService: "kmesh.kmesh-system.svc.cluster.local",
1515+
destinationServiceNamespace: "kmesh-system",
1516+
destinationServiceName: "kmesh",
1517+
destinationWorkload: "kmesh-daemon",
1518+
destinationCanonicalService: "dstCanonical",
1519+
destinationCanonicalRevision: "dstVersion",
1520+
destinationWorkloadNamespace: "kmesh-system",
1521+
destinationPrincipal: "spiffe://cluster.local/ns/kmesh-system/sa/default",
1522+
destinationApp: "dstCanonical",
1523+
destinationVersion: "dstVersion",
1524+
destinationCluster: "Kubernetes",
1525+
1526+
requestProtocol: "tcp",
1527+
responseFlags: "",
1528+
1529+
connectionSecurityPolicy: "mutual_tls",
1530+
}
1531+
1532+
testConnLabel2 := connectionMetricLabels{
1533+
reporter: "destination",
1534+
startTime: "0001-01-01 00:00:00 +0000 UTC",
1535+
sourceWorkload: "sleep",
1536+
sourceCanonicalService: "sleepCanonical",
1537+
sourceCanonicalRevision: "sleepVersion",
1538+
sourceWorkloadNamespace: "default",
1539+
sourcePrincipal: "spiffe://cluster.local/ns/default/sa/default",
1540+
sourceApp: "sleepCanonical",
1541+
sourceVersion: "sleepVersion",
1542+
sourceCluster: "Kubernetes",
1543+
sourceAddress: "10.19.25.33:8000",
1544+
1545+
destinationAddress: "10.19.25.31:8000",
1546+
destinationPodAddress: "10.19.25.31",
1547+
destinationPodNamespace: "kmesh-system",
1548+
destinationPodName: "kmesh",
1549+
destinationService: "kmesh.kmesh-system.svc.cluster.local",
1550+
destinationServiceNamespace: "kmesh-system",
1551+
destinationServiceName: "kmesh",
1552+
destinationWorkload: "kmesh-daemon",
1553+
destinationCanonicalService: "dstCanonical",
1554+
destinationCanonicalRevision: "dstVersion",
1555+
destinationWorkloadNamespace: "kmesh-system",
1556+
destinationPrincipal: "spiffe://cluster.local/ns/kmesh-system/sa/default",
1557+
destinationApp: "dstCanonical",
1558+
destinationVersion: "dstVersion",
1559+
destinationCluster: "Kubernetes",
1560+
1561+
requestProtocol: "tcp",
1562+
responseFlags: "",
1563+
1564+
connectionSecurityPolicy: "mutual_tls",
1565+
}
1566+
14941567
workloadPrometheusLabel1 := struct2map(testworkloadLabel1)
14951568
workloadPrometheusLabel2 := struct2map(testworkloadLabel2)
14961569
servicePrometheusLabel1 := struct2map(testServiceLabel1)
14971570
servicePrometheusLabel2 := struct2map(testServiceLabel2)
1571+
connectionPrometheusLabel1 := struct2map(testConnLabel1)
1572+
connectionPrometheusLabel2 := struct2map(testConnLabel2)
1573+
14981574
tests := []struct {
1499-
name string
1500-
workloadMetricCache workloadMetricInfo
1501-
serviceMetricCache serviceMetricInfo
1502-
exportWorkloadMetrics []*prometheus.GaugeVec
1503-
exportServiceMetrics []*prometheus.GaugeVec
1504-
want []float64
1575+
name string
1576+
workloadMetricCache workloadMetricInfo
1577+
serviceMetricCache serviceMetricInfo
1578+
connectionMetricCache connectionMetricInfo
1579+
exportWorkloadMetrics []*prometheus.GaugeVec
1580+
exportServiceMetrics []*prometheus.GaugeVec
1581+
exportConnectionMetrics []*prometheus.GaugeVec
1582+
want []float64
15051583
}{
15061584
{
1507-
name: "update workload metric in Prometheus",
1585+
name: "update metric in Prometheus",
15081586
workloadMetricCache: workloadMetricInfo{
15091587
WorkloadConnOpened: 1,
15101588
WorkloadConnClosed: 2,
@@ -1521,6 +1599,12 @@ func TestMetricController_updatePrometheusMetric(t *testing.T) {
15211599
ServiceConnSentBytes: 9,
15221600
ServiceConnReceivedBytes: 10,
15231601
},
1602+
connectionMetricCache: connectionMetricInfo{
1603+
ConnSentBytes: 1,
1604+
ConnReceivedBytes: 2,
1605+
ConnPacketLost: 3,
1606+
ConnTotalRetrans: 4,
1607+
},
15241608
exportWorkloadMetrics: []*prometheus.GaugeVec{
15251609
tcpConnectionOpenedInWorkload,
15261610
tcpConnectionClosedInWorkload,
@@ -1535,6 +1619,12 @@ func TestMetricController_updatePrometheusMetric(t *testing.T) {
15351619
tcpSentBytesInService,
15361620
tcpReceivedBytesInService,
15371621
},
1622+
exportConnectionMetrics: []*prometheus.GaugeVec{
1623+
tcpConnectionTotalSendBytes,
1624+
tcpConnectionTotalReceivedBytes,
1625+
tcpConnectionTotalPacketLost,
1626+
tcpConnectionTotalRetrans,
1627+
},
15381628
want: []float64{
15391629
1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
15401630
},
@@ -1554,6 +1644,10 @@ func TestMetricController_updatePrometheusMetric(t *testing.T) {
15541644
testServiceLabel1: &tt.serviceMetricCache,
15551645
testServiceLabel2: &tt.serviceMetricCache,
15561646
},
1647+
connectionMetricCache: map[connectionMetricLabels]*connectionMetricInfo{
1648+
testConnLabel1: &tt.connectionMetricCache,
1649+
testConnLabel2: &tt.connectionMetricCache,
1650+
},
15571651
}
15581652
m.updatePrometheusMetric()
15591653
index := 0
@@ -1571,6 +1665,14 @@ func TestMetricController_updatePrometheusMetric(t *testing.T) {
15711665
assert.Equal(t, tt.want[index], v2)
15721666
index = index + 1
15731667
}
1668+
index = 0
1669+
for _, metric := range tt.exportConnectionMetrics {
1670+
v1 := testutil.ToFloat64(metric.With(connectionPrometheusLabel1))
1671+
assert.Equal(t, tt.want[index], v1)
1672+
v2 := testutil.ToFloat64(metric.With(connectionPrometheusLabel2))
1673+
assert.Equal(t, tt.want[index], v2)
1674+
index = index + 1
1675+
}
15741676
cancel()
15751677
})
15761678
}

0 commit comments

Comments
 (0)