rfac: added connMetric in TestMetricController_updatePrometheusMetric ut

yp969803 · yp969803 · commit c1d858c5fa85 · 2025-04-22T10:11:58.000+05:30
Signed-off-by: Yash Patel <yp969803@gmail.com>
diff --git a/ctl/monitoring/monitoring.go b/ctl/monitoring/monitoring.go
@@ -291,7 +291,7 @@ func SetWorkloadMetricsPerKmeshDaemon(cli kube.CLIClient, podName, workloadMetri
 			return
 		}
 		bodyString := string(bodyBytes)
-		if resp.StatusCode == http.StatusBadRequest && bytes.Contains(bodyBytes, []byte("Kmesh monitoring is disable, cannot enable accesslog")) {
+		if resp.StatusCode == http.StatusBadRequest && bytes.Contains(bodyBytes, []byte("Kmesh monitoring is disable, cannot enable workloadMetrics")) {
 			log.Errorf("failed to enable workload metrics: %v. Need to start Kmesh's Monitoring. Please run `kmeshctl monitoring -h` for more help.", bodyString)
 			return
 		}
@@ -346,7 +346,7 @@ func SetConnectionMetricsPerKmeshDaemon(cli kube.CLIClient, podName, connectionM
 			return
 		}
 		bodyString := string(bodyBytes)
-		if resp.StatusCode == http.StatusBadRequest && bytes.Contains(bodyBytes, []byte("Kmesh monitoring is disable, cannot enable accesslog")) {
+		if resp.StatusCode == http.StatusBadRequest && bytes.Contains(bodyBytes, []byte("Kmesh monitoring is disable, cannot enable connectionMetrics")) {
 			log.Errorf("failed to enable connection metrics: %v. Need to start Kmesh's Monitoring. Please run `kmeshctl monitoring -h` for more help.", bodyString)
 			return
 		}
diff --git a/docs/proposal/tcp_long_connection_metrics.md b/docs/proposal/tcp_long_connection_metrics.md
@@ -171,19 +171,19 @@ We will update the functions of metric.go for periodic updating the workload and
 
 #### Exposing long connection prometheus metrics 
 
-We will expose metrics for the connections whose duration exceesds 30 seconds. Not exposing metrics for short connection as it can lead to lot of metrics and they are also not suitable for prometheus metrics because prometheus itself has a scrape interval of maximum 15s, and short-lived connections may start and end between scrapes, resulting in incomplete or misleading data. By focusing only on longer-lived connections, we ensure the metrics are stable, meaningful, and better aligned with Prometheus’s time-series data model.
+We will expose metrics for the connections whose duration exceeds 30 seconds. We do not expose metrics for short connections, as doing so can lead to a large number of metrics, and they are also not suitable for Prometheus metrics because Prometheus itself has a scrape interval of maximum 15s, and short-lived connections may start and end between scrapes, resulting in incomplete or misleading data. By focusing only on longer-lived connections, we ensure the metrics are stable, meaningful, and better aligned with Prometheus’s time-series data model.
 
 We can add another component in the future that reports real-time information about connections, like Cilium Hubble. 
 
 Prometheus metrics exposed 
 
-- kmesh_tcp_connection_sent_bytes_total : The total number of bytes sent over established TCP connection
+- `kmesh_tcp_connection_sent_bytes_total` : The total number of bytes sent over established TCP connection
 
-- kmesh_tcp_connection_received_bytes_total : The total number of bytes received over established TCP connection
+- `kmesh_tcp_connection_received_bytes_total` : The total number of bytes received over established TCP connection
 
-- kmesh_tcp_connection_packet_lost_total : Total number of packets lost during transmission in a TCP connection
+- `kmesh_tcp_connection_packet_lost_total` : Total number of packets lost during transmission in a TCP connection
 
-- kmesh_tcp_connection_retrans_total : The total number of retransmits over established TCP connection
+- `kmesh_tcp_connection_retrans_total` : The total number of retransmits over established TCP connection
 
 The above metrics have the following labels
 
diff --git a/pkg/controller/telemetry/metric.go b/pkg/controller/telemetry/metric.go
@@ -526,7 +526,7 @@ func (m *MetricController) Run(ctx context.Context, mapOfTcpInfo *ebpf.Map) {
 			}
 			m.updateServiceMetricCache(data, serviceLabels, tcpConns)
 			if m.EnableConnectionMetric.Load() && data.duration > LONG_CONN_METRIC_THRESHOLD {
-				deleteConnection = m.updateConnectionMetricCache(data, tcpConns[data.conSrcDstInfo], connectionLabels, deleteConnection)
+				m.updateConnectionMetricCache(data, tcpConns[data.conSrcDstInfo], connectionLabels)
 			}
 			m.mutex.Unlock()
 		}
@@ -906,7 +906,7 @@ func (m *MetricController) updateServiceMetricCache(data requestMetric, labels s
 	}
 }
 
-func (m *MetricController) updateConnectionMetricCache(data requestMetric, connData connMetric, labels connectionMetricLabels, delConn []*connectionMetricLabels) []*connectionMetricLabels {
+func (m *MetricController) updateConnectionMetricCache(data requestMetric, connData connMetric, labels connectionMetricLabels) {
 	v, ok := m.connectionMetricCache[labels]
 	if ok {
 		v.ConnSentBytes = v.ConnSentBytes + float64(data.sentBytes)
@@ -923,10 +923,9 @@ func (m *MetricController) updateConnectionMetricCache(data requestMetric, connD
 	}
 	if data.state == TCP_CLOSTED {
 		deleteLock.Lock()
-		delConn = append(delConn, &labels)
+		deleteConnection = append(deleteConnection, &labels)
 		deleteLock.Unlock()
 	}
-	return delConn
 }
 
 func (m *MetricController) updatePrometheusMetric() {
diff --git a/pkg/controller/telemetry/metric_test.go b/pkg/controller/telemetry/metric_test.go
@@ -618,15 +618,16 @@ func TestBuildConnectionMetricsToPrometheus(t *testing.T) {
 				},
 			}
 
-			delConn := m.updateConnectionMetricCache(tt.args.data, tt.args.tcpConns[tt.args.data.conSrcDstInfo], tt.args.labels, []*connectionMetricLabels{})
+			deleteConnection = []*connectionMetricLabels{}
+			m.updateConnectionMetricCache(tt.args.data, tt.args.tcpConns[tt.args.data.conSrcDstInfo], tt.args.labels)
 			assert.Equal(t, m.connectionMetricCache[tt.args.labels].ConnSentBytes, tt.want[0])
 			assert.Equal(t, m.connectionMetricCache[tt.args.labels].ConnReceivedBytes, tt.want[1])
 			assert.Equal(t, m.connectionMetricCache[tt.args.labels].ConnPacketLost, tt.want[2])
 			assert.Equal(t, m.connectionMetricCache[tt.args.labels].ConnTotalRetrans, tt.want[3])
 			if tt.id == 1 {
-				assert.Equal(t, len(delConn), 0)
+				assert.Equal(t, len(deleteConnection), 0)
 			} else {
-				assert.Equal(t, len(delConn), 1)
+				assert.Equal(t, len(deleteConnection), 1)
 			}
 		})
 	}
@@ -1344,7 +1345,7 @@ func TestBuildConnectionMetric(t *testing.T) {
 			},
 			want: connectionMetricLabels{
 				reporter:                "source",
-				startTime:               "0001-01-01 00:00:00 +0000 UTC",
+				startTime:               "",
 				sourceWorkload:          "sleep",
 				sourceCanonicalService:  "sleepCanonical",
 				sourceCanonicalRevision: "sleepVersion",
@@ -1386,6 +1387,7 @@ func TestBuildConnectionMetric(t *testing.T) {
 				serviceCache:  serviceCache,
 			}
 			got := m.buildConnectionMetric(tt.args.data)
+			got.startTime = ""
 			if !reflect.DeepEqual(got, tt.want) {
 				t.Errorf("Metric.buildMetric() = %v, want %v", got, tt.want)
 			}
@@ -1491,20 +1493,96 @@ func TestMetricController_updatePrometheusMetric(t *testing.T) {
 		responseFlags:                "-",
 		connectionSecurityPolicy:     "mutual_tls",
 	}
+
+	testConnLabel1 := connectionMetricLabels{
+		reporter:                "source",
+		startTime:               "0001-01-01 00:00:00 +0000 UTC",
+		sourceWorkload:          "sleep",
+		sourceCanonicalService:  "sleepCanonical",
+		sourceCanonicalRevision: "sleepVersion",
+		sourceWorkloadNamespace: "default",
+		sourcePrincipal:         "spiffe://cluster.local/ns/default/sa/default",
+		sourceApp:               "sleepCanonical",
+		sourceVersion:           "sleepVersion",
+		sourceCluster:           "Kubernetes",
+		sourceAddress:           "10.19.25.33:8000",
+
+		destinationAddress:           "10.19.25.31:8000",
+		destinationPodAddress:        "10.19.25.31",
+		destinationPodNamespace:      "kmesh-system",
+		destinationPodName:           "kmesh",
+		destinationService:           "kmesh.kmesh-system.svc.cluster.local",
+		destinationServiceNamespace:  "kmesh-system",
+		destinationServiceName:       "kmesh",
+		destinationWorkload:          "kmesh-daemon",
+		destinationCanonicalService:  "dstCanonical",
+		destinationCanonicalRevision: "dstVersion",
+		destinationWorkloadNamespace: "kmesh-system",
+		destinationPrincipal:         "spiffe://cluster.local/ns/kmesh-system/sa/default",
+		destinationApp:               "dstCanonical",
+		destinationVersion:           "dstVersion",
+		destinationCluster:           "Kubernetes",
+
+		requestProtocol: "tcp",
+		responseFlags:   "",
+
+		connectionSecurityPolicy: "mutual_tls",
+	}
+
+	testConnLabel2 := connectionMetricLabels{
+		reporter:                "destination",
+		startTime:               "0001-01-01 00:00:00 +0000 UTC",
+		sourceWorkload:          "sleep",
+		sourceCanonicalService:  "sleepCanonical",
+		sourceCanonicalRevision: "sleepVersion",
+		sourceWorkloadNamespace: "default",
+		sourcePrincipal:         "spiffe://cluster.local/ns/default/sa/default",
+		sourceApp:               "sleepCanonical",
+		sourceVersion:           "sleepVersion",
+		sourceCluster:           "Kubernetes",
+		sourceAddress:           "10.19.25.33:8000",
+
+		destinationAddress:           "10.19.25.31:8000",
+		destinationPodAddress:        "10.19.25.31",
+		destinationPodNamespace:      "kmesh-system",
+		destinationPodName:           "kmesh",
+		destinationService:           "kmesh.kmesh-system.svc.cluster.local",
+		destinationServiceNamespace:  "kmesh-system",
+		destinationServiceName:       "kmesh",
+		destinationWorkload:          "kmesh-daemon",
+		destinationCanonicalService:  "dstCanonical",
+		destinationCanonicalRevision: "dstVersion",
+		destinationWorkloadNamespace: "kmesh-system",
+		destinationPrincipal:         "spiffe://cluster.local/ns/kmesh-system/sa/default",
+		destinationApp:               "dstCanonical",
+		destinationVersion:           "dstVersion",
+		destinationCluster:           "Kubernetes",
+
+		requestProtocol: "tcp",
+		responseFlags:   "",
+
+		connectionSecurityPolicy: "mutual_tls",
+	}
+
 	workloadPrometheusLabel1 := struct2map(testworkloadLabel1)
 	workloadPrometheusLabel2 := struct2map(testworkloadLabel2)
 	servicePrometheusLabel1 := struct2map(testServiceLabel1)
 	servicePrometheusLabel2 := struct2map(testServiceLabel2)
+	connectionPrometheusLabel1 := struct2map(testConnLabel1)
+	connectionPrometheusLabel2 := struct2map(testConnLabel2)
+
 	tests := []struct {
-		name                  string
-		workloadMetricCache   workloadMetricInfo
-		serviceMetricCache    serviceMetricInfo
-		exportWorkloadMetrics []*prometheus.GaugeVec
-		exportServiceMetrics  []*prometheus.GaugeVec
-		want                  []float64
+		name                    string
+		workloadMetricCache     workloadMetricInfo
+		serviceMetricCache      serviceMetricInfo
+		connectionMetricCache   connectionMetricInfo
+		exportWorkloadMetrics   []*prometheus.GaugeVec
+		exportServiceMetrics    []*prometheus.GaugeVec
+		exportConnectionMetrics []*prometheus.GaugeVec
+		want                    []float64
 	}{
 		{
-			name: "update workload metric in Prometheus",
+			name: "update metric in Prometheus",
 			workloadMetricCache: workloadMetricInfo{
 				WorkloadConnOpened:        1,
 				WorkloadConnClosed:        2,
@@ -1521,6 +1599,12 @@ func TestMetricController_updatePrometheusMetric(t *testing.T) {
 				ServiceConnSentBytes:     9,
 				ServiceConnReceivedBytes: 10,
 			},
+			connectionMetricCache: connectionMetricInfo{
+				ConnSentBytes:     1,
+				ConnReceivedBytes: 2,
+				ConnPacketLost:    3,
+				ConnTotalRetrans:  4,
+			},
 			exportWorkloadMetrics: []*prometheus.GaugeVec{
 				tcpConnectionOpenedInWorkload,
 				tcpConnectionClosedInWorkload,
@@ -1535,6 +1619,12 @@ func TestMetricController_updatePrometheusMetric(t *testing.T) {
 				tcpSentBytesInService,
 				tcpReceivedBytesInService,
 			},
+			exportConnectionMetrics: []*prometheus.GaugeVec{
+				tcpConnectionTotalSendBytes,
+				tcpConnectionTotalReceivedBytes,
+				tcpConnectionTotalPacketLost,
+				tcpConnectionTotalRetrans,
+			},
 			want: []float64{
 				1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
 			},
@@ -1554,6 +1644,10 @@ func TestMetricController_updatePrometheusMetric(t *testing.T) {
 					testServiceLabel1: &tt.serviceMetricCache,
 					testServiceLabel2: &tt.serviceMetricCache,
 				},
+				connectionMetricCache: map[connectionMetricLabels]*connectionMetricInfo{
+					testConnLabel1: &tt.connectionMetricCache,
+					testConnLabel2: &tt.connectionMetricCache,
+				},
 			}
 			m.updatePrometheusMetric()
 			index := 0
@@ -1571,6 +1665,14 @@ func TestMetricController_updatePrometheusMetric(t *testing.T) {
 				assert.Equal(t, tt.want[index], v2)
 				index = index + 1
 			}
+			index = 0
+			for _, metric := range tt.exportConnectionMetrics {
+				v1 := testutil.ToFloat64(metric.With(connectionPrometheusLabel1))
+				assert.Equal(t, tt.want[index], v1)
+				v2 := testutil.ToFloat64(metric.With(connectionPrometheusLabel2))
+				assert.Equal(t, tt.want[index], v2)
+				index = index + 1
+			}
 			cancel()
 		})
 	}

Original file line number	Diff line number	Diff line change
`@@ -291,7 +291,7 @@ func SetWorkloadMetricsPerKmeshDaemon(cli kube.CLIClient, podName, workloadMetri`
`291`	`291`	`return`
`292`	`292`	`}`
`293`	`293`	`bodyString := string(bodyBytes)`
`294`		`- if resp.StatusCode == http.StatusBadRequest && bytes.Contains(bodyBytes, []byte("Kmesh monitoring is disable, cannot enable accesslog")) {`
	`294`	`+ if resp.StatusCode == http.StatusBadRequest && bytes.Contains(bodyBytes, []byte("Kmesh monitoring is disable, cannot enable workloadMetrics")) {`
`295`	`295`	log.Errorf("failed to enable workload metrics: %v. Need to start Kmesh's Monitoring. Please run `kmeshctl monitoring -h` for more help.", bodyString)
`296`	`296`	`return`
`297`	`297`	`}`
`@@ -346,7 +346,7 @@ func SetConnectionMetricsPerKmeshDaemon(cli kube.CLIClient, podName, connectionM`
`346`	`346`	`return`
`347`	`347`	`}`
`348`	`348`	`bodyString := string(bodyBytes)`
`349`		`- if resp.StatusCode == http.StatusBadRequest && bytes.Contains(bodyBytes, []byte("Kmesh monitoring is disable, cannot enable accesslog")) {`
	`349`	`+ if resp.StatusCode == http.StatusBadRequest && bytes.Contains(bodyBytes, []byte("Kmesh monitoring is disable, cannot enable connectionMetrics")) {`
`350`	`350`	log.Errorf("failed to enable connection metrics: %v. Need to start Kmesh's Monitoring. Please run `kmeshctl monitoring -h` for more help.", bodyString)
`351`	`351`	`return`
`352`	`352`	`}`
Original file line number	Diff line number	Diff line change
`@@ -526,7 +526,7 @@ func (m *MetricController) Run(ctx context.Context, mapOfTcpInfo *ebpf.Map) {`
`526`	`526`	`}`
`527`	`527`	`m.updateServiceMetricCache(data, serviceLabels, tcpConns)`
`528`	`528`	`if m.EnableConnectionMetric.Load() && data.duration > LONG_CONN_METRIC_THRESHOLD {`
`529`		`- deleteConnection = m.updateConnectionMetricCache(data, tcpConns[data.conSrcDstInfo], connectionLabels, deleteConnection)`
	`529`	`+ m.updateConnectionMetricCache(data, tcpConns[data.conSrcDstInfo], connectionLabels)`
`530`	`530`	`}`
`531`	`531`	`m.mutex.Unlock()`
`532`	`532`	`}`
`@@ -906,7 +906,7 @@ func (m *MetricController) updateServiceMetricCache(data requestMetric, labels s`
`906`	`906`	`}`
`907`	`907`	`}`
`908`	`908`
`909`		`-func (m *MetricController) updateConnectionMetricCache(data requestMetric, connData connMetric, labels connectionMetricLabels, delConn []*connectionMetricLabels) []*connectionMetricLabels {`
	`909`	`+func (m *MetricController) updateConnectionMetricCache(data requestMetric, connData connMetric, labels connectionMetricLabels) {`
`910`	`910`	`v, ok := m.connectionMetricCache[labels]`
`911`	`911`	`if ok {`
`912`	`912`	`v.ConnSentBytes = v.ConnSentBytes + float64(data.sentBytes)`
`@@ -923,10 +923,9 @@ func (m *MetricController) updateConnectionMetricCache(data requestMetric, connD`
`923`	`923`	`}`
`924`	`924`	`if data.state == TCP_CLOSTED {`
`925`	`925`	`deleteLock.Lock()`
`926`		`- delConn = append(delConn, &labels)`
	`926`	`+ deleteConnection = append(deleteConnection, &labels)`
`927`	`927`	`deleteLock.Unlock()`
`928`	`928`	`}`
`929`		`- return delConn`
`930`	`929`	`}`
`931`	`930`
`932`	`931`	`func (m *MetricController) updatePrometheusMetric() {`