Skip to content

Commit 128777a

Browse files
committed
rfac: changed tcp_conn proposal based on new approach
Signed-off-by: Yash Patel <[email protected]>
1 parent aa15f4f commit 128777a

File tree

2 files changed

+42
-158
lines changed

2 files changed

+42
-158
lines changed
77.5 KB
Loading

docs/proposal/tcp_long_connection_metrics.md

Lines changed: 42 additions & 158 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ The "Design Details" section below is for the real
9191
nitty-gritty.
9292
-->
9393

94-
TCP connection information will be collected using eBPF sockops, sk_msg and kprobes hooks, and stored in ebpf hash maps using socket cookie as a unique key for hashmap. RingBuffer map is used for sending connection info periodically to userspace.
94+
TCP connection information will be collected using eBPF cgroup_skb hook. RingBuffer map is used for sending connection info periodically to userspace.
9595

9696

9797
### Design Details
@@ -105,185 +105,69 @@ proposal will be implemented, this is the place to discuss them.
105105

106106
#### Collecting Metrics
107107

108-
Decelearing ebpf hash map in probe.h to store information about tcp_connections.
108+
Declaring eBPF cgroup_skb hooks, which will trigger when traffic passes through the cgroup socket.
109109

110110
```
111-
// Ebpf map to store active tcp connections
112-
struct {
113-
__uint(type, BPF_MAP_TYPE_HASH);
114-
__type(key, __u64); // use sock_cookie as key
115-
__type(value, struct tcp_probe_info);
116-
__uint(max_entries, MAP_SIZE_OF_TCP_CONNS);
117-
__uint(map_flags, BPF_F_NO_PREALLOC);
118-
} map_of_tcp_conns SEC(".maps");
119-
120-
```
121-
Sockpos ebpf hook is triggered at various socket events, we will use this hook to store and refresh connection information at the time of connection established, connection state change, retransmits (also trigger in packet losss).
122-
Updating workload/sockops.c
123-
124-
```
125-
SEC("sockops_active")
126-
int sockops_active_prog(struct bpf_sock_ops *skops)
111+
SEC("cgroup_skb/ingress")
112+
int cgroup_skb_ingress_prog(struct __sk_buff *skb)
127113
{
128-
__u64 sock_cookie = bpf_get_socket_cookie(skops);
129-
130-
if (skops->family != AF_INET && skops->family != AF_INET6)
131-
return 0;
132-
133-
switch (skops->op) {
134-
case BPF_SOCK_OPS_TCP_CONNECT_CB:
135-
skops_handle_kmesh_managed_process(skops);
136-
break;
137-
138-
case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
139-
if (!is_managed_by_kmesh(skops))
140-
break;
141-
observe_on_connect_established(skops->sk, sock_cookie, OUTBOUND);
142-
if (bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG) != 0
143-
|| bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_RETRANS_CB_FLAG) != 0
144-
|| bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_RTT_CB_FLAG) != 0) {
145-
BPF_LOG(ERR, SOCKOPS, "set sockops cb failed!\n");
146-
}
147-
__u64 *current_sk = (__u64 *)skops->sk;
148-
struct bpf_sock_tuple *dst = bpf_map_lookup_elem(&map_of_orig_dst, current_sk);
149-
if (dst != NULL)
150-
enable_encoding_metadata(skops);
151-
break;
152-
153-
default:
154-
break;
155-
}
156-
return 0;
157-
}
114+
if (skb->family != AF_INET && skb->family != AF_INET6)
115+
return SK_PASS;
158116
159-
SEC("sockops_passive")
160-
int sockops_passive_prog(struct bpf_sock_ops *skops)
161-
{
162-
__u64 sock_cookie = bpf_get_socket_cookie(skops);
163-
164-
if (skops->family != AF_INET && skops->family != AF_INET6)
165-
return 0;
166-
167-
switch (skops->op) {
168-
case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
169-
if (!is_managed_by_kmesh(skops) || skip_specific_probe(skops))
170-
break;
171-
observe_on_connect_established(skops->sk, sock_cookie, INBOUND);
172-
if (bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG) != 0
173-
|| bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_RETRANS_CB_FLAG) != 0
174-
|| bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_RTT_CB_FLAG) != 0) {
175-
BPF_LOG(ERR, SOCKOPS, "set sockops cb failed!\n");
176-
}
177-
auth_ip_tuple(skops);
178-
break;
179-
180-
default:
181-
break;
182-
}
183-
return 0;
184-
}
117+
struct bpf_sock *sk = skb->sk;
118+
if (!sk)
119+
return SK_PASS;
185120
186-
SEC("sockops_utils")
187-
int sockops_utils_prog(struct bpf_sock_ops *skops)
188-
{
189-
// Filter by IPv4 or IPv6
190-
if (skops->family != AF_INET && skops->family != AF_INET6)
191-
return 0;
192-
193-
switch (skops->op) {
194-
case BPF_SOCK_OPS_STATE_CB:
195-
if (skops->args[1] == BPF_TCP_CLOSE) {
196-
clean_auth_map(skops);
197-
clean_dstinfo_map(skops);
198-
}
199-
if (!is_managed_by_kmesh(skops))
200-
break;
201-
observe_on_status_change(skops->sk, skops->args[0]);
202-
break;
203-
204-
case BPF_SOCK_OPS_RETRANS_CB:
205-
if (!is_managed_by_kmesh(skops))
206-
break;
207-
observe_on_retransmit(skops->sk);
208-
break;
209-
210-
case BPF_SOCK_OPS_RTT_CB:
211-
if (!is_managed_by_kmesh(skops))
212-
break;
213-
observe_on_rtt(skops->sk);
214-
break;
215-
216-
default:
217-
break;
218-
}
219-
return 0;
121+
if (!is_managed_by_kmesh_skb(skb))
122+
return SK_PASS;
123+
observe_on_data(sk);
124+
return SK_PASS;
220125
}
221126
222-
```
223-
224-
Sk_msg hook is triggered when the packet leaves the socket, we will be using sk_msg ebpf hook for refreshing sent bytes data, also we are triggering flush_conn function here to send the connection info to userspace using ringbuffer map.
225-
Updating sendmsg_prog func in send_msg.c
226-
227-
```
228-
SEC("sk_msg")
229-
int sendmsg_prog(struct sk_msg_md *msg)
127+
SEC("cgroup_skb/egress")
128+
int cgroup_skb_egress_prog(struct __sk_buff *skb)
230129
{
231-
__u32 off = 0;
232-
if (msg->family != AF_INET && msg->family != AF_INET6)
130+
if (skb->family != AF_INET && skb->family != AF_INET6)
233131
return SK_PASS;
234132
235-
// encode org dst addr
236-
encode_metadata_org_dst_addr(msg, &off, (msg->family == AF_INET));
237-
238-
struct bpf_sock *sk = msg->sk;
239-
240-
if (sk) {
241-
if (is_managed_by_kmesh_skmsg(msg)) {
242-
observe_on_data(sk);
243-
}
244-
} else {
245-
BPF_LOG(ERR, KMESH, "sk_lookup success\n");
246-
}
247-
int key = 0;
248-
__u64 *last_time = bpf_map_lookup_elem(&tcp_conn_last_flush, &key);
249-
__u64 now = bpf_ktime_get_ns();
133+
struct bpf_sock *sk = skb->sk;
134+
if (!sk)
135+
return SK_PASS;
250136
251-
if (!last_time) {
252-
__u64 init_time = now;
253-
// Initialize last flush time if not set
254-
bpf_map_update_elem(&tcp_conn_last_flush, &key, &init_time, BPF_ANY);
255-
} else if ((now - *last_time) >= TIMER_INTERVAL_NS) {
256-
flush_tcp_conns();
257-
// Update last flush time
258-
bpf_map_update_elem(&tcp_conn_last_flush, &key, &now, BPF_ANY);
259-
}
137+
if (!is_managed_by_kmesh_skb(skb))
138+
return SK_PASS;
139+
observe_on_data(sk);
260140
return SK_PASS;
261141
}
262142
263143
```
144+
The observe_on_data function checks whether the time elapsed since last_report is greater than 5 seconds; if it is, it reports the conn_info to the ring buffer and updates last_report_ns.
264145

265-
For refreshing the received bytes by a connection, we will attach a kprobe on tcp_rcv_established.
266-
Creating workload/kprobe.c
267146
```
268-
SEC("kprobe/tcp_rcv_established")
269-
int bpf_tcp_rcv_established(struct pt_regs *ctx) {
270-
271-
struct sk_buff *skb = (struct sk_buff *)PT_REGS_PARM2(ctx);
272-
struct bpf_sock *sk = skb->sk;
273-
if (sk) {
274-
if (is_managed_by_kmesh_skb(skb)) {
275-
observe_on_data(sk);
276-
}
277-
} else {
278-
BPF_LOG(ERR, KMESH, "sk_lookup success\n");
147+
static inline void observe_on_data(struct bpf_sock *sk)
148+
{
149+
struct bpf_tcp_sock *tcp_sock = NULL;
150+
struct sock_storage_data *storage = NULL;
151+
if (!sk)
152+
return;
153+
tcp_sock = bpf_tcp_sock(sk);
154+
if (!tcp_sock)
155+
return;
156+
157+
storage = bpf_sk_storage_get(&map_of_sock_storage, sk, 0, 0);
158+
if (!storage) {
159+
return;
160+
}
161+
__u64 now = bpf_ktime_get_ns();
162+
if ((storage->last_report_ns != 0) && (now - storage->last_report_ns > LONG_CONN_THRESHOLD_TIME)) {
163+
tcp_report(sk, tcp_sock, storage, BPF_TCP_ESTABLISHED);
279164
}
280-
return 0;
281165
}
282166
```
283167

284-
We will update functions of tcp_probe.h to store and refresh the connection information on the hash map.
285-
286168
We will update the functions of metric.go to periodically update the workload and service metrics, and we will also create a new metric for long TCP connections.
169+
170+
![design](./pics/tcp_long_conn_design.png)
287171
#### User Stories (Optional)
288172

289173
<!--

0 commit comments

Comments
 (0)