Skip to content

Commit afee353

Browse files
authored
Fix wrong conntrack data from eBPF (#111)
1 parent d76ee40 commit afee353

File tree

4 files changed

+63
-5
lines changed

4 files changed

+63
-5
lines changed

bpf/accesslog/syscalls/connect_conntrack.c

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ static __always_inline void nf_conntrack_read_in6_addr(__u64 *addr_h, __u64 *add
2323
bpf_probe_read(addr_l, sizeof(*addr_l), &in6->s6_addr32[2]);
2424
}
2525

26-
static __always_inline int nf_conntrack_tuple_to_conntrack_tuple(conntrack_tuple_t *t, const struct nf_conntrack_tuple *ct) {
26+
static __always_inline int nf_conntrack_tuple_to_conntrack_tuple(struct connect_args_t *connect_args, conntrack_tuple_t *t, const struct nf_conntrack_tuple *ct) {
2727
__builtin_memset(t, 0, sizeof(conntrack_tuple_t));
2828

2929
switch (ct->dst.protonum) {
@@ -60,6 +60,25 @@ static __always_inline int nf_conntrack_tuple_to_conntrack_tuple(conntrack_tuple
6060
return 0;
6161
}
6262
}
63+
64+
struct sock *sock = connect_args->sock;
65+
struct socket *tmps = _(sock->sk_socket);
66+
if (tmps != NULL) {
67+
struct sock* s;
68+
BPF_CORE_READ_INTO(&s, tmps, sk);
69+
short unsigned int skc_family;
70+
BPF_CORE_READ_INTO(&skc_family, s, __sk_common.skc_family);
71+
if (skc_family == AF_INET) {
72+
__u16 local_port;
73+
BPF_CORE_READ_INTO(&local_port, s, __sk_common.skc_num);
74+
__u32 local_addr_v4;
75+
BPF_CORE_READ_INTO(&local_addr_v4, s, __sk_common.skc_rcv_saddr);
76+
// make sure the conntrack tuple matches the socket's local address and port
77+
if (local_addr_v4 != t->daddr_l || local_port != t->dport) {
78+
return 0;
79+
}
80+
}
81+
}
6382
return 1;
6483
}
6584

@@ -73,6 +92,11 @@ static __always_inline int nf_conn_aware(struct pt_regs* ctx, struct nf_conn *ct
7392
return 0;
7493
}
7594

95+
// already contains the remote address
96+
if (&(connect_args->remote) != NULL) {
97+
return 0;
98+
}
99+
76100
__u32 status;
77101
if (bpf_probe_read(&status, sizeof(status), &(ct->status)) != 0) {
78102
return 0; // Invalid ct pointer
@@ -93,7 +117,7 @@ static __always_inline int nf_conn_aware(struct pt_regs* ctx, struct nf_conn *ct
93117
struct nf_conntrack_tuple reply = tuplehash[IP_CT_DIR_REPLY].tuple;
94118

95119
conntrack_tuple_t reply_conn = {};
96-
if (!nf_conntrack_tuple_to_conntrack_tuple(&reply_conn, &reply)) {
120+
if (!nf_conntrack_tuple_to_conntrack_tuple(connect_args, &reply_conn, &reply)) {
97121
return 0;
98122
}
99123

pkg/accesslog/collector/connect.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ func (c *ConnectCollector) buildSocketFromConnectEvent(event *events.SocketConne
116116

117117
pair, err := ip.ParseSocket(event.PID, event.SocketFD)
118118
if err != nil {
119-
connectLogger.Warnf("cannot found the socket, pid: %d, socket FD: %d", event.PID, event.SocketFD)
119+
connectLogger.Debugf("cannot found the socket, pid: %d, socket FD: %d", event.PID, event.SocketFD)
120120
return nil
121121
}
122122
connectLogger.Debugf("found the connection from the socket, connection ID: %d, randomID: %d",

pkg/accesslog/collector/protocols/queue.go

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,14 @@ package protocols
1919

2020
import (
2121
"context"
22+
"errors"
2223
"fmt"
2324
"os"
2425
"sync"
2526
"time"
2627

28+
"github.com/cilium/ebpf"
29+
2730
"github.com/apache/skywalking-rover/pkg/accesslog/common"
2831
"github.com/apache/skywalking-rover/pkg/accesslog/events"
2932
"github.com/apache/skywalking-rover/pkg/accesslog/forwarder"
@@ -212,7 +215,14 @@ func (p *PartitionContext) processEvents() {
212215
p.processConnectionEvents(info)
213216

214217
// if the connection is already closed and does not contain any buffered data, then delete the connection
215-
if info.closed && info.dataBuffer.DataLength() == 0 {
218+
bufLen := info.dataBuffer.DataLength()
219+
if bufLen > 0 {
220+
return
221+
}
222+
if !info.closed {
223+
p.checkTheConnectionIsAlreadyClose(info)
224+
}
225+
if info.closed {
216226
if info.closeCallback != nil {
217227
info.closeCallback()
218228
}
@@ -225,6 +235,26 @@ func (p *PartitionContext) processEvents() {
225235
}
226236
}
227237

238+
// checkTheConnectionIsAlreadyClose looks up con.connectionID in the BPF
// active-connection map and sets con.closed when the entry is missing
// (ebpf.ErrKeyNotExist) or when the stored entry carries a non-zero RandomID
// that differs from con.randomID.
//
// The lookup runs at most once every 30 seconds per connection, tracked via
// con.lastCheckCloseTime. Any other lookup error is logged as a warning and
// leaves con.closed untouched.
func (p *PartitionContext) checkTheConnectionIsAlreadyClose(con *PartitionConnection) {
239+
// Throttle: skip the map lookup if this connection was checked within the last 30 seconds.
if time.Since(con.lastCheckCloseTime) <= time.Second*30 {
240+
return
241+
}
242+
// Record the check time before the lookup, so failed lookups are throttled as well.
con.lastCheckCloseTime = time.Now()
243+
var activateConn common.ActiveConnection
244+
if err := p.context.BPF.ActiveConnectionMap.Lookup(con.connectionID, &activateConn); err != nil {
245+
// No entry is stored under this connection ID: treat the connection as closed.
if errors.Is(err, ebpf.ErrKeyNotExist) {
246+
con.closed = true
247+
return
248+
}
249+
// Any other lookup failure is only reported; the closed flag is not changed.
log.Warnf("cannot found the active connection: %d-%d, err: %v", con.connectionID, con.randomID, err)
250+
return
251+
// An entry exists but its non-zero RandomID is not ours.
// NOTE(review): presumably the connection ID now belongs to a newer connection - confirm.
} else if activateConn.RandomID != 0 && activateConn.RandomID != con.randomID {
252+
log.Debugf("detect the connection: %d-%d is already closed, so remove from the activate connection",
253+
con.connectionID, con.randomID)
254+
con.closed = true
255+
}
256+
}
257+
228258
func (p *PartitionContext) processExpireEvents() {
229259
// the expiry must be mutually exclusive with the events processor
230260
p.analyzeLocker.Lock()
@@ -267,6 +297,7 @@ type PartitionConnection struct {
267297
closed bool
268298
closeCallback common.ConnectionProcessFinishCallback
269299
skipAllDataAnalyze bool
300+
lastCheckCloseTime time.Time
270301
}
271302

272303
func (p *PartitionConnection) appendDetail(ctx *common.AccessLogContext, detail *events.SocketDetailEvent) {

pkg/accesslog/common/connection.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -574,7 +574,10 @@ func (c *ConnectionManager) OnBuildConnectionLogFinished() {
574574
func (c *ConnectionManager) SkipAllDataAnalyze(conID, ranID uint64) {
575575
var activateConn ActiveConnection
576576
if err := c.activeConnectionMap.Lookup(conID, &activateConn); err != nil {
577-
log.Warnf("cannot found the active connection: %d-%d", conID, ranID)
577+
if errors.Is(err, ebpf.ErrKeyNotExist) {
578+
return
579+
}
580+
log.Warnf("cannot found the active connection: %d-%d, err: %v", conID, ranID, err)
578581
return
579582
}
580583
if activateConn.RandomID != ranID {

0 commit comments

Comments
 (0)