Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkg/config/setup/system_probe.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,8 +245,8 @@ func InitSystemProbeConfig(cfg pkgconfigmodel.Setup) {
cfg.BindEnvAndSetDefault("network_config.enable_ebpfless", false, "DD_ENABLE_EBPFLESS", "DD_NETWORK_CONFIG_ENABLE_EBPFLESS")

cfg.BindEnvAndSetDefault("network_config.enable_co_re", true)
cfg.BindEnvAndSetDefault("network_config.enable_fentry", false)
cfg.BindEnvAndSetDefault("network_config.enable_sk_tracer", false)
cfg.BindEnvAndSetDefault("network_config.enable_fentry", true, "DD_SYSTEM_PROBE_NETWORK_ENABLE_FENTRY", "DD_NETWORK_CONFIG_ENABLE_FENTRY")
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Keep fentry default aligned with config schema

This flips network_config.enable_fentry to default true at runtime, but the schema still declares it as false (pkg/config/schema/system-probe_schema.yaml), so schema-driven outputs and tooling can disagree with actual agent behavior. That mismatch makes troubleshooting and rollout control error-prone (for example, generated/default config views show fentry off while the process enables it). Please update the schema/default metadata in the same change so all config paths agree.

Useful? React with 👍 / 👎.


// TLS cert collection
cfg.BindEnvAndSetDefault("network_config.enable_cert_collection", false)
Expand Down
2 changes: 1 addition & 1 deletion pkg/network/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ type Config struct {
// EnableEbpfless enables the use of network tracing without eBPF using packet capture.
EnableEbpfless bool

// EnableFentry enables the experimental fentry tracer (disabled by default)
// EnableFentry enables the fentry tracer (enabled by default; override via network_config.enable_fentry or DD_SYSTEM_PROBE_NETWORK_ENABLE_FENTRY)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P3 Badge Align fentry default comment with actual default

The field comment says fentry is "disabled by default," but this commit sets network_config.enable_fentry default to true in InitSystemProbeConfig (pkg/config/setup/system_probe.go). This mismatch creates stale in-repo documentation and can mislead operators or tooling that reads these comments about how the tracer is selected by default.

Useful? React with 👍 / 👎.

EnableFentry bool

// EnableCORETracer enables the CO-RE version of the tracer
Expand Down
148 changes: 147 additions & 1 deletion pkg/network/ebpf/c/co-re/tracer-fentry.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#include "bpf_tracing.h"
#include "bpf_metadata.h"

#include "bpf_bypass.h"

#include "ip.h"
#include "ipv6.h"
#include "sock.h"
Expand All @@ -18,6 +20,9 @@
#include "tracer/telemetry.h"
#include "tracer/port.h"

#include "protocols/classification/protocol-classification.h"
#include "protocols/tls/tls-certs.h"

BPF_PERCPU_HASH_MAP(udp6_send_skb_args, u64, u64, 1024)
BPF_PERCPU_HASH_MAP(udp_send_skb_args, u64, conn_tuple_t, 1024)

Expand Down Expand Up @@ -245,7 +250,49 @@ int BPF_PROG(tcp_close, struct sock *sk, long timeout) {

bpf_map_delete_elem(&tcp_ongoing_connect_pid, &skp_conn);

cleanup_conn(ctx, &t, sk);
handle_tcp_failure(sk, &t);
clean_protocol_classification(&t);

if (cleanup_conn(ctx, &t, sk) == 0) {
increment_telemetry_count(tcp_close_connection_flush);
}
return 0;
}

// tcp_done fentry probe.
//
// Handles sockets that reach tcp_done while a connect attempt is still being
// tracked in tcp_ongoing_connect_pid (e.g. timeouts and resets, per the notes
// below). The owning pid is recovered from that map, the failure is passed to
// handle_tcp_failure, and the connection is flushed through cleanup_conn.
//
// Telemetry counters touched:
//   tcp_done_failed_tuple      - the connection tuple could not be read
//   tcp_done_missing_pid       - no tracked connect attempt for this socket
//   tcp_done_connection_flush  - cleanup_conn returned 0
//
// Always returns 0.
SEC("fentry/tcp_done")
int BPF_PROG(tcp_done, struct sock *sk) {
    // NOTE: no RETURN_IF_NOT_IN_SYSPROBE_TASK here — tcp_done often fires from
    // timeout/RST paths in idle/softirq context where the PID namespace check
    // would incorrectly reject the event, silently dropping failed connections.
    conn_tuple_t t = {};

    if (!read_conn_tuple(&t, sk, 0, CONN_TYPE_TCP)) {
        increment_telemetry_count(tcp_done_failed_tuple);
        return 0;
    }
    log_debug("fentry/tcp_done: netns: %u, sport: %u, dport: %u", t.netns, t.sport, t.dport);

    // The map key holds a by-value copy of the tuple, so filling in t.pid
    // further down does not change the key used for the lookup and delete.
    skp_conn_tuple_t skp_conn = {.sk = sk, .tup = t};

    // connection timeouts will have 0 pids as they are cleaned up by an idle process.
    // resets can also have kernel pids as they are triggered by receiving an RST packet from the server
    // get the pid from the ongoing failure map in this case, as it should have been set in connect(). else bail
    pid_ts_t *failed_conn_pid = bpf_map_lookup_elem(&tcp_ongoing_connect_pid, &skp_conn);
    if (!failed_conn_pid) {
        increment_telemetry_count(tcp_done_missing_pid);
        return 0;
    }

    // Read the pid BEFORE deleting the entry: once an element has been
    // deleted its storage may be handed out again by the map, so the pointer
    // returned by the lookup must not be dereferenced after the delete.
    t.pid = GET_USER_MODE_PID(failed_conn_pid->pid_tgid);
    bpf_map_delete_elem(&tcp_ongoing_connect_pid, &skp_conn);

    if (!handle_tcp_failure(sk, &t)) {
        return 0;
    }

    if (cleanup_conn(ctx, &t, sk) == 0) {
        increment_telemetry_count(tcp_done_connection_flush);
    }

    return 0;
}

Expand Down Expand Up @@ -599,6 +646,105 @@ int BPF_PROG(inet6_bind_exit, struct socket *sock, struct sockaddr *uaddr, int a
return sys_exit_bind(rc);
}

// fexit probe for tcp_read_sock.
//
// An fexit program receives the traced function's arguments together with its
// return value (`recv` here), so a separate fentry probe to stash the
// arguments is not required. Negative return values are ignored; anything
// else is forwarded to handle_tcp_recv along with the current pid/tgid.
SEC("fexit/tcp_read_sock")
int BPF_PROG(tcp_read_sock_exit, struct sock *sk, read_descriptor_t *desc, sk_read_actor_t recv_actor, int recv) {
    RETURN_IF_NOT_IN_SYSPROBE_TASK("fexit/tcp_read_sock");
    if (recv >= 0) {
        return handle_tcp_recv(bpf_get_current_pid_tgid(), sk, recv);
    }
    return 0;
}

// Protocol classification socket filters
//
// socket__classifier_entry lives in section "socket/classifier_entry". It
// passes the packet to protocol_classifier_entrypoint and always returns 0,
// regardless of what the classifier did with the packet.
SEC("socket/classifier_entry")
int socket__classifier_entry(struct __sk_buff *skb) {
protocol_classifier_entrypoint(skb);
return 0;
}

// socket__classifier_tls_handshake_client lives in section
// "socket/classifier_tls_handshake_client". It passes the packet to
// protocol_classifier_entrypoint_tls_handshake_client and always returns 0.
SEC("socket/classifier_tls_handshake_client")
int socket__classifier_tls_handshake_client(struct __sk_buff *skb) {
protocol_classifier_entrypoint_tls_handshake_client(skb);
return 0;
}

// socket__classifier_tls_handshake_server lives in section
// "socket/classifier_tls_handshake_server". It passes the packet to
// protocol_classifier_entrypoint_tls_handshake_server and always returns 0.
SEC("socket/classifier_tls_handshake_server")
int socket__classifier_tls_handshake_server(struct __sk_buff *skb) {
protocol_classifier_entrypoint_tls_handshake_server(skb);
return 0;
}

// socket__classifier_queues lives in section "socket/classifier_queues". It
// passes the packet to protocol_classifier_entrypoint_queues and always
// returns 0.
SEC("socket/classifier_queues")
int socket__classifier_queues(struct __sk_buff *skb) {
protocol_classifier_entrypoint_queues(skb);
return 0;
}

// socket__classifier_dbs lives in section "socket/classifier_dbs". It passes
// the packet to protocol_classifier_entrypoint_dbs and always returns 0.
SEC("socket/classifier_dbs")
int socket__classifier_dbs(struct __sk_buff *skb) {
protocol_classifier_entrypoint_dbs(skb);
return 0;
}

// socket__classifier_grpc lives in section "socket/classifier_grpc". It passes
// the packet to protocol_classifier_entrypoint_grpc and always returns 0.
SEC("socket/classifier_grpc")
int socket__classifier_grpc(struct __sk_buff *skb) {
protocol_classifier_entrypoint_grpc(skb);
return 0;
}

static __always_inline struct sock *fentry_sk_buff_sk(struct sk_buff *skb) {
struct sock *sk = NULL;
BPF_CORE_READ_INTO(&sk, skb, sk);
return sk;
}

static __always_inline int fentry_handle_net_dev_queue(struct sk_buff* skb) {
struct sock *sk = fentry_sk_buff_sk(skb);
if (!sk) {
return 0;
}

conn_tuple_t skb_tup;
bpf_memset(&skb_tup, 0, sizeof(conn_tuple_t));
if (sk_buff_to_tuple(skb, &skb_tup) <= 0) {
return 0;
}

if (!(skb_tup.metadata & CONN_TYPE_TCP)) {
return 0;
}

conn_tuple_t sock_tup;
bpf_memset(&sock_tup, 0, sizeof(conn_tuple_t));
if (!read_conn_tuple(&sock_tup, sk, 0, CONN_TYPE_TCP)) {
return 0;
}
sock_tup.netns = 0;
sock_tup.pid = 0;

if (!is_equal(&skb_tup, &sock_tup)) {
normalize_tuple(&skb_tup);
normalize_tuple(&sock_tup);
bpf_map_update_with_telemetry(conn_tuple_to_socket_skb_conn_tuple, &sock_tup, &skb_tup, BPF_NOEXIST, -EEXIST);
}

return 0;
}

// Raw tracepoint program for net/net_dev_queue; forwards any non-NULL skb to
// fentry_handle_net_dev_queue and returns 0 otherwise.
//
// fentry requires 5.8+ which always supports raw tracepoints (4.17+),
// so no tracepoint or kprobe fallbacks are needed.
SEC("raw_tracepoint/net/net_dev_queue")
int BPF_PROG(raw_tracepoint__net__net_dev_queue, struct sk_buff *skb) {
    if (skb != NULL) {
        return fentry_handle_net_dev_queue(skb);
    }
    return 0;
}

// tcp_enter_loss, tcp_enter_recovery, and tcp_send_probe0 are static kernel
// functions (not exported via BTF), so they must use kprobes even in the
// fentry tracer. They fire from kernel timer/softirq context. The shared
Expand Down
23 changes: 18 additions & 5 deletions pkg/network/tracer/connection/ebpf_tracer.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,11 +230,17 @@ func newEbpfTracer(config *config.Config, _ telemetryComponent.Component) (Trace
mgrOptions.MapSpecEditors[probes.PortBindingsMap] = manager.MapSpecEditor{MaxEntries: config.MaxTrackedConnections, EditorFlag: manager.EditMaxEntries}
mgrOptions.MapSpecEditors[probes.UDPPortBindingsMap] = manager.MapSpecEditor{MaxEntries: config.MaxTrackedConnections, EditorFlag: manager.EditMaxEntries}

log.Info("JMW network tracer: attempting to load SK tracer")
m, closeTracerFn, err = sk.LoadTracer(config, mgrOptions, connCloseEventHandler)
if err != nil && !errors.Is(err, sk.ErrorDisabled) {
// failed to load sk tracer
return nil, err
}
if err == nil {
log.Info("JMW network tracer: SK tracer loaded successfully")
} else {
log.Info("JMW network tracer: SK tracer is disabled (network_config.enable_sk_tracer=false), skipping")
}

if err != nil {
mgrOptions.MapSpecEditors[probes.ConnMap] = manager.MapSpecEditor{MaxEntries: config.MaxTrackedConnections, EditorFlag: manager.EditMaxEntries}
Expand All @@ -248,21 +254,28 @@ func newEbpfTracer(config *config.Config, _ telemetryComponent.Component) (Trace
mgrOptions.MapSpecEditors[probes.TCPRecvMsgArgsMap] = manager.MapSpecEditor{MaxEntries: config.MaxTrackedConnections / 32, EditorFlag: manager.EditMaxEntries}

tracerType = TracerTypeFentry
log.Info("JMW network tracer: attempting to load fentry tracer")
m, closeTracerFn, err = fentry.LoadTracer(config, mgrOptions, connCloseEventHandler)
if err != nil && !errors.Is(err, fentry.ErrorDisabled) {
// failed to load fentry tracer
return nil, err
switch {
case err == nil:
log.Info("JMW network tracer: fentry tracer loaded successfully")
case errors.Is(err, fentry.ErrorDisabled):
log.Info("JMW network tracer: fentry tracer is disabled (network_config.enable_fentry=false), skipping")
default:
// fentry failed to load — fall back to kprobe instead of hard-failing
log.Warnf("JMW network tracer: fentry tracer load failed, falling back to kprobe: %s", err)
err = fentry.ErrorDisabled // treat as disabled so we fall through to kprobe
}

if err != nil {
// load the kprobe tracer
log.Info("loading kprobe-based tracer")
log.Info("JMW network tracer: attempting to load kprobe tracer")
var kprobeTracerType kprobe.TracerType
m, closeTracerFn, kprobeTracerType, err = kprobe.LoadTracer(config, mgrOptions, connCloseEventHandler)
if err != nil {
return nil, err
}
tracerType = TracerType(kprobeTracerType)
log.Info("JMW network tracer: kprobe tracer loaded successfully")
}
}
m.DumpHandler = dumpMapsHandler
Expand Down
17 changes: 17 additions & 0 deletions pkg/network/tracer/connection/fentry/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,26 @@ func initManager(mgr *ddebpf.Manager) {
mgr.Maps = []*manager.Map{
{Name: probes.ConnMap},
{Name: probes.TCPStatsMap},
{Name: probes.TCPRetransmitsMap},
{Name: probes.TCPOngoingConnectPid},
{Name: "udp_recv_sock"},
{Name: "udpv6_recv_sock"},
{Name: probes.PortBindingsMap},
{Name: probes.UDPPortBindingsMap},
{Name: "pending_bind"},
{Name: probes.TelemetryMap},
{Name: probes.ConnectionProtocolMap},
{Name: probes.ClassificationProgsMap},
{Name: probes.EnhancedTLSTagsMap},
{Name: probes.ConnectionTupleToSocketSKBConnMap},
{Name: probes.TCPFailureTelemetry},
// SSL cert maps
{Name: probes.SSLCertsStatemArgsMap},
{Name: probes.SSLCertsI2DX509ArgsMap},
{Name: probes.SSLHandshakeStateMap},
{Name: probes.SSLCertInfoMap},
}

for funcName := range programs {
p := &manager.Probe{
ProbeIdentificationPair: manager.ProbeIdentificationPair{
Expand All @@ -36,4 +48,9 @@ func initManager(mgr *ddebpf.Manager) {
}
mgr.Probes = append(mgr.Probes, p)
}

// Add the raw tracepoint probe with its special TracepointName/Category
mgr.Probes = append(mgr.Probes,
&manager.Probe{ProbeIdentificationPair: manager.ProbeIdentificationPair{EBPFFuncName: netDevQueueRawTracepoint, UID: probeUID}, TracepointName: "net_dev_queue", TracepointCategory: "net"},
)
}
Loading
Loading