Skip to content

Commit d0bcb72

Browse files
committed
conn_tuner: use remote gateway associated with route to aggregate cong alg info
This allows us to aggregate info on a default route versus locally-destined traffic, and reduces overhead from per-remote-host to per-route-gateway; we can also get the gateway via the dest cache from the socket. Sample retransmit sock op events to avoid overhead; there is also no need to send events for cong alg choice any more (we have a summary on exit or on explicit request). Also add support for retrieving/setting string sysctl values; we will need that for cong alg settings (allowed, default). Signed-off-by: Alan Maguire <[email protected]>
1 parent 8c6a3ff commit d0bcb72

10 files changed

+199
-99
lines changed

docs/bpftune-tcp-conn.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ DESCRIPTION
1919
such cases, BBR is a good fit since it continuously estimates bottleneck
2020
bandwidth and attempts to fit the congestion algorithm to it.
2121

22-
When we have limited information about a remote host - i.e. we have
22+
When we have limited information about a destination - i.e. we have
2323
not had > REMOTE_HOST_MIN_INSTANCES connections involving it,
2424
the only auto-selection involved is to use BBR in cases where
2525
loss rates exceed 1/(2^DROP_SHIFT) (1.5%) of the packet sent rate -
@@ -37,7 +37,7 @@ DESCRIPTION
3737
BDP = BottleneckBandwidth * MinRoundTripTime
3838

3939
The algorithm works as follows; BPF maintains a map of metrics keyed
40-
by remote IP address. For each remote IP address, we track the
40+
by remote gateway. For each remote gateway, we track the
4141
minimum RTT observed across all TCP connections and the max bandwidth
4242
observed. The former tells us - as closely as we can determine -
4343
what the true RTT of the link is. The latter estimates the

include/bpftune/bpftune.bpf.h

+4
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,10 @@ unsigned long long bpftune_init_net;
277277
#define NTF_EXT_LEARNED 0x10
278278
#endif
279279

280+
#ifndef RTF_GATEWAY
281+
#define RTF_GATEWAY 0x02
282+
#endif
283+
280284
#define EINVAL 22
281285
#define ENOMEM 12
282286
#define EAGAIN 11

include/bpftune/bpftune.h

+11-2
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ struct bpftunable_scenario {
9292

9393
#define BPFTUNABLE_NAMESPACED 0x1 /* settable in non-global namespace? */
9494
#define BPFTUNABLE_OPTIONAL 0x2 /* do not fail if tunable not found (e.g. ipv6) */
95+
#define BPFTUNABLE_STRING 0x4 /* tunable is a string, not numeric */
9596

9697
struct bpftunable_desc {
9798
unsigned int id;
@@ -106,11 +107,19 @@ struct bpftunable_stats {
106107
unsigned long nonglobal_ns[BPFTUNE_MAX_SCENARIOS];
107108
};
108109

110+
#define BPFTUNE_MAX_STR 128
111+
109112
struct bpftunable {
110113
struct bpftunable_desc desc;
111114
enum bpftune_state state;
112-
long initial_values[BPFTUNE_MAX_VALUES];
113-
long current_values[BPFTUNE_MAX_VALUES];
115+
union {
116+
long initial_values[BPFTUNE_MAX_VALUES];
117+
char initial_str[BPFTUNE_MAX_STR];
118+
};
119+
union {
120+
long current_values[BPFTUNE_MAX_VALUES];
121+
char current_str[BPFTUNE_MAX_STR];
122+
};
114123
struct bpftunable_stats stats;
115124
};
116125

include/bpftune/libbpftune.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ int bpftuner_tunable_sysctl_write(struct bpftuner *tuner,
158158
unsigned int tunable,
159159
unsigned int scenario,
160160
unsigned long netns_cookie,
161-
__u8 num_values, long *values,
161+
__u8 num_values, void *values,
162162
const char *fmt, ...);
163163

164164
int bpftuner_tunable_update(struct bpftuner *tuner,
@@ -349,7 +349,9 @@ void bpftune_ring_buffer_fini(void *ring_buffer);
349349

350350
void bpftune_sysctl_name_to_path(const char *name, char *path, size_t path_sz);
351351
int bpftune_sysctl_read(int netns_fd, const char *name, long *values);
352+
int bpftune_sysctl_read_string(int netns_fd, const char *name, char *val);
352353
int bpftune_sysctl_write(int netns_fd, const char *name, __u8 num_values, long *values);
354+
int bpftune_sysctl_write_string(int netns_fd, const char *name, char *val);
353355
long long bpftune_ksym_addr(char type, const char *name);
354356
int bpftune_snmpstat_read(unsigned long netns_cookie, int family, const char *linename, const char *name, long *value);
355357
int bpftune_netstat_read(unsigned long netns_cookie, int family, const char *linename, const char *name, long *value);

src/libbpftune.c

+127-50
Original file line numberDiff line numberDiff line change
@@ -806,6 +806,7 @@ static void bpftuner_rollback(struct bpftuner *tuner, bool log_only)
806806
char newvals[PATH_MAX] = { };
807807
bool changes = false;
808808
char s[PATH_MAX];
809+
void *val;
809810

810811
if (t->desc.type != BPFTUNABLE_SYSCTL)
811812
continue;
@@ -823,13 +824,20 @@ static void bpftuner_rollback(struct bpftuner *tuner, bool log_only)
823824
/* nothing to rollback? */
824825
if (!changes)
825826
continue;
826-
for (j = 0; j < t->desc.num_values; j++) {
827-
snprintf(s, sizeof(s), "%ld ",
828-
t->initial_values[j]);
829-
strcat(oldvals, s);
830-
snprintf(s, sizeof(s), "%ld ",
831-
t->current_values[j]);
832-
strcat(newvals, s);
827+
if (t->desc.flags & BPFTUNABLE_STRING) {
828+
strncpy(oldvals, t->initial_str, sizeof(oldvals));
829+
strncpy(newvals, t->current_str, sizeof(newvals));
830+
val = t->initial_str;
831+
} else {
832+
for (j = 0; j < t->desc.num_values; j++) {
833+
snprintf(s, sizeof(s), "%ld ",
834+
t->initial_values[j]);
835+
strcat(oldvals, s);
836+
snprintf(s, sizeof(s), "%ld ",
837+
t->current_values[j]);
838+
strcat(newvals, s);
839+
}
840+
val = t->initial_values;
833841
}
834842
if (log_only) {
835843
bpftune_log(BPFTUNE_LOG_LEVEL, "# To roll back changes to '%s', run the following as a privileged user in a terminal:\n",
@@ -840,7 +848,7 @@ static void bpftuner_rollback(struct bpftuner *tuner, bool log_only)
840848
bpftuner_tunable_sysctl_write(tuner, i, k,
841849
0,
842850
t->desc.num_values,
843-
t->initial_values,
851+
val,
844852
"Rolling back sysctl values for '%s' from (%s) to original values (%s)...\n",
845853
t->desc.name,
846854
newvals, oldvals);
@@ -1001,9 +1009,11 @@ void bpftune_sysctl_name_to_path(const char *name, char *path, size_t path_sz)
10011009
path[i] = '/';
10021010
}
10031011

1004-
int bpftune_sysctl_read(int netns_fd, const char *name, long *values)
1012+
static int __bpftune_sysctl_read(int netns_fd, const char *name, void *val,
1013+
bool isstr)
10051014
{
10061015
int i, orig_netns_fd = 0, num_values = 0;
1016+
long *values = val;
10071017
char path[PATH_MAX];
10081018
int err = 0;
10091019
FILE *fp;
@@ -1025,25 +1035,33 @@ int bpftune_sysctl_read(int netns_fd, const char *name, long *values)
10251035
path, netns_fd, strerror(-err));
10261036
goto out;
10271037
}
1028-
num_values = fscanf(fp, "%ld %ld %ld",
1029-
&values[0], &values[1], &values[2]);
1030-
if (num_values == 0)
1031-
err = -ENOENT;
1032-
else if (num_values < 0)
1033-
err = -errno;
1038+
if (isstr) {
1039+
num_values = fscanf(fp, "%[^\n]", (char *)val);
1040+
if (num_values != 1)
1041+
err = -ENOENT;
1042+
else
1043+
bpftune_log(LOG_DEBUG, "Read %s = %s\n", name, (char *)val);
1044+
} else {
1045+
num_values = fscanf(fp, "%ld %ld %ld",
1046+
&values[0], &values[1], &values[2]);
1047+
if (num_values == 0) {
1048+
err = -ENOENT;
1049+
} else if (num_values < 0) {
1050+
err = -errno;
1051+
} else {
1052+
for (i = 0; i < num_values; i++) {
1053+
bpftune_log(LOG_DEBUG, "Read %s[%d] = %ld\n",
1054+
name, i, values[i]);
1055+
}
1056+
}
1057+
}
10341058
fclose(fp);
10351059

10361060
if (err) {
10371061
bpftune_log(LOG_ERR, "could not read from %s: %s\n", path,
10381062
strerror(-err));
10391063
goto out;
10401064
}
1041-
1042-
for (i = 0; i < num_values; i++) {
1043-
bpftune_log(LOG_DEBUG, "Read %s[%d] = %ld\n",
1044-
name, i, values[i]);
1045-
}
1046-
10471065
out:
10481066
bpftune_netns_set(orig_netns_fd, NULL, true);
10491067
out_unset:
@@ -1053,11 +1071,24 @@ int bpftune_sysctl_read(int netns_fd, const char *name, long *values)
10531071
return err ? err : num_values;
10541072
}
10551073

1056-
int bpftune_sysctl_write(int netns_fd, const char *name, __u8 num_values, long *values)
1074+
int bpftune_sysctl_read(int netns_fd, const char *name, long *values)
1075+
{
1076+
return __bpftune_sysctl_read(netns_fd, name, values, false);
1077+
}
1078+
1079+
int bpftune_sysctl_read_string(int netns_fd, const char *name, char *val)
1080+
{
1081+
return __bpftune_sysctl_read(netns_fd, name, val, true);
1082+
}
1083+
1084+
static int __bpftune_sysctl_write(int netns_fd, const char *name, __u8 num_values,
1085+
void *val, bool isstr)
10571086
{
10581087
long old_values[BPFTUNE_MAX_VALUES] = {};
1088+
char old_value_str[PATH_MAX] = {};
10591089
int i, err = 0, orig_netns_fd = 0;
10601090
int old_num_values;
1091+
long *values = val;
10611092
char path[PATH_MAX];
10621093
FILE *fp;
10631094

@@ -1074,18 +1105,27 @@ int bpftune_sysctl_write(int netns_fd, const char *name, __u8 num_values, long *
10741105
goto out_unset;
10751106

10761107
/* If value is already set to val, do nothing. */
1077-
old_num_values = bpftune_sysctl_read(0, name, old_values);
1108+
if (isstr) {
1109+
old_num_values = bpftune_sysctl_read_string(0, name, old_value_str);
1110+
} else {
1111+
old_num_values = bpftune_sysctl_read(0, name, old_values);
1112+
}
10781113
if (old_num_values < 0) {
10791114
err = old_num_values;
10801115
goto out;
10811116
}
10821117
if (num_values == old_num_values) {
1083-
for (i = 0; i < num_values; i++) {
1084-
if (old_values[i] != values[i])
1085-
break;
1118+
if (isstr) {
1119+
if (strcmp(old_value_str, val) == 0)
1120+
goto out;
1121+
} else {
1122+
for (i = 0; i < num_values; i++) {
1123+
if (old_values[i] != values[i])
1124+
break;
1125+
}
1126+
if (i == num_values)
1127+
goto out;
10861128
}
1087-
if (i == num_values)
1088-
goto out;
10891129
}
10901130
fp = fopen(path, "w");
10911131
if (!fp) {
@@ -1095,14 +1135,18 @@ int bpftune_sysctl_write(int netns_fd, const char *name, __u8 num_values, long *
10951135
goto out;
10961136
}
10971137

1098-
for (i = 0; i < num_values; i++)
1099-
fprintf(fp, "%ld ", values[i]);
1138+
if (isstr) {
1139+
fprintf(fp, "%s", (char *)val);
1140+
bpftune_log(LOG_DEBUG, "Wrote %s = %s\n", name, val);
1141+
} else {
1142+
for (i = 0; i < num_values; i++) {
1143+
fprintf(fp, "%ld ", values[i]);
1144+
bpftune_log(LOG_DEBUG, "Wrote %s[%d] = %ld\n",
1145+
name, i, values[i]);
1146+
}
1147+
}
11001148
fclose(fp);
11011149

1102-
for (i = 0; i < num_values; i++) {
1103-
bpftune_log(LOG_DEBUG, "Wrote %s[%d] = %ld\n",
1104-
name, i, values[i]);
1105-
}
11061150
out:
11071151
bpftune_netns_set(orig_netns_fd, NULL, true);
11081152
out_unset:
@@ -1112,6 +1156,17 @@ int bpftune_sysctl_write(int netns_fd, const char *name, __u8 num_values, long *
11121156
return err;
11131157
}
11141158

1159+
int bpftune_sysctl_write(int netns_fd, const char *name, __u8 num_values, long *values)
1160+
{
1161+
return __bpftune_sysctl_write(netns_fd, name, num_values, values, false);
1162+
}
1163+
1164+
int bpftune_sysctl_write_string(int netns_fd, const char *name, char *val)
1165+
{
1166+
return __bpftune_sysctl_write(netns_fd, name, 1, val, true);
1167+
}
1168+
1169+
11151170
long long bpftune_ksym_addr(char type, const char *name)
11161171
{
11171172
long long ret = -ENOENT;
@@ -1302,8 +1357,12 @@ int bpftuner_tunables_init(struct bpftuner *tuner, unsigned int num_descs,
13021357

13031358
if (descs[i].type != BPFTUNABLE_SYSCTL)
13041359
continue;
1305-
num_values = bpftune_sysctl_read(0, descs[i].name,
1306-
tuner->tunables[i].current_values);
1360+
if (descs[i].flags & BPFTUNABLE_STRING)
1361+
num_values = bpftune_sysctl_read_string(0, descs[i].name,
1362+
tuner->tunables[i].current_str);
1363+
else
1364+
num_values = bpftune_sysctl_read(0, descs[i].name,
1365+
tuner->tunables[i].current_values);
13071366
if (num_values < 0) {
13081367
if (descs[i].flags & BPFTUNABLE_OPTIONAL) {
13091368
bpftune_log(LOG_DEBUG, "error reading optional tunable '%s': %s\n",
@@ -1319,9 +1378,15 @@ int bpftuner_tunables_init(struct bpftuner *tuner, unsigned int num_descs,
13191378
descs[i].num_values, num_values);
13201379
return -EINVAL;
13211380
}
1322-
memcpy(tuner->tunables[i].initial_values,
1323-
tuner->tunables[i].current_values,
1324-
sizeof(tuner->tunables[i].initial_values));
1381+
if (descs[i].flags & BPFTUNABLE_STRING) {
1382+
strncpy(tuner->tunables[i].initial_str,
1383+
tuner->tunables[i].current_str,
1384+
sizeof(tuner->tunables[i].initial_str));
1385+
} else {
1386+
memcpy(tuner->tunables[i].initial_values,
1387+
tuner->tunables[i].current_values,
1388+
sizeof(tuner->tunables[i].initial_values));
1389+
}
13251390
}
13261391

13271392
return 0;
@@ -1387,13 +1452,18 @@ static void __bpftuner_scenario_log(struct bpftuner *tuner, unsigned int tunable
13871452
char s[PATH_MAX];
13881453
__u8 i;
13891454

1390-
for (i = 0; i < t->desc.num_values; i++) {
1391-
snprintf(s, sizeof(s), "%ld ",
1392-
t->initial_values[i]);
1393-
strcat(oldvals, s);
1394-
snprintf(s, sizeof(s), "%ld ",
1395-
t->current_values[i]);
1396-
strcat(newvals, s);
1455+
if (t->desc.flags & BPFTUNABLE_STRING) {
1456+
strncpy(oldvals, t->initial_str, sizeof(oldvals));
1457+
strncpy(newvals, t->current_str, sizeof(newvals));
1458+
} else {
1459+
for (i = 0; i < t->desc.num_values; i++) {
1460+
snprintf(s, sizeof(s), "%ld ",
1461+
t->initial_values[i]);
1462+
strcat(oldvals, s);
1463+
snprintf(s, sizeof(s), "%ld ",
1464+
t->current_values[i]);
1465+
strcat(newvals, s);
1466+
}
13971467
}
13981468
bpftune_log(BPFTUNE_LOG_LEVEL, "# sysctl '%s' changed from (%s) -> (%s)\n",
13991469
t->desc.name, oldvals, newvals);
@@ -1415,7 +1485,7 @@ static void __bpftuner_scenario_log(struct bpftuner *tuner, unsigned int tunable
14151485

14161486
int bpftuner_tunable_sysctl_write(struct bpftuner *tuner, unsigned int tunable,
14171487
unsigned int scenario, unsigned long netns_cookie,
1418-
__u8 num_values, long *values,
1488+
__u8 num_values, void *values,
14191489
const char *fmt, ...)
14201490
{
14211491
struct bpftunable *t = bpftuner_tunable(tuner, tunable);
@@ -1450,16 +1520,23 @@ int bpftuner_tunable_sysctl_write(struct bpftuner *tuner, unsigned int tunable,
14501520
}
14511521
global_ns = fd == 0;
14521522

1453-
ret = bpftune_sysctl_write(fd, t->desc.name, num_values, values);
1523+
if (t->desc.flags & BPFTUNABLE_STRING)
1524+
ret = bpftune_sysctl_write_string(fd, t->desc.name, values);
1525+
else
1526+
ret = bpftune_sysctl_write(fd, t->desc.name, num_values, values);
14541527
if (!ret) {
14551528
__u8 i;
14561529

14571530
bpftuner_scenario_log_fmt(tuner, tunable, scenario, fd, false, fmt);
14581531

14591532
/* only cache values for rollback for global ns */
14601533
if (global_ns) {
1461-
for (i = 0; i < t->desc.num_values; i++)
1462-
t->current_values[i] = values[i];
1534+
if (t->desc.flags & BPFTUNABLE_STRING) {
1535+
strncpy(t->current_str, values, sizeof(t->current_str));
1536+
} else {
1537+
for (i = 0; i < t->desc.num_values; i++)
1538+
t->current_values[i] = ((long *)values)[i];
1539+
}
14631540
}
14641541
} else if (ret < 0) {
14651542
/* If sysctl update failed, mark non-global netns as gone to

src/libbpftune.map

+2
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,9 @@ LIBBPFTUNE_0.1.1 {
5555
bpftune_ring_buffer_fini;
5656
bpftune_sysctl_name_to_path;
5757
bpftune_sysctl_read;
58+
bpftune_sysctl_read_string;
5859
bpftune_sysctl_write;
60+
bpftune_sysctl_write_string;
5961
bpftune_ksym_addr;
6062
bpftune_netstat_read;
6163
bpftune_snmpstat_read;

0 commit comments

Comments
 (0)