Skip to content

Commit 67c08f7

Browse files
author
Valentin Obst
committed
rust/net: add implementation of HyStart
Signed-off-by: Valentin Obst <[email protected]>
1 parent 29c6b40 commit 67c08f7

File tree

3 files changed

+268
-0
lines changed

3 files changed

+268
-0
lines changed

rust/kernel/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
1414
#![no_std]
1515
#![feature(allocator_api)]
16+
#![feature(associated_type_bounds)]
1617
#![feature(coerce_unsized)]
1718
#![feature(dispatch_from_dyn)]
1819
#![feature(new_uninit)]

rust/kernel/net/tcp/cong.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ use macros::{pin_data, pinned_drop};
2121

2222
use super::{InetConnectionSock, TcpSock};
2323

24+
pub mod hystart;
25+
2426
/// Congestion control algorithm (CCA).
2527
///
2628
/// A CCA is implemented as a set of callbacks that are invoked whenever

rust/kernel/net/tcp/cong/hystart.rs

Lines changed: 265 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,265 @@
1+
// SPDX-License-Identifier: GPL-2.0-only
2+
3+
//! HyStart slow start algorithm.
4+
//!
5+
//! Based on:
6+
//! Sangtae Ha, Injong Rhee,
7+
//! Taming the elephants: New TCP slow start,
8+
//! Computer Networks, Volume 55, Issue 9, 2011, Pages 2092-2110,
9+
//! ISSN 1389-1286, <https://doi.org/10.1016/j.comnet.2011.01.014>
10+
11+
use crate::net::sock;
12+
use crate::net::tcp::{self, cong};
13+
use crate::time;
14+
use crate::{pr_err, pr_info};
15+
use core::cmp::min;
16+
/// The heuristic that is used to find the exit point for slow start.
///
/// The discriminants are explicit (`1`, `2`, `3`); `Both` has the value of
/// the two individual heuristics combined.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum HystartDetect {
    /// Exits slow start when the length of so-called ACK-trains becomes equal
    /// to the estimated minimum forward path one-way delay.
    AckTrain = 1,
    /// Exits slow start when the estimated RTT increase between two consecutive
    /// rounds exceeds a threshold that is based on the last RTT.
    Delay = 2,
    /// Combine both algorithms.
    Both = 3,
}
28+
29+
/// Internal state of the [`HyStart`] algorithm.
30+
pub struct HyStartState {
31+
/// Number of ACKs already sampled to determine the RTT of this round.
32+
sample_cnt: u8,
33+
/// Whether the slow start exit point was found.
34+
found: bool,
35+
/// Time when the current round has started.
36+
round_start: time::Usecs32,
37+
/// Sequence number of the byte that marks the end of the current round.
38+
end_seq: u32,
39+
/// Time when the last ACK was received in this round.
40+
last_ack: time::Usecs32,
41+
/// The minimum RTT of the current round.
42+
curr_rtt: time::Usecs32,
43+
/// Estimate of the minimum forward path one-way delay of the link.
44+
pub delay_min: Option<time::Usecs32>,
45+
/// Time when the connection was created.
46+
// TODO: remove
47+
pub start_time: time::Usecs32,
48+
}
49+
50+
impl Default for HyStartState {
51+
fn default() -> Self {
52+
Self {
53+
sample_cnt: 0,
54+
found: false,
55+
round_start: 0,
56+
end_seq: 0,
57+
last_ack: 0,
58+
curr_rtt: 0,
59+
delay_min: None,
60+
// TODO: remove
61+
start_time: time::ktime_get_boot_fast_us32(),
62+
}
63+
}
64+
}
65+
66+
impl HyStartState {
67+
/// Returns true iff the algorithm `T` is in hybrid slow start.
68+
#[inline]
69+
pub fn in_hystart<T: HyStart>(&self, cwnd: u32) -> bool {
70+
!self.found && cwnd >= T::LOW_WINDOW
71+
}
72+
}
73+
74+
/// Implement this trait on [`Algorithm::Data`] to use [`HyStart`] for your CCA.
75+
///
76+
/// [`Algorithm::Data`]: cong::Algorithm::Data
77+
pub trait HasHyStartState {
78+
/// Returns the private data of the HyStart algorithm.
79+
fn hy(&self) -> &HyStartState;
80+
81+
/// Returns the private data of the HyStart algorithm.
82+
fn hy_mut(&mut self) -> &mut HyStartState;
83+
}
84+
85+
/// Implement this trait on your [`Algorithm`] to use HyStart. You still need to
86+
/// invoke the [`reset`] and [`update`] methods at the right places.
87+
///
88+
/// [`Algorithm`]: cong::Algorithm
89+
/// [`reset`]: HyStart::reset
90+
/// [`update`]: HyStart::update
91+
pub trait HyStart: cong::Algorithm<Data: HasHyStartState> {
92+
// TODO: Those constants should be configurable via module parameters.
93+
/// Which heuristic to use for deciding when it is time to exit slow start.
94+
const DETECT: HystartDetect;
95+
96+
/// Lower bound for cwnd during hybrid slow start.
97+
const LOW_WINDOW: u32;
98+
99+
/// Max spacing between ACKs in an ACK-train.
100+
const ACK_DELTA: time::Usecs32;
101+
102+
/// Number of ACKs to sample at the beginning of each round to estimate the
103+
/// RTT of this round.
104+
const MIN_SAMPLES: u8 = 8;
105+
106+
/// Lower bound on the increase in RTT between to consecutive rounds that is
107+
/// needed to trigger an exit from slow start.
108+
const DELAY_MIN: time::Usecs32 = 4000;
109+
110+
/// Upper bound on the increase in RTT between to consecutive rounds that is
111+
/// needed to trigger an exit from slow start.
112+
const DELAY_MAX: time::Usecs32 = 16000;
113+
114+
/// Corresponds to the function eta from the paper. Returns the increase in
115+
/// RTT between consecutive rounds that triggers and exit from slow start.
116+
/// `t` is the RTT of the last round.
117+
fn delay_thresh(mut t: time::Usecs32) -> time::Usecs32 {
118+
t >>= 3;
119+
120+
if t < Self::DELAY_MIN {
121+
Self::DELAY_MIN
122+
} else if t > Self::DELAY_MAX {
123+
Self::DELAY_MAX
124+
} else {
125+
t
126+
}
127+
}
128+
129+
/// TODO
130+
fn ack_delay(sk: &cong::Sock<'_, Self>) -> time::Usecs32 {
131+
(match sk.sk_pacing_rate() {
132+
0 => 0,
133+
rate => min(
134+
time::USEC_PER_MSEC,
135+
((sk.sk_gso_max_size() as u64) * 4 * time::USEC_PER_SEC) / rate,
136+
),
137+
} as time::Usecs32)
138+
}
139+
140+
/// Called in slow start at the beginning of a new round of incoming ACKs.
141+
fn reset(sk: &mut cong::Sock<'_, Self>) {
142+
let tp = sk.tcp_sk();
143+
let now = tp.tcp_mstamp() as time::Usecs32;
144+
let snd_nxt = tp.snd_nxt();
145+
146+
let hy = sk.inet_csk_ca_mut().hy_mut();
147+
148+
hy.round_start = now;
149+
hy.last_ack = now;
150+
hy.end_seq = snd_nxt;
151+
hy.curr_rtt = u32::MAX;
152+
hy.sample_cnt = 0;
153+
}
154+
155+
/// Called in slow start to decide if it is time to exit slow start. Sets
156+
/// [`HyStartState`] `found` to true when it is time to exit.
157+
fn update(sk: &mut cong::Sock<'_, Self>, delay: time::Usecs32) {
158+
// Start of a new round.
159+
if tcp::after(sk.tcp_sk().snd_una(), sk.inet_csk_ca().hy().end_seq) {
160+
Self::reset(sk);
161+
}
162+
let hy = sk.inet_csk_ca().hy();
163+
let Some(delay_min) = hy.delay_min else {
164+
// This should not happen.
165+
pr_err!("hystart: update: delay_min was None");
166+
return;
167+
};
168+
169+
if matches!(Self::DETECT, HystartDetect::Both | HystartDetect::AckTrain) {
170+
let tp = sk.tcp_sk();
171+
let now = tp.tcp_mstamp() as time::Usecs32;
172+
173+
// Is this ACK part of a train?
174+
// NOTE: I don't get it. C is doing this as a signed comparison but
175+
// for:
176+
// -- `0 <= now < ca->last_ack <= 0x7F..F` this means it always
177+
// passes,
178+
// -- `ca->last_ack = 0x80..0` and `0 <= new <= 0x7F..F` it also
179+
// always passes,
180+
// -- `0x80..00 < ca->last_ack` and `now < 0x80.0` (big enough)
181+
// also always passes.
182+
// If I understand the paper correctly, this is not what is
183+
// intended. What we really want here is the unsigned version I
184+
// guess, please correct me if I am wrong.
185+
// Commit: c54b4b7655447c1f24f6d50779c22eba9ee0fd24
186+
// Purposefully introduced the cast ... am I just stupid?
187+
// Link: https://godbolt.org/z/E7ocxae69
188+
if now.wrapping_sub(hy.last_ack) <= Self::ACK_DELTA {
189+
let threshold = if let Ok(sock::Pacing::r#None) = sk.sk_pacing_status() {
190+
(delay_min + Self::ack_delay(sk)) >> 1
191+
} else {
192+
delay_min + Self::ack_delay(sk)
193+
};
194+
195+
// Does the length of this ACK-train indicate it is time to
196+
// exit slow start?
197+
// NOTE: C is a bit weird here ... `threshold` is unsigned but
198+
// the lhs is still cast to signed, even though the usual
199+
// arithmetic conversions will immediately cast it back to
200+
// unsigned; thus, I guess we can just do everything unsigned.
201+
if now.wrapping_sub(hy.round_start) > threshold {
202+
// TODO: change to debug
203+
pr_info!(
204+
"hystart_ack_train ({}us > {}us) delay_min {}us (+ ack_delay {}us) cwnd {}, start {}us",
205+
now.wrapping_sub(hy.round_start),
206+
threshold,
207+
delay_min,
208+
Self::ack_delay(sk),
209+
tp.snd_cwnd(),
210+
hy.start_time
211+
);
212+
213+
let tp = sk.tcp_sk_mut();
214+
215+
tp.set_snd_ssthresh(tp.snd_cwnd());
216+
217+
sk.inet_csk_ca_mut().hy_mut().found = true;
218+
219+
// TODO: Update net stats.
220+
}
221+
222+
sk.inet_csk_ca_mut().hy_mut().last_ack = now;
223+
}
224+
}
225+
226+
if matches!(Self::DETECT, HystartDetect::Both | HystartDetect::Delay) {
227+
let hy = sk.inet_csk_ca_mut().hy_mut();
228+
229+
// The paper only takes the min RTT of the first `MIN_SAMPLES`
230+
// ACKs in a round, but it does no harm to consider later ACKs as
231+
// well.
232+
if hy.curr_rtt > delay {
233+
hy.curr_rtt = delay
234+
}
235+
236+
if hy.sample_cnt < Self::MIN_SAMPLES {
237+
hy.sample_cnt += 1;
238+
} else {
239+
// Does the increase in RTT indicate its time to exit slow
240+
// start?
241+
if hy.curr_rtt > delay_min + Self::delay_thresh(delay_min) {
242+
hy.found = true;
243+
244+
// TODO: change to debug
245+
let curr_rtt = hy.curr_rtt;
246+
let start_time = hy.start_time;
247+
pr_info!(
248+
"hystart_delay: {}us > {}us, delay_min {}us (+ delay_thresh {}us), cwnd {}, start {}us",
249+
curr_rtt,
250+
delay_min + Self::delay_thresh(delay_min),
251+
delay_min,
252+
Self::delay_thresh(delay_min),
253+
sk.tcp_sk().snd_cwnd(),
254+
start_time,
255+
);
256+
// TODO: Update net stats.
257+
258+
let tp = sk.tcp_sk_mut();
259+
260+
tp.set_snd_ssthresh(tp.snd_cwnd());
261+
}
262+
}
263+
}
264+
}
265+
}

0 commit comments

Comments
 (0)