|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
| 2 | + |
| 3 | +//! HyStart slow start algorithm. |
| 4 | +//! |
| 5 | +//! Based on: |
| 6 | +//! Sangtae Ha, Injong Rhee, |
| 7 | +//! Taming the elephants: New TCP slow start, |
| 8 | +//! Computer Networks, Volume 55, Issue 9, 2011, Pages 2092-2110, |
| 9 | +//! ISSN 1389-1286, <https://doi.org/10.1016/j.comnet.2011.01.014> |
| 10 | +
|
| 11 | +use crate::net::sock; |
| 12 | +use crate::net::tcp::{self, cong}; |
| 13 | +use crate::time; |
| 14 | +use crate::{pr_err, pr_info}; |
| 15 | +use core::cmp::min; |
| 16 | + |
/// The heuristic that is used to find the exit point for slow start.
///
/// The discriminants are chosen such that [`Both`] is the bitwise OR of
/// [`AckTrain`] and [`Delay`].
///
/// [`AckTrain`]: HystartDetect::AckTrain
/// [`Delay`]: HystartDetect::Delay
/// [`Both`]: HystartDetect::Both
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum HystartDetect {
    /// Exits slow start when the length of so-called ACK-trains becomes equal
    /// to the estimated minimum forward path one-way delay.
    AckTrain = 1,
    /// Exits slow start when the estimated RTT increase between two consecutive
    /// rounds exceeds a threshold that is based on the last RTT.
    Delay = 2,
    /// Combine both algorithms.
    Both = 3,
}
| 28 | + |
| 29 | +/// Internal state of the [`HyStart`] algorithm. |
| 30 | +pub struct HyStartState { |
| 31 | + /// Number of ACKs already sampled to determine the RTT of this round. |
| 32 | + sample_cnt: u8, |
| 33 | + /// Whether the slow start exit point was found. |
| 34 | + found: bool, |
| 35 | + /// Time when the current round has started. |
| 36 | + round_start: time::Usecs32, |
| 37 | + /// Sequence number of the byte that marks the end of the current round. |
| 38 | + end_seq: u32, |
| 39 | + /// Time when the last ACK was received in this round. |
| 40 | + last_ack: time::Usecs32, |
| 41 | + /// The minimum RTT of the current round. |
| 42 | + curr_rtt: time::Usecs32, |
| 43 | + /// Estimate of the minimum forward path one-way delay of the link. |
| 44 | + pub delay_min: Option<time::Usecs32>, |
| 45 | + /// Time when the connection was created. |
| 46 | + // TODO: remove |
| 47 | + pub start_time: time::Usecs32, |
| 48 | +} |
| 49 | + |
| 50 | +impl Default for HyStartState { |
| 51 | + fn default() -> Self { |
| 52 | + Self { |
| 53 | + sample_cnt: 0, |
| 54 | + found: false, |
| 55 | + round_start: 0, |
| 56 | + end_seq: 0, |
| 57 | + last_ack: 0, |
| 58 | + curr_rtt: 0, |
| 59 | + delay_min: None, |
| 60 | + // TODO: remove |
| 61 | + start_time: time::ktime_get_boot_fast_us32(), |
| 62 | + } |
| 63 | + } |
| 64 | +} |
| 65 | + |
| 66 | +impl HyStartState { |
| 67 | + /// Returns true iff the algorithm `T` is in hybrid slow start. |
| 68 | + #[inline] |
| 69 | + pub fn in_hystart<T: HyStart>(&self, cwnd: u32) -> bool { |
| 70 | + !self.found && cwnd >= T::LOW_WINDOW |
| 71 | + } |
| 72 | +} |
| 73 | + |
| 74 | +/// Implement this trait on [`Algorithm::Data`] to use [`HyStart`] for your CCA. |
| 75 | +/// |
| 76 | +/// [`Algorithm::Data`]: cong::Algorithm::Data |
| 77 | +pub trait HasHyStartState { |
| 78 | + /// Returns the private data of the HyStart algorithm. |
| 79 | + fn hy(&self) -> &HyStartState; |
| 80 | + |
| 81 | + /// Returns the private data of the HyStart algorithm. |
| 82 | + fn hy_mut(&mut self) -> &mut HyStartState; |
| 83 | +} |
| 84 | + |
| 85 | +/// Implement this trait on your [`Algorithm`] to use HyStart. You still need to |
| 86 | +/// invoke the [`reset`] and [`update`] methods at the right places. |
| 87 | +/// |
| 88 | +/// [`Algorithm`]: cong::Algorithm |
| 89 | +/// [`reset`]: HyStart::reset |
| 90 | +/// [`update`]: HyStart::update |
| 91 | +pub trait HyStart: cong::Algorithm<Data: HasHyStartState> { |
| 92 | + // TODO: Those constants should be configurable via module parameters. |
| 93 | + /// Which heuristic to use for deciding when it is time to exit slow start. |
| 94 | + const DETECT: HystartDetect; |
| 95 | + |
| 96 | + /// Lower bound for cwnd during hybrid slow start. |
| 97 | + const LOW_WINDOW: u32; |
| 98 | + |
| 99 | + /// Max spacing between ACKs in an ACK-train. |
| 100 | + const ACK_DELTA: time::Usecs32; |
| 101 | + |
| 102 | + /// Number of ACKs to sample at the beginning of each round to estimate the |
| 103 | + /// RTT of this round. |
| 104 | + const MIN_SAMPLES: u8 = 8; |
| 105 | + |
| 106 | + /// Lower bound on the increase in RTT between to consecutive rounds that is |
| 107 | + /// needed to trigger an exit from slow start. |
| 108 | + const DELAY_MIN: time::Usecs32 = 4000; |
| 109 | + |
| 110 | + /// Upper bound on the increase in RTT between to consecutive rounds that is |
| 111 | + /// needed to trigger an exit from slow start. |
| 112 | + const DELAY_MAX: time::Usecs32 = 16000; |
| 113 | + |
| 114 | + /// Corresponds to the function eta from the paper. Returns the increase in |
| 115 | + /// RTT between consecutive rounds that triggers and exit from slow start. |
| 116 | + /// `t` is the RTT of the last round. |
| 117 | + fn delay_thresh(mut t: time::Usecs32) -> time::Usecs32 { |
| 118 | + t >>= 3; |
| 119 | + |
| 120 | + if t < Self::DELAY_MIN { |
| 121 | + Self::DELAY_MIN |
| 122 | + } else if t > Self::DELAY_MAX { |
| 123 | + Self::DELAY_MAX |
| 124 | + } else { |
| 125 | + t |
| 126 | + } |
| 127 | + } |
| 128 | + |
| 129 | + /// TODO |
| 130 | + fn ack_delay(sk: &cong::Sock<'_, Self>) -> time::Usecs32 { |
| 131 | + (match sk.sk_pacing_rate() { |
| 132 | + 0 => 0, |
| 133 | + rate => min( |
| 134 | + time::USEC_PER_MSEC, |
| 135 | + ((sk.sk_gso_max_size() as u64) * 4 * time::USEC_PER_SEC) / rate, |
| 136 | + ), |
| 137 | + } as time::Usecs32) |
| 138 | + } |
| 139 | + |
| 140 | + /// Called in slow start at the beginning of a new round of incoming ACKs. |
| 141 | + fn reset(sk: &mut cong::Sock<'_, Self>) { |
| 142 | + let tp = sk.tcp_sk(); |
| 143 | + let now = tp.tcp_mstamp() as time::Usecs32; |
| 144 | + let snd_nxt = tp.snd_nxt(); |
| 145 | + |
| 146 | + let hy = sk.inet_csk_ca_mut().hy_mut(); |
| 147 | + |
| 148 | + hy.round_start = now; |
| 149 | + hy.last_ack = now; |
| 150 | + hy.end_seq = snd_nxt; |
| 151 | + hy.curr_rtt = u32::MAX; |
| 152 | + hy.sample_cnt = 0; |
| 153 | + } |
| 154 | + |
| 155 | + /// Called in slow start to decide if it is time to exit slow start. Sets |
| 156 | + /// [`HyStartState`] `found` to true when it is time to exit. |
| 157 | + fn update(sk: &mut cong::Sock<'_, Self>, delay: time::Usecs32) { |
| 158 | + // Start of a new round. |
| 159 | + if tcp::after(sk.tcp_sk().snd_una(), sk.inet_csk_ca().hy().end_seq) { |
| 160 | + Self::reset(sk); |
| 161 | + } |
| 162 | + let hy = sk.inet_csk_ca().hy(); |
| 163 | + let Some(delay_min) = hy.delay_min else { |
| 164 | + // This should not happen. |
| 165 | + pr_err!("hystart: update: delay_min was None"); |
| 166 | + return; |
| 167 | + }; |
| 168 | + |
| 169 | + if matches!(Self::DETECT, HystartDetect::Both | HystartDetect::AckTrain) { |
| 170 | + let tp = sk.tcp_sk(); |
| 171 | + let now = tp.tcp_mstamp() as time::Usecs32; |
| 172 | + |
| 173 | + // Is this ACK part of a train? |
| 174 | + // NOTE: I don't get it. C is doing this as a signed comparison but |
| 175 | + // for: |
| 176 | + // -- `0 <= now < ca->last_ack <= 0x7F..F` this means it always |
| 177 | + // passes, |
| 178 | + // -- `ca->last_ack = 0x80..0` and `0 <= new <= 0x7F..F` it also |
| 179 | + // always passes, |
| 180 | + // -- `0x80..00 < ca->last_ack` and `now < 0x80.0` (big enough) |
| 181 | + // also always passes. |
| 182 | + // If I understand the paper correctly, this is not what is |
| 183 | + // intended. What we really want here is the unsigned version I |
| 184 | + // guess, please correct me if I am wrong. |
| 185 | + // Commit: c54b4b7655447c1f24f6d50779c22eba9ee0fd24 |
| 186 | + // Purposefully introduced the cast ... am I just stupid? |
| 187 | + // Link: https://godbolt.org/z/E7ocxae69 |
| 188 | + if now.wrapping_sub(hy.last_ack) <= Self::ACK_DELTA { |
| 189 | + let threshold = if let Ok(sock::Pacing::r#None) = sk.sk_pacing_status() { |
| 190 | + (delay_min + Self::ack_delay(sk)) >> 1 |
| 191 | + } else { |
| 192 | + delay_min + Self::ack_delay(sk) |
| 193 | + }; |
| 194 | + |
| 195 | + // Does the length of this ACK-train indicate it is time to |
| 196 | + // exit slow start? |
| 197 | + // NOTE: C is a bit weird here ... `threshold` is unsigned but |
| 198 | + // the lhs is still cast to signed, even though the usual |
| 199 | + // arithmetic conversions will immediately cast it back to |
| 200 | + // unsigned; thus, I guess we can just do everything unsigned. |
| 201 | + if now.wrapping_sub(hy.round_start) > threshold { |
| 202 | + // TODO: change to debug |
| 203 | + pr_info!( |
| 204 | + "hystart_ack_train ({}us > {}us) delay_min {}us (+ ack_delay {}us) cwnd {}, start {}us", |
| 205 | + now.wrapping_sub(hy.round_start), |
| 206 | + threshold, |
| 207 | + delay_min, |
| 208 | + Self::ack_delay(sk), |
| 209 | + tp.snd_cwnd(), |
| 210 | + hy.start_time |
| 211 | + ); |
| 212 | + |
| 213 | + let tp = sk.tcp_sk_mut(); |
| 214 | + |
| 215 | + tp.set_snd_ssthresh(tp.snd_cwnd()); |
| 216 | + |
| 217 | + sk.inet_csk_ca_mut().hy_mut().found = true; |
| 218 | + |
| 219 | + // TODO: Update net stats. |
| 220 | + } |
| 221 | + |
| 222 | + sk.inet_csk_ca_mut().hy_mut().last_ack = now; |
| 223 | + } |
| 224 | + } |
| 225 | + |
| 226 | + if matches!(Self::DETECT, HystartDetect::Both | HystartDetect::Delay) { |
| 227 | + let hy = sk.inet_csk_ca_mut().hy_mut(); |
| 228 | + |
| 229 | + // The paper only takes the min RTT of the first `MIN_SAMPLES` |
| 230 | + // ACKs in a round, but it does no harm to consider later ACKs as |
| 231 | + // well. |
| 232 | + if hy.curr_rtt > delay { |
| 233 | + hy.curr_rtt = delay |
| 234 | + } |
| 235 | + |
| 236 | + if hy.sample_cnt < Self::MIN_SAMPLES { |
| 237 | + hy.sample_cnt += 1; |
| 238 | + } else { |
| 239 | + // Does the increase in RTT indicate its time to exit slow |
| 240 | + // start? |
| 241 | + if hy.curr_rtt > delay_min + Self::delay_thresh(delay_min) { |
| 242 | + hy.found = true; |
| 243 | + |
| 244 | + // TODO: change to debug |
| 245 | + let curr_rtt = hy.curr_rtt; |
| 246 | + let start_time = hy.start_time; |
| 247 | + pr_info!( |
| 248 | + "hystart_delay: {}us > {}us, delay_min {}us (+ delay_thresh {}us), cwnd {}, start {}us", |
| 249 | + curr_rtt, |
| 250 | + delay_min + Self::delay_thresh(delay_min), |
| 251 | + delay_min, |
| 252 | + Self::delay_thresh(delay_min), |
| 253 | + sk.tcp_sk().snd_cwnd(), |
| 254 | + start_time, |
| 255 | + ); |
| 256 | + // TODO: Update net stats. |
| 257 | + |
| 258 | + let tp = sk.tcp_sk_mut(); |
| 259 | + |
| 260 | + tp.set_snd_ssthresh(tp.snd_cwnd()); |
| 261 | + } |
| 262 | + } |
| 263 | + } |
| 264 | + } |
| 265 | +} |
0 commit comments