stm32-rs · eldruin · Mar 27, 2024 · Feb 11, 2024 · Feb 11, 2024 · Feb 11, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
 - Added a "low pin count" variant of the f730 chip to the crate features: packages <144 pins don't include a high speed USB PHY
 - Added SPI2_SCK pin for stm32f769i-discovery
 - Fix mass-erase triggering in `flash` on smaller chips
+- Remove the need for software u64 division in the clock setup code, shrinking code (#211)
 
 ## [v0.7.0] - 2022-06-05
 

diff --git a/src/rcc.rs b/src/rcc.rs
@@ -558,6 +558,43 @@ impl CFGR {
         self
     }
 
+    // We want to avoid dividing u64 values, because the Cortex-M7 CPU doesn't
+    // have hardware instructions for that, and the software divide that LLVM
+    // gives us is a relatively large amount of code.
+    //
+    // To do this, we operate in a fixed-point domain, and do a multiply by 1/x
+    // instead of dividing by x.  We can calculate those 1/x values in a u32, if
+    // the fixed-point decimal place is chosen to be close enough to the LSB.
+    //
+    // But we also need to be able to represent the largest numerator, so we
+    // need enough bits to the left of the virtual decimal point.
+    //
+    // All of the chunks of code that do this are structured like:
+    //
+    // base_clk * n / m / p
+    //
+    // and they all have the same base_clk and n ranges (n up to 432, base_clk
+    // up to 50MHz).  So base*plln can be as high as 21_600_000_000, which
+    // needs 35 bits; we conservatively budget 38 bits for it.
+    //
+    // (We could use just 37 bits in one of these cases, if we take into account
+    // that high values of base_clk preclude using high values of n.  But the
+    // other case is checking the output, so we can't assume anything about the
+    // inputs there.)
+    //
+    // That leaves 26 fraction bits in a u64; BASE_CLK_SHIFT (below) raises it to 30.
+    //
+    // Also note, we need to round the 1/x values, not truncate them.  So we
+    // shift left by one more bit, add one, and shift right by one.
+    const FIXED_POINT_LSHIFT: u32 = 31;
+    const FIXED_POINT_RSHIFT: u32 = 30;
+
+    // We also drop the low 4 bits of base_clk so that the product and the
+    // fractional part (above) fit into 64 bits.  The base_clk*n budget is 38
+    // bits; after the shift it is 34, leaving 30 (above) for the fraction.
+    // Results are shifted back up afterwards, so they are multiples of 16.
+    const BASE_CLK_SHIFT: u32 = 4;
+
     /// Output clock calculation
     fn calculate_clocks(&self) -> (Clocks, InternalRCCConfig) {
         let mut config = InternalRCCConfig::default();
@@ -568,45 +605,72 @@ impl CFGR {
                 None => HSI_FREQUENCY,
             }
             .raw(),
-        );
+        ) >> Self::BASE_CLK_SHIFT;
 
-        let mut sysclk = base_clk;
+        let mut sysclk = base_clk << Self::BASE_CLK_SHIFT;
 
         let mut pll48clk_valid = false;
 
         if self.use_pll {
-            sysclk = base_clk as u64 * self.plln as u64
-                / self.pllm as u64
+            // These initial divisions have to operate on u32 values to avoid
+            // the software division.  Fortunately our 30 bit choice for the
+            // decimal place (31 with the rounding bit), and the fact that
+            // these are 1/N, means they fit in a u32.
+            let one_over_m = ((1 << Self::FIXED_POINT_LSHIFT) / (self.pllm as u32) + 1) >> 1;
+            let one_over_p = ((1 << Self::FIXED_POINT_LSHIFT)
                 / match self.pllp {
-                    PLLP::Div2 => 2,
-                    PLLP::Div4 => 4,
-                    PLLP::Div6 => 6,
-                    PLLP::Div8 => 8,
-                };
+                    PLLP::Div2 => 2u32,
+                    PLLP::Div4 => 4u32,
+                    PLLP::Div6 => 6u32,
+                    PLLP::Div8 => 8u32,
+                }
+                + 1)
+                >> 1;
+            sysclk = (((base_clk as u64 * self.plln as u64 * one_over_m as u64)
+                >> Self::FIXED_POINT_RSHIFT)
+                * one_over_p as u64)
+                >> Self::FIXED_POINT_RSHIFT
+                << Self::BASE_CLK_SHIFT;
         }
 
         // Check if pll48clk is valid
         if let Some(pll48clk) = self.pll48clk {
             match pll48clk {
                 PLL48CLK::Pllq => {
                     pll48clk_valid = {
-                        let pll48clk = base_clk as u64 * self.plln as u64
-                            / self.pllm as u64
-                            / self.pllq as u64;
+                        let one_over_m =
+                            ((1 << Self::FIXED_POINT_LSHIFT) / (self.pllm as u32) + 1) >> 1;
+                        let one_over_q =
+                            ((1 << Self::FIXED_POINT_LSHIFT) / (self.pllq as u32) + 1) >> 1;
+                        let pll48clk = (((base_clk as u64 * self.plln as u64 * one_over_m as u64)
+                            >> Self::FIXED_POINT_RSHIFT)
+                            * one_over_q as u64)
+                            >> Self::FIXED_POINT_RSHIFT
+                            << Self::BASE_CLK_SHIFT;
                         (48_000_000 - 120_000..=48_000_000 + 120_000).contains(&pll48clk)
                     }
                 }
                 PLL48CLK::Pllsai => {
                     pll48clk_valid = {
                         if self.use_pllsai {
-                            let pll48clk = base_clk as u64 * self.pllsain as u64
-                                / self.pllm as u64
+                            // base_clk * pllsain has the same range as above
+                            let one_over_m =
+                                ((1 << Self::FIXED_POINT_LSHIFT) / (self.pllm as u32) + 1) >> 1;
+                            let one_over_p = ((1 << Self::FIXED_POINT_LSHIFT)
                                 / match self.pllsaip {
-                                    PLLSAIP::Div2 => 2,
-                                    PLLSAIP::Div4 => 4,
-                                    PLLSAIP::Div6 => 6,
-                                    PLLSAIP::Div8 => 8,
-                                };
+                                    PLLSAIP::Div2 => 2u32,
+                                    PLLSAIP::Div4 => 4u32,
+                                    PLLSAIP::Div6 => 6u32,
+                                    PLLSAIP::Div8 => 8u32,
+                                }
+                                + 1)
+                                >> 1;
+                            let pll48clk =
+                                (((base_clk as u64 * self.pllsain as u64 * one_over_m as u64)
+                                    >> Self::FIXED_POINT_RSHIFT)
+                                    * one_over_p as u64)
+                                    >> Self::FIXED_POINT_RSHIFT
+                                    << Self::BASE_CLK_SHIFT;
                             (48_000_000 - 120_000..=48_000_000 + 120_000).contains(&pll48clk)
                         } else {
                             false
@@ -801,7 +865,13 @@ impl CFGR {
                 n = 432;
                 continue;
             }
-            let f_vco_clock = (f_pll_clock_input as u64 * n as u64 / m as u64) as u32;
+            // See the comments around Self::FIXED_POINT_LSHIFT to see how this works.
+            let one_over_m = ((1 << Self::FIXED_POINT_LSHIFT) / (m as u32) + 1) >> 1;
+            let f_vco_clock = (((f_pll_clock_input as u64 >> Self::BASE_CLK_SHIFT)
+                * n as u64
+                * one_over_m as u64)
+                >> Self::FIXED_POINT_RSHIFT
+                << Self::BASE_CLK_SHIFT) as u32;
             if f_vco_clock < 50_000_000 {
                 m += 1;
                 n = 432;
@@ -857,15 +927,16 @@ impl CFGR {
             Some(hse) => hse.freq,
             None => HSI_FREQUENCY,
         }
-        .raw();
+        .raw()
+            >> Self::BASE_CLK_SHIFT;
 
         let sysclk = if let Some(clk) = self.sysclk {
             clk
         } else {
-            base_clk
+            base_clk << Self::BASE_CLK_SHIFT
         };
 
-        let p = if base_clk == sysclk {
+        let p = if base_clk << Self::BASE_CLK_SHIFT == sysclk {
             None
         } else {
             Some((sysclk - 1, sysclk + 1))
@@ -885,20 +956,29 @@ impl CFGR {
 
         // We check if (pllm, plln, pllp) allow to obtain the requested Sysclk,
         // so that we don't have to calculate them
+        let one_over_m = ((1 << Self::FIXED_POINT_LSHIFT) / (self.pllm as u32) + 1) >> 1;
+        let one_over_p = ((1 << Self::FIXED_POINT_LSHIFT)
+            / match self.pllp {
+                PLLP::Div2 => 2u32,
+                PLLP::Div4 => 4u32,
+                PLLP::Div6 => 6u32,
+                PLLP::Div8 => 8u32,
+            }
+            + 1)
+            >> 1;
         let p_ok = (sysclk as u64)
-            == (base_clk as u64 * self.plln as u64
-                / self.pllm as u64
-                / match self.pllp {
-                    PLLP::Div2 => 2,
-                    PLLP::Div4 => 4,
-                    PLLP::Div6 => 6,
-                    PLLP::Div8 => 8,
-                });
+            == (((base_clk as u64 * self.plln as u64 * one_over_m as u64)
+                >> Self::FIXED_POINT_RSHIFT)
+                * one_over_p as u64)
+                >> Self::FIXED_POINT_RSHIFT
+                << Self::BASE_CLK_SHIFT;
         if p_ok && q.is_none() {
             return;
         }
 
-        if let Some((m, n, p, q)) = CFGR::calculate_mnpq(base_clk, FreqRequest { p, q }) {
+        if let Some((m, n, p, q)) =
+            CFGR::calculate_mnpq(base_clk << Self::BASE_CLK_SHIFT, FreqRequest { p, q })
+        {
             self.pllm = m as u8;
             self.plln = n as u16;
             if let Some(p) = p {