Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use fixed-point u64 math to divide the PLL values #212

Merged
merged 3 commits into from
Mar 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
- Added a "low pin count" variant of the f730 chip to the crate features: packages <144 pins don't include a high speed USB PHY
- Added SPI2_SCK pin for stm32f769i-discovery
- Fix mass-erase triggering in `flash` on smaller chips
- Remove the need for software u64 division in the clock setup code, shrinking code (#211)

## [v0.7.0] - 2022-06-05

Expand Down
144 changes: 112 additions & 32 deletions src/rcc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,43 @@ impl CFGR {
self
}

// We want to avoid dividing u64 values, because the Cortex-M7 CPU doesn't
// have hardware instructions for that, and the software divide that LLVM
// gives us is a relatively large amount of code.
//
// To do this, we operate in a fixed-point domain, and do a multiply by 1/x
// instead of dividing by x. We can calculate those 1/x values in a u32, if
// the fixed-point decimal place is chosen to be close enough to the LSB.
//
// But we also need to be able to represent the largest numerator, so we
// need enough bits to the left of the virtual decimal point.
//
// All of the chunks of code that do this are structured like:
//
// base_clk * n / m / p
//
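// and they all have the same base_clk and n ranges (n up to 432, base_clk
// up to 50MHz). So base*plln can be as high as 216_000_000_000, and to
// represent that we need 38 bits.
// NOTE(review): 50_000_000 * 432 is 21_600_000_000, which needs only 35
// bits, so the 38-bit figure looks like a conservative over-estimate --
// confirm before relying on the exact bit budget.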
//
// (We could use just 37 bits in one of these cases, if we take into account
// that high values of base_clk preclude using high values of n. But the
// other case is checking the output, so we can't assume anything about the
// inputs there.)
//
// So use 30 bits on the right of the decimal place (FIXED_POINT_RSHIFT).
//
// Also note, we need to round the 1/x values, not truncate them. So we
// shift left by one more bit, add one, and shift right by one.
const FIXED_POINT_LSHIFT: u32 = 31;
const FIXED_POINT_RSHIFT: u32 = 30;

// We also drop 4 bits from the base_clk so that it and the fractional part
// (above) can fit into 64 bits. The max base_clk*n value needs 38 bits;
// shifting this out means it can fit into 34, with 30 (above) for the
// fractions.
const BASE_CLK_SHIFT: u32 = 4;

/// Output clock calculation
fn calculate_clocks(&self) -> (Clocks, InternalRCCConfig) {
let mut config = InternalRCCConfig::default();
Expand All @@ -568,45 +605,72 @@ impl CFGR {
None => HSI_FREQUENCY,
}
.raw(),
);
) >> Self::BASE_CLK_SHIFT;

let mut sysclk = base_clk;
let mut sysclk = base_clk << Self::BASE_CLK_SHIFT;

let mut pll48clk_valid = false;

if self.use_pll {
sysclk = base_clk as u64 * self.plln as u64
/ self.pllm as u64
// These initial divisions have to operate on u32 values to avoid
// the software division. Fortunately our 30 bit choice for the
// decimal place, and the fact that these are 1/N, means the
// reciprocals fit into 30 bits (for N >= 2), so a u32 is fine.
let one_over_m = ((1 << Self::FIXED_POINT_LSHIFT) / (self.pllm as u32) + 1) >> 1;
let one_over_p = ((1 << Self::FIXED_POINT_LSHIFT)
/ match self.pllp {
PLLP::Div2 => 2,
PLLP::Div4 => 4,
PLLP::Div6 => 6,
PLLP::Div8 => 8,
};
PLLP::Div2 => 2u32,
PLLP::Div4 => 4u32,
PLLP::Div6 => 6u32,
PLLP::Div8 => 8u32,
}
+ 1)
>> 1;
sysclk = (((base_clk as u64 * self.plln as u64 * one_over_m as u64)
>> Self::FIXED_POINT_RSHIFT)
* one_over_p as u64)
>> Self::FIXED_POINT_RSHIFT
<< Self::BASE_CLK_SHIFT;
}

// Check if pll48clk is valid
if let Some(pll48clk) = self.pll48clk {
match pll48clk {
PLL48CLK::Pllq => {
pll48clk_valid = {
let pll48clk = base_clk as u64 * self.plln as u64
/ self.pllm as u64
/ self.pllq as u64;
let one_over_m =
((1 << Self::FIXED_POINT_LSHIFT) / (self.pllm as u32) + 1) >> 1;
let one_over_q =
((1 << Self::FIXED_POINT_LSHIFT) / (self.pllq as u32) + 1) >> 1;
let pll48clk = (((base_clk as u64 * self.plln as u64 * one_over_m as u64)
>> Self::FIXED_POINT_RSHIFT)
* one_over_q as u64)
>> Self::FIXED_POINT_RSHIFT
<< Self::BASE_CLK_SHIFT;
(48_000_000 - 120_000..=48_000_000 + 120_000).contains(&pll48clk)
}
}
PLL48CLK::Pllsai => {
pll48clk_valid = {
if self.use_pllsai {
let pll48clk = base_clk as u64 * self.pllsain as u64
/ self.pllm as u64
// base_clk * pllsain has the same range as above
let one_over_m =
((1 << Self::FIXED_POINT_LSHIFT) / (self.pllm as u32) + 1) >> 1;
let one_over_p = ((1 << Self::FIXED_POINT_LSHIFT)
/ match self.pllsaip {
PLLSAIP::Div2 => 2,
PLLSAIP::Div4 => 4,
PLLSAIP::Div6 => 6,
PLLSAIP::Div8 => 8,
};
PLLSAIP::Div2 => 2u32,
PLLSAIP::Div4 => 4u32,
PLLSAIP::Div6 => 6u32,
PLLSAIP::Div8 => 8u32,
}
+ 1)
>> 1;
let pll48clk =
(((base_clk as u64 * self.pllsain as u64 * one_over_m as u64)
>> Self::FIXED_POINT_RSHIFT)
* one_over_p as u64)
>> Self::FIXED_POINT_RSHIFT
<< Self::BASE_CLK_SHIFT;
(48_000_000 - 120_000..=48_000_000 + 120_000).contains(&pll48clk)
} else {
false
Expand Down Expand Up @@ -801,7 +865,13 @@ impl CFGR {
n = 432;
continue;
}
let f_vco_clock = (f_pll_clock_input as u64 * n as u64 / m as u64) as u32;
// See the comments around Self::FIXED_POINT_LSHIFT to see how this works.
let one_over_m = ((1 << Self::FIXED_POINT_LSHIFT) / (m as u32) + 1) >> 1;
let f_vco_clock = (((f_pll_clock_input as u64 >> Self::BASE_CLK_SHIFT)
* n as u64
* one_over_m as u64)
>> Self::FIXED_POINT_RSHIFT
<< Self::BASE_CLK_SHIFT) as u32;
if f_vco_clock < 50_000_000 {
m += 1;
n = 432;
Expand Down Expand Up @@ -857,15 +927,16 @@ impl CFGR {
Some(hse) => hse.freq,
None => HSI_FREQUENCY,
}
.raw();
.raw()
>> Self::BASE_CLK_SHIFT;

let sysclk = if let Some(clk) = self.sysclk {
clk
} else {
base_clk
base_clk << Self::BASE_CLK_SHIFT
};

let p = if base_clk == sysclk {
let p = if base_clk << Self::BASE_CLK_SHIFT == sysclk {
None
} else {
Some((sysclk - 1, sysclk + 1))
Expand All @@ -885,20 +956,29 @@ impl CFGR {

// We check if (pllm, plln, pllp) allow to obtain the requested Sysclk,
// so that we don't have to calculate them
let one_over_m = ((1 << Self::FIXED_POINT_LSHIFT) / (self.pllm as u32) + 1) >> 1;
let one_over_p = ((1 << Self::FIXED_POINT_LSHIFT)
/ match self.pllp {
PLLP::Div2 => 2u32,
PLLP::Div4 => 4u32,
PLLP::Div6 => 6u32,
PLLP::Div8 => 8u32,
}
+ 1)
>> 1;
let p_ok = (sysclk as u64)
== (base_clk as u64 * self.plln as u64
/ self.pllm as u64
/ match self.pllp {
PLLP::Div2 => 2,
PLLP::Div4 => 4,
PLLP::Div6 => 6,
PLLP::Div8 => 8,
});
== (((base_clk as u64 * self.plln as u64 * one_over_m as u64)
>> Self::FIXED_POINT_RSHIFT)
* one_over_p as u64)
>> Self::FIXED_POINT_RSHIFT
<< Self::BASE_CLK_SHIFT;
if p_ok && q.is_none() {
return;
}

if let Some((m, n, p, q)) = CFGR::calculate_mnpq(base_clk, FreqRequest { p, q }) {
if let Some((m, n, p, q)) =
CFGR::calculate_mnpq(base_clk << Self::BASE_CLK_SHIFT, FreqRequest { p, q })
{
self.pllm = m as u8;
self.plln = n as u16;
if let Some(p) = p {
Expand Down
Loading