From 13d267f61e49d065cf3e6db714d5696390a36556 Mon Sep 17 00:00:00 2001 From: sawyer bristol Date: Thu, 26 Jun 2025 20:34:18 -0600 Subject: [PATCH] can alloc with psram --- Cargo.lock | 58 +++++ Cargo.toml | 8 +- src/heap.rs | 133 +++++++++++ src/main.rs | 27 ++- src/psram.rs | 615 +++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 829 insertions(+), 12 deletions(-) create mode 100644 src/heap.rs create mode 100644 src/psram.rs diff --git a/Cargo.lock b/Cargo.lock index fe98260..0473f8c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -75,6 +75,12 @@ dependencies = [ "rustc_version", ] +[[package]] +name = "base64" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" + [[package]] name = "bisync" version = "0.3.0" @@ -196,6 +202,12 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "const-default" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b396d1f76d455557e1218ec8066ae14bba60b4b36ecd55577ba979f5db7ecaa" + [[package]] name = "cortex-m" version = "0.7.7" @@ -673,6 +685,18 @@ dependencies = [ "defmt 0.3.100", ] +[[package]] +name = "embedded-alloc" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f2de9133f68db0d4627ad69db767726c99ff8585272716708227008d3f1bddd" +dependencies = [ + "const-default", + "critical-section", + "linked_list_allocator", + "rlsf", +] + [[package]] name = "embedded-graphics" version = "0.8.1" @@ -1147,6 +1171,12 @@ dependencies = [ "libc", ] +[[package]] +name = "linked_list_allocator" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afa463f5405ee81cdb9cc2baf37e08ec7e4c8209442b5d72c04cfb2cd6e6286" + [[package]] name = "litrs" version = "0.4.1" @@ -1340,6 +1370,7 @@ dependencies = [ "bt-hci", "cortex-m", "cortex-m-rt", + "critical-section", "cyw43", "cyw43-pio", 
"defmt 0.3.100", @@ -1350,11 +1381,13 @@ dependencies = [ "embassy-rp 0.4.0", "embassy-sync 0.7.0", "embassy-time", + "embedded-alloc", "embedded-graphics", "embedded-hal 0.2.7", "embedded-hal-async", "embedded-hal-bus", "embedded-sdmmc", + "fixed", "panic-probe", "portable-atomic", "st7365p-lcd", @@ -1611,6 +1644,18 @@ dependencies = [ "bytemuck", ] +[[package]] +name = "rlsf" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "222fb240c3286247ecdee6fa5341e7cdad0ffdf8e7e401d9937f2d58482a20bf" +dependencies = [ + "cfg-if", + "const-default", + "libc", + "svgbobdoc", +] + [[package]] name = "rp-binary-info" version = "0.1.1" @@ -1770,6 +1815,19 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "svgbobdoc" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2c04b93fc15d79b39c63218f15e3fdffaa4c227830686e3b7c5f41244eb3e50" +dependencies = [ + "base64", + "proc-macro2", + "quote", + "syn 1.0.109", + "unicode-width", +] + [[package]] name = "syn" version = "1.0.109" diff --git a/Cargo.toml b/Cargo.toml index 5a9425d..c6ffc9d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -65,6 +65,11 @@ cortex-m = { version = "0.7.7" } cortex-m-rt = "0.7.5" panic-probe = "0.3" portable-atomic = { version = "1.11", features = ["critical-section"] } +static_cell = "2.1.1" +bitflags = "2.9.1" +embedded-alloc = "0.6.0" +fixed = "1.29.0" +critical-section = "1.2.0" defmt = { version = "0.3", optional = true } defmt-rtt = "0.4.2" @@ -72,6 +77,3 @@ defmt-rtt = "0.4.2" embedded-graphics = { version = "0.8.1" } embedded-sdmmc = { git = "https://github.com/Be-ing/embedded-sdmmc-rs", branch = "bisync", default-features = false } st7365p-lcd = { git = "https://github.com/legitcamper/st7365p-lcd-rs" } - -static_cell = "2.1.1" -bitflags = "2.9.1" diff --git a/src/heap.rs 
/// A contiguous span of address space described by a start address and a
/// length in bytes.
///
/// Both fields are atomics so that a `Region` stored inside a `static` can be
/// filled in after start-up through a shared reference.
struct Region {
    /// First address that belongs to the region.
    start: AtomicUsize,
    /// Length of the region in bytes.
    size: AtomicUsize,
}

impl Region {
    /// Creates an empty region (start 0, size 0); it contains no address.
    const fn default() -> Self {
        Self {
            start: AtomicUsize::new(0),
            size: AtomicUsize::new(0),
        }
    }

    /// Returns `true` when `address` lies in the half-open range
    /// `[start, start + size)`.
    fn contains(&self, address: usize) -> bool {
        let start = self.start.load(Ordering::Relaxed);
        // The length must come from `size`; using `start` here would make the
        // range `[start, 2 * start)` and misroute pointers in `dealloc`.
        let size = self.size.load(Ordering::Relaxed);
        // Compare offsets instead of computing `start + size`, which could
        // overflow for a region that ends at the top of the address space.
        address
            .checked_sub(start)
            .is_some_and(|offset| offset < size)
    }

    /// Creates a region covering `size` bytes beginning at `start`.
    fn new(start: usize, size: usize) -> Self {
        Self {
            start: AtomicUsize::new(start),
            size: AtomicUsize::new(size),
        }
    }
}
+pub struct DualHeap { + primary: Heap, + primary_region: Region, + secondary: Heap, +} + +impl DualHeap { + pub const fn empty() -> Self { + Self { + primary: Heap::empty(), + primary_region: Region::default(), + secondary: Heap::empty(), + } + } + + unsafe fn add_primary(&self, region: Region) { + let start = region.start.load(Ordering::SeqCst); + let size = region.size.load(Ordering::SeqCst); + unsafe { + self.primary.init(start, size); + } + self.primary_region.start.store(start, Ordering::SeqCst); + self.primary_region.size.store(size, Ordering::SeqCst); + } + + unsafe fn add_secondary(&self, region: Region) { + let start = region.start.load(Ordering::SeqCst); + let size = region.size.load(Ordering::SeqCst); + unsafe { + self.secondary.init(start, size); + } + } + + pub fn used(&self) -> usize { + self.primary.used() + self.secondary.used() + } + + pub fn free(&self) -> usize { + self.primary.free() + self.secondary.free() + } +} + +unsafe impl GlobalAlloc for DualHeap { + unsafe fn alloc(&self, layout: Layout) -> *mut u8 { + unsafe { + let ptr = self.primary.alloc(layout); + if !ptr.is_null() { + return ptr; + } + // start using secondary area when primary heap is full + self.secondary.alloc(layout) + } + } + + unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { + unsafe { + let ptr_usize = ptr as usize; + if self.primary_region.contains(ptr_usize) { + self.primary.dealloc(ptr, layout); + } else { + self.secondary.dealloc(ptr, layout); + } + } + } +} + +pub fn init_heap() { + let primary_start = &raw mut HEAP_MEM as usize; + unsafe { HEAP.add_primary(Region::new(primary_start, HEAP_SIZE)) } +} + +pub fn init_qmi_psram_heap(size: u32) { + unsafe { HEAP.add_secondary(Region::new(0x11000000, size as usize)) } +} + +pub async fn free_command(_args: &[&str]) { + let ram_used = HEAP.primary.used(); + let ram_free = HEAP.primary.free(); + let ram_total = ram_used + ram_free; + + let qmi_used = HEAP.secondary.used(); + let qmi_free = HEAP.secondary.free(); + let 
qmi_total = qmi_used + qmi_free; +} diff --git a/src/main.rs b/src/main.rs index 56b63f8..921dc45 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,9 +6,10 @@ use defmt::*; use {defmt_rtt as _, panic_probe as _}; +extern crate alloc; + use embassy_executor::Spawner; -use embassy_rp::peripherals::I2C1; -use embassy_rp::spi::Spi; +use embassy_rp::peripherals::{I2C1, PIO1}; use embassy_rp::{ bind_interrupts, gpio::{Level, Output}, @@ -16,6 +17,7 @@ use embassy_rp::{ i2c::I2c, spi, }; +use embassy_rp::{pio, spi::Spi}; use embassy_sync::blocking_mutex::raw::NoopRawMutex; use embassy_sync::channel::Channel; use embassy_time::Timer; @@ -27,14 +29,21 @@ mod peripherals; use peripherals::{keyboard::KeyEvent, peripherals_task}; mod display; use display::display_task; +mod heap; +use heap::{HEAP, init_heap}; +mod psram; embassy_rp::bind_interrupts!(struct Irqs { I2C1_IRQ => i2c::InterruptHandler; + PIO1_IRQ_0 => pio::InterruptHandler; }); +const MAX_SPI_FREQ: u32 = 62_500_000; + #[embassy_executor::main] async fn main(spawner: Spawner) { let p = embassy_rp::init(Default::default()); + init_heap(); static KEYBOARD_EVENTS: StaticCell> = StaticCell::new(); let keyboard_events = KEYBOARD_EVENTS.init(Channel::new()); @@ -47,11 +56,11 @@ async fn main(spawner: Spawner) { .spawn(peripherals_task(i2c1, keyboard_events.sender())) .unwrap(); - // // configure display handler - // let mut config = spi::Config::default(); - // config.frequency = 16_000_000; - // let spi1 = spi::Spi::new_blocking(p.SPI1, p.PIN_10, p.PIN_11, p.PIN_12, config); - // spawner - // .spawn(display_task(spi1, p.PIN_13, p.PIN_14, p.PIN_15)) - // .unwrap(); + // configure display handler + let mut config = spi::Config::default(); + config.frequency = MAX_SPI_FREQ; + let spi1 = spi::Spi::new_blocking(p.SPI1, p.PIN_10, p.PIN_11, p.PIN_12, config); + spawner + .spawn(display_task(spi1, p.PIN_13, p.PIN_14, p.PIN_15)) + .unwrap(); } diff --git a/src/psram.rs b/src/psram.rs new file mode 100644 index 0000000..3debf78 --- 
// This whole file was taken from an external project.
// NOTE(review): the source URL is missing from this copy of the comment;
// restore it so the provenance and licence can be checked.
//
use crate::Irqs;
use embassy_futures::yield_now;
use embassy_rp::PeripheralRef;
use embassy_rp::clocks::clk_peri_freq;
use embassy_rp::gpio::Drive;
use embassy_rp::peripherals::{DMA_CH1, DMA_CH2, PIN_2, PIN_3, PIN_20, PIN_21, PIO1};
use embassy_rp::pio::program::pio_asm;
use embassy_rp::pio::{Config, Direction, Pio, ShiftDirection};
use embassy_time::{Duration, Instant, Timer};
use fixed::FixedU32;
use fixed::types::extra::U8;

// The physical connections in the picocalc schematic are:
// LABEL      PICO      ESP-PSRAM64H
// RAM_CS   - PIN_20    CE (pulled up to 3v3 via 10kOhm)
// RAM_SCK  - PIN_21    SCLK
// RAM_TX   - PIN_2     SI/SIO0
// RAM_RX   - PIN_3     SO/SIO1
// RAM_IO2  - PIN_4     SIO2 (QPI Mode)
// RAM_IO3  - PIN_5     SIO3 (QPI Mode)

// Command bytes sent to the PSRAM chip. The ones marked `allow(unused)` are
// not referenced by the PIO driver below.
#[allow(unused)]
const PSRAM_CMD_QUAD_END: u8 = 0xf5;
#[allow(unused)]
const PSRAM_CMD_QUAD_ENABLE: u8 = 0x35;
#[allow(unused)]
const PSRAM_CMD_READ_ID: u8 = 0x9F;
const PSRAM_CMD_RSTEN: u8 = 0x66;
const PSRAM_CMD_RST: u8 = 0x99;
const PSRAM_CMD_WRITE: u8 = 0x02;
const PSRAM_CMD_FAST_READ: u8 = 0x0B;
#[allow(unused)]
const PSRAM_CMD_QUAD_READ: u8 = 0xEB;
#[allow(unused)]
const PSRAM_CMD_QUAD_WRITE: u8 = 0x38;
#[allow(unused)]
const PSRAM_CMD_NOOP: u8 = 0xFF;
// Value of the "known good die" ID byte that marks a usable chip.
#[allow(unused)]
const PSRAM_KNOWN_GOOD_DIE_PASS: u8 = 0x5d;

/// Handle to a PSRAM chip driven through state machine 0 of PIO1, with one
/// DMA channel for transmit and one for receive.
pub struct PsRam {
    sm: embassy_rp::pio::StateMachine<'static, PIO1, 0>,
    tx_ch: PeripheralRef<'static, DMA_CH1>,
    rx_ch: PeripheralRef<'static, DMA_CH2>,
    /// Detected capacity; 0 until `init_psram` has decoded the chip ID.
    /// Presumably in bytes (the ID size code is divided by 8) — confirm.
    pub size: u32,
}

impl PsRam {
    /// Sends `cmd` to the state machine by DMA and, when `out` is non-empty,
    /// then fills `out` from the state machine's receive side by DMA.
    ///
    /// The PIO program consumes the first two bytes of `cmd` as the number of
    /// bits to write and the number of bits to read; the remaining bytes are
    /// the data shifted out to the chip.
    pub async fn send_command(&mut self, cmd: &[u8], out: &mut [u8]) {
        if out.is_empty() {
            self.sm
                .tx()
                .dma_push(self.tx_ch.reborrow(), cmd, false)
                .await;
        } else {
            let (rx, tx) = self.sm.rx_tx();
            tx.dma_push(self.tx_ch.reborrow(), cmd, false).await;
            rx.dma_pull(self.rx_ch.reborrow(), out, false).await;
        }
    }

    /// Writes `data` starting at `addr`, split into chunks of at most
    /// `MAX_CHUNK` bytes; each chunk is a separate write command.
    pub async fn write(&mut self, mut addr: u32, mut data: &[u8]) {
        // I haven't seen this work reliably over 24 bytes
        const MAX_CHUNK: usize = 24;
        while data.len() > 0 {
            let to_write = data.len().min(MAX_CHUNK);
            //defmt::info!("writing {to_write} @ {addr}");

            #[rustfmt::skip]
            let mut to_send = [
                32 + (to_write as u8 * 8), // bits to write: 8 command + 24 address + data
                0,                         // read 0 bits
                PSRAM_CMD_WRITE,
                ((addr >> 16) & 0xff) as u8,
                ((addr >> 8) & 0xff) as u8,
                (addr & 0xff) as u8,
                // This sequence must be MAX_CHUNK in length
                0, 0, 0, 0,
                0, 0, 0, 0,
                0, 0, 0, 0,
                0, 0, 0, 0,
                0, 0, 0, 0,
                0, 0, 0, 0,
            ];

            // Copy the payload in after the 6 header bytes; the zip stops at
            // whichever side runs out first.
            for (src, dst) in data.iter().zip(to_send.iter_mut().skip(6)) {
                *dst = *src;
            }

            // Only the header plus the bytes actually used are sent.
            self.send_command(&to_send[0..6 + to_write], &mut []).await;
            addr += to_write as u32;
            data = &data[to_write..];
        }
    }

    /// Issues the read-ID command and returns the 3 bytes read back.
    pub async fn read_id(&mut self) -> [u8; 3] {
        let mut id = [0u8; 3];
        #[rustfmt::skip]
        self.send_command(
            &[
                32,    // write 32 bits
                3 * 8, // read 3 bytes = 24 bits
                PSRAM_CMD_READ_ID,
                // don't care: 24-bit "address"
                0, 0, 0,
            ],
            &mut id,
        )
        .await;
        id
    }

    /// Fills `out` with bytes read starting at `addr`, issuing one fast-read
    /// command per chunk of at most `MAX_CHUNK` bytes.
    pub async fn read(&mut self, mut addr: u32, mut out: &mut [u8]) {
        // Cannot get reliable reads above 4 bytes at a time.
        // out[4] will always have a bit error
        const MAX_CHUNK: usize = 4;
        while out.len() > 0 {
            let to_read = out.len().min(MAX_CHUNK);
            //defmt::info!("reading {to_read} @ {addr}");
            self.send_command(
                &[
                    40,                // write 40 bits
                    to_read as u8 * 8, // read n bytes
                    PSRAM_CMD_FAST_READ,
                    ((addr >> 16) & 0xff) as u8,
                    ((addr >> 8) & 0xff) as u8,
                    (addr & 0xff) as u8,
                    0, // 8 cycle delay by sending 8 bits of don't care data
                ],
                &mut out[0..to_read],
            )
            .await;
            addr += to_read as u32;
            out = &mut out[to_read..];
        }
    }

    /// Writes the single byte `data` at `addr`.
    #[allow(unused)]
    pub async fn write8(&mut self, addr: u32, data: u8) {
        //defmt::info!("write8 addr {addr} <- {data:x}");
        self.send_command(
            &[
                40, // write 40 bits
                0,  // read 0 bits
                PSRAM_CMD_WRITE,
                ((addr >> 16) & 0xff) as u8,
                ((addr >> 8) & 0xff) as u8,
                (addr & 0xff) as u8,
                data,
            ],
            &mut [],
        )
        .await;
    }

    /// Reads and returns the single byte at `addr`.
    #[allow(unused)]
    pub async fn read8(&mut self, addr: u32) -> u8 {
        let mut buf = [0u8];
        self.send_command(
            &[
                40, // write 40 bits
                8,  // read 8 bits
                PSRAM_CMD_FAST_READ,
                ((addr >> 16) & 0xff) as u8,
                ((addr >> 8) & 0xff) as u8,
                (addr & 0xff) as u8,
                0, // 8 cycle delay
            ],
            &mut buf,
        )
        .await;
        buf[0]
    }
}

/// Sets up PIO1 state machine 0 as a serial interface to the PSRAM chip,
/// resets the chip, runs a short write/read self-test and decodes the chip ID
/// into `PsRam::size`.
///
/// NOTE(review): every self-test comparison below has an empty body, so a
/// failing test is not reported and does not affect the returned value.
pub async fn init_psram(
    pio_1: PIO1,
    sclk: PIN_21,
    mosi: PIN_2,
    miso: PIN_3,
    cs: PIN_20,
    dma_ch1: DMA_CH1,
    dma_ch2: DMA_CH2,
) -> PsRam {
    let mut pio = Pio::new(pio_1, Irqs);

    // NOTE(review): `FixedU32` appears without its fractional-bits type
    // argument here although `U8` is imported above — confirm the intended
    // type, and that the clock frequencies in Hz fit in its integer part.
    let clock_hz = FixedU32::from_num(embassy_rp::clocks::clk_sys_freq());
    let max_psram_freq: FixedU32 = FixedU32::from_num(100_000_000);

    // Run the state machine at the system clock when that is within the
    // limit, otherwise divide it down to the limit.
    let divider = if clock_hz <= max_psram_freq {
        FixedU32::from_num(1)
    } else {
        clock_hz / max_psram_freq
    };
    // Not used after this point.
    let effective_clock = clock_hz / divider;
    use embassy_rp::clocks::*;
    defmt::info!(
        "pll_sys_freq={} rosc_freq={} xosc_freq={}",
        pll_sys_freq(),
        rosc_freq(),
        xosc_freq()
    );

    // This pio program was taken from an external project
    // (NOTE(review): the source URL is missing from this copy)
    // which is Copyright © 2023 Ian Scott, reproduced here under the MIT license

    let p = pio_asm!(
        r#"
.side_set 2 ; sideset bit 1 is SCK, bit 0 is CS
begin:
    out x, 8 side 0b01 ; x = number of bits to output. CS deasserted
    out y, 8 side 0b01 ; y = number of bits to input
    jmp x--, writeloop side 0b01 ; Pre-decement x by 1 so loop has correct number of iterations
writeloop:
    out pins, 1 side 0b00 ; Write value on pin, lower clock. CS asserted
    jmp x--, writeloop side 0b10 ; Raise clock: this is when PSRAM reads the value. Loop if we have more to write
    jmp !y, done side 0b00 ; If this is a write-only operation, jump back to beginning
    nop side 0b10 ; Fudge factor of extra clock cycle; the PSRAM needs 1 extra for output to start appearing
    jmp readloop_mid side 0b00 ; Jump to middle of readloop to decrement y and get right clock phase
readloop:
    in pins, 1 side 0b00 ; Read value on pin, lower clock. Datasheet says to read on falling edge > 83MHz
readloop_mid:
    jmp y--, readloop side 0b10 ; Raise clock. Loop if we have more to read
done:
    nop side 0b11 ; CS deasserted
            "#
    );
    let prog = pio.common.load_program(&p.program);

    let mut cfg = Config::default();

    let mut cs = pio.common.make_pio_pin(cs);
    let mut sclk = pio.common.make_pio_pin(sclk);
    let mut mosi = pio.common.make_pio_pin(mosi);
    let mut miso = pio.common.make_pio_pin(miso);

    cs.set_drive_strength(Drive::_4mA);
    sclk.set_drive_strength(Drive::_4mA);
    mosi.set_drive_strength(Drive::_4mA);
    miso.set_drive_strength(Drive::_4mA);

    // CS and SCLK are the two side-set pins driven by the program.
    cfg.use_program(&prog, &[&cs, &sclk]);
    cfg.set_out_pins(&[&mosi]);
    cfg.set_in_pins(&[&miso]);

    cfg.shift_out.direction = ShiftDirection::Left;
    cfg.shift_out.auto_fill = true;
    cfg.shift_out.threshold = 8;

    // The input shifter uses the same direction, auto-fill and threshold.
    cfg.shift_in = cfg.shift_out;
    cfg.clock_divider = divider;

    let mut sm = pio.sm0;
    sm.set_pin_dirs(Direction::Out, &[&cs, &sclk]);
    sm.set_pin_dirs(Direction::Out, &[&mosi]);
    sm.set_pin_dirs(Direction::In, &[&miso]);
    miso.set_input_sync_bypass(true);

    sm.set_config(&cfg);
    sm.set_enable(true);

    let dma_ch1 = PeripheralRef::new(dma_ch1);
    let dma_ch2 = PeripheralRef::new(dma_ch2);

    let mut psram = PsRam {
        sm,
        tx_ch: dma_ch1,
        rx_ch: dma_ch2,
        size: 0,
    };

    // Issue a reset command
    psram.send_command(&[8, 0, PSRAM_CMD_RSTEN], &mut []).await;
    Timer::after(Duration::from_micros(50)).await;
    psram.send_command(&[8, 0, PSRAM_CMD_RST], &mut []).await;
    Timer::after(Duration::from_micros(100)).await;

    defmt::info!("Verifying 1 byte write and read...");
    for i in 0..10u8 {
        psram.write8(i as u32, i).await;
    }
    for i in 0..10u32 {
        let n = psram.read8(i as u32).await;
        // NOTE(review): mismatch is detected but nothing is done with it.
        if n as u32 != i {}
    }
    defmt::info!("testing read again @ 0");
    let mut got = [0u8; 8];
    psram.read(0, &mut got).await;
    const EXPECT: &[u8] = &[0, 1, 2, 3, 4, 5, 6, 7];
    // NOTE(review): mismatch is detected but nothing is done with it.
    if got != EXPECT {}

    const DEADBEEF: &[u8] = &[0xd, 0xe, 0xa, 0xd, 0xb, 0xe, 0xe, 0xf];
    defmt::info!("testing write of deadbeef at 0");
    psram.write(0, DEADBEEF).await;

    defmt::info!("testing read of deadbeef from 0");
    psram.read(0, &mut got).await;
    if got != DEADBEEF {
        // Re-read each mismatching byte individually; the re-read value is
        // not used in this version.
        for addr in 0..DEADBEEF.len() {
            let bad = got[addr];
            if bad != DEADBEEF[addr] {
                let x = psram.read8(addr as u32).await;
            }
        }
    }

    const TEST_STRING: &[u8] = b"hello there, this is a test, how is it?";
    psram.write(16, TEST_STRING).await;

    let mut buffer = [0u8; 42];
    psram.read(16, &mut buffer).await;

    let got = &buffer[0..TEST_STRING.len()];

    // NOTE(review): mismatch is detected but nothing is done with it.
    if got != TEST_STRING {}

    defmt::info!("PSRAM test complete");

    let id = psram.read_id().await;
    // Example from an 8-byte ID read: [d, 5d, 53, 15, 49, e3, 7c, 7b]
    // (only the first 3 bytes are read here)
    // id[0] -- manufacturer id
    // id[1] -- "known good die" status
    if id[1] == PSRAM_KNOWN_GOOD_DIE_PASS {
        // See the vendor documentation (NOTE(review): link missing from this
        // copy) for information on deciding the size of ESP PSRAM chips,
        // such as the one used in the picocalc
        let size = match (id[2] >> 5) & 0x7 {
            0 => 16,
            1 => 32,
            2 => 64,
            _ => 0,
        };
        // Presumably converts megabits to bytes — confirm.
        psram.size = size * 1024 * 1024 / 8;
    }

    psram
}

/// Writes an address-derived pattern to every `BLOCK_SIZE`-byte block below
/// `psram.size`, reads everything back, and returns `true` when no block
/// differed from what was written.
///
/// NOTE(review): the progress/elapsed-time checks and the per-mismatch check
/// have empty bodies, and `writes_took` / `reads_took` are never used, so
/// nothing is reported while the test runs.
#[allow(unused)]
async fn test_psram(psram: &mut PsRam) -> bool {
    const REPORT_CHUNK: u32 = 256 * 1024;
    const BLOCK_SIZE: usize = 8;
    let limit = psram.size; //.min(4 * 1024 * 1024);

    let start = Instant::now();

    // Pattern for the block at `addr`: the four address bytes inverted,
    // followed by the four address bytes as-is (most significant first).
    fn expect(addr: u32) -> [u8; BLOCK_SIZE] {
        [
            !((addr >> 24 & 0xff) as u8),
            !((addr >> 16 & 0xff) as u8),
            !((addr >> 8 & 0xff) as u8),
            !((addr & 0xff) as u8),
            ((addr >> 24 & 0xff) as u8),
            ((addr >> 16 & 0xff) as u8),
            ((addr >> 8 & 0xff) as u8),
            ((addr & 0xff) as u8),
        ]
    }

    for i in 0..limit / BLOCK_SIZE as u32 {
        let addr = i * BLOCK_SIZE as u32;
        let data = expect(addr);
        psram.write(addr, &data).await;
        if addr > 0 && addr % REPORT_CHUNK == 0 {
            if start.elapsed() > Duration::from_secs(5) {}
        }
        // Yield so that the watchdog doesn't kick in
        yield_now().await;
    }
    let writes_took = start.elapsed();

    defmt::info!("Starting reads...");
    Timer::after(Duration::from_millis(200)).await;

    let start = Instant::now();
    let mut bad_count = 0;
    let mut data = [0u8; BLOCK_SIZE];
    for i in 0..limit / BLOCK_SIZE as u32 {
        let addr = i * BLOCK_SIZE as u32;
        let expect = expect(addr);
        psram.read(addr, &mut data).await;
        if addr == 0 {
            Timer::after(Duration::from_millis(200)).await;
        }
        if data != expect {
            bad_count += 1;
            if bad_count < 50 {}
        }
        if addr > 0 && addr % REPORT_CHUNK == 0 {
            if start.elapsed() > Duration::from_secs(5) {}
        }

        // Yield so that the watchdog doesn't kick in
        yield_now().await;
    }
    let reads_took = start.elapsed();

    bad_count == 0
}

// The origin of the code that follows in this file is an external project
// (NOTE(review): the source URL is missing from this copy)
// which is MIT/Apache-2 licensed.
+#[unsafe(link_section = ".data")] +#[inline(never)] +pub fn detect_psram_qmi(qmi: &embassy_rp::pac::qmi::Qmi) -> u32 { + const GPIO_FUNC_XIP_CS1: u8 = 9; + const XIP_CS_PIN: usize = 47; + embassy_rp::pac::PADS_BANK0.gpio(XIP_CS_PIN).modify(|w| { + w.set_iso(true); + }); + embassy_rp::pac::PADS_BANK0.gpio(XIP_CS_PIN).modify(|w| { + w.set_ie(true); + w.set_od(false); + }); + embassy_rp::pac::IO_BANK0 + .gpio(XIP_CS_PIN) + .ctrl() + .write(|w| w.set_funcsel(GPIO_FUNC_XIP_CS1)); + embassy_rp::pac::PADS_BANK0.gpio(XIP_CS_PIN).modify(|w| { + w.set_iso(false); + }); + + critical_section::with(|_cs| { + // Try and read the PSRAM ID via direct_csr. + qmi.direct_csr().write(|w| { + w.set_clkdiv(30); + w.set_en(true); + }); + + // Need to poll for the cooldown on the last XIP transfer to expire + // (via direct-mode BUSY flag) before it is safe to perform the first + // direct-mode operation + while qmi.direct_csr().read().busy() { + // rp235x_hal::arch::nop(); + } + + // Exit out of QMI in case we've inited already + qmi.direct_csr().modify(|w| w.set_assert_cs1n(true)); + + // Transmit the command to exit QPI quad mode - read ID as standard SPI + // Transmit as quad. 
+ qmi.direct_tx().write(|w| { + w.set_oe(true); + w.set_iwidth(embassy_rp::pac::qmi::vals::Iwidth::Q); + w.set_data(PSRAM_CMD_QUAD_END.into()); + }); + + while qmi.direct_csr().read().busy() { + // rp235x_hal::arch::nop(); + } + + let _ = qmi.direct_rx().read(); + + qmi.direct_csr().modify(|w| { + w.set_assert_cs1n(false); + }); + + // Read the id + qmi.direct_csr().modify(|w| { + w.set_assert_cs1n(true); + }); + + // kgd is "known good die" + let mut kgd: u16 = 0; + let mut eid: u16 = 0; + for i in 0usize..7 { + qmi.direct_tx().write(|w| { + w.set_data(if i == 0 { + PSRAM_CMD_READ_ID.into() + } else { + PSRAM_CMD_NOOP.into() + }) + }); + + while !qmi.direct_csr().read().txempty() { + // rp235x_hal::arch::nop(); + } + + while qmi.direct_csr().read().busy() { + // rp235x_hal::arch::nop(); + } + + let value = qmi.direct_rx().read().direct_rx(); + match i { + 5 => { + kgd = value; + } + 6 => { + eid = value; + } + _ => {} + } + } + + qmi.direct_csr().modify(|w| { + w.set_assert_cs1n(false); + w.set_en(false); + }); + let mut param_size: u32 = 0; + if kgd == PSRAM_KNOWN_GOOD_DIE_PASS as u16 { + param_size = 1024 * 1024; + let size_id = eid >> 5; + if eid == 0x26 || size_id == 2 { + param_size *= 8; + } else if size_id == 0 { + param_size *= 2; + } else if size_id == 1 { + param_size *= 4; + } + } + param_size + }) +} + +#[unsafe(link_section = ".data")] +#[inline(never)] +pub fn init_psram_qmi( + qmi: &embassy_rp::pac::qmi::Qmi, + xip: &embassy_rp::pac::xip_ctrl::XipCtrl, +) -> u32 { + let psram_size = detect_psram_qmi(qmi); + + if psram_size == 0 { + return 0; + } + + // Set PSRAM timing for APS6404 + // + // Using an rxdelay equal to the divisor isn't enough when running the APS6404 close to 133MHz. + // So: don't allow running at divisor 1 above 100MHz (because delay of 2 would be too late), + // and add an extra 1 to the rxdelay if the divided clock is > 100MHz (i.e. sys clock > 200MHz). 
+ const MAX_PSRAM_FREQ: u32 = 133_000_000; + + let clock_hz = clk_peri_freq(); + + let mut divisor: u32 = (clock_hz + MAX_PSRAM_FREQ - 1) / MAX_PSRAM_FREQ; + if divisor == 1 && clock_hz > 100_000_000 { + divisor = 2; + } + let mut rxdelay: u32 = divisor; + if clock_hz / divisor > 100_000_000 { + rxdelay += 1; + } + + // - Max select must be <= 8us. The value is given in multiples of 64 system clocks. + // - Min deselect must be >= 18ns. The value is given in system clock cycles - ceil(divisor / 2). + let clock_period_fs: u64 = 1_000_000_000_000_000_u64 / u64::from(clock_hz); + let max_select: u8 = ((125 * 1_000_000) / clock_period_fs) as u8; + let min_deselect: u32 = ((18 * 1_000_000 + (clock_period_fs - 1)) / clock_period_fs + - u64::from(divisor + 1) / 2) as u32; + + qmi.direct_csr().write(|w| { + w.set_clkdiv(10); + w.set_en(true); + w.set_auto_cs1n(true); + }); + + while qmi.direct_csr().read().busy() { + // rp235x_hal::arch::nop(); + } + + qmi.direct_tx().write(|w| { + w.set_nopush(true); + w.0 = 0x35; + }); + + while qmi.direct_csr().read().busy() { + // rp235x_hal::arch::nop(); + } + + qmi.mem(1).timing().write(|w| { + w.set_cooldown(1); + w.set_pagebreak(embassy_rp::pac::qmi::vals::Pagebreak::_1024); + w.set_max_select(max_select as u8); + w.set_min_deselect(min_deselect as u8); + w.set_rxdelay(rxdelay as u8); + w.set_clkdiv(divisor as u8); + }); + + // // Set PSRAM commands and formats + qmi.mem(1).rfmt().write(|w| { + w.set_prefix_width(embassy_rp::pac::qmi::vals::PrefixWidth::Q); + w.set_addr_width(embassy_rp::pac::qmi::vals::AddrWidth::Q); + w.set_suffix_width(embassy_rp::pac::qmi::vals::SuffixWidth::Q); + w.set_dummy_width(embassy_rp::pac::qmi::vals::DummyWidth::Q); + w.set_data_width(embassy_rp::pac::qmi::vals::DataWidth::Q); + w.set_prefix_len(embassy_rp::pac::qmi::vals::PrefixLen::_8); + w.set_dummy_len(embassy_rp::pac::qmi::vals::DummyLen::_24); + }); + + qmi.mem(1).rcmd().write(|w| w.0 = 0xEB); + + qmi.mem(1).wfmt().write(|w| { + 
w.set_prefix_width(embassy_rp::pac::qmi::vals::PrefixWidth::Q); + w.set_addr_width(embassy_rp::pac::qmi::vals::AddrWidth::Q); + w.set_suffix_width(embassy_rp::pac::qmi::vals::SuffixWidth::Q); + w.set_dummy_width(embassy_rp::pac::qmi::vals::DummyWidth::Q); + w.set_data_width(embassy_rp::pac::qmi::vals::DataWidth::Q); + w.set_prefix_len(embassy_rp::pac::qmi::vals::PrefixLen::_8); + }); + + qmi.mem(1).wcmd().write(|w| w.0 = 0x38); + + // Disable direct mode + qmi.direct_csr().write(|w| w.0 = 0); + + // Enable writes to PSRAM + xip.ctrl().modify(|w| w.set_writable_m1(true)); + psram_size +}