From 13d267f61e49d065cf3e6db714d5696390a36556 Mon Sep 17 00:00:00 2001 From: sawyer bristol Date: Thu, 26 Jun 2025 20:34:18 -0600 Subject: [PATCH 1/3] can alloc with psram --- Cargo.lock | 58 +++++ Cargo.toml | 8 +- src/heap.rs | 133 +++++++++++ src/main.rs | 27 ++- src/psram.rs | 615 +++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 829 insertions(+), 12 deletions(-) create mode 100644 src/heap.rs create mode 100644 src/psram.rs diff --git a/Cargo.lock b/Cargo.lock index fe98260..0473f8c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -75,6 +75,12 @@ dependencies = [ "rustc_version", ] +[[package]] +name = "base64" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" + [[package]] name = "bisync" version = "0.3.0" @@ -196,6 +202,12 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "const-default" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b396d1f76d455557e1218ec8066ae14bba60b4b36ecd55577ba979f5db7ecaa" + [[package]] name = "cortex-m" version = "0.7.7" @@ -673,6 +685,18 @@ dependencies = [ "defmt 0.3.100", ] +[[package]] +name = "embedded-alloc" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f2de9133f68db0d4627ad69db767726c99ff8585272716708227008d3f1bddd" +dependencies = [ + "const-default", + "critical-section", + "linked_list_allocator", + "rlsf", +] + [[package]] name = "embedded-graphics" version = "0.8.1" @@ -1147,6 +1171,12 @@ dependencies = [ "libc", ] +[[package]] +name = "linked_list_allocator" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afa463f5405ee81cdb9cc2baf37e08ec7e4c8209442b5d72c04cfb2cd6e6286" + [[package]] name = "litrs" version = "0.4.1" @@ -1340,6 +1370,7 @@ dependencies = [ "bt-hci", "cortex-m", "cortex-m-rt", + "critical-section", "cyw43", 
"cyw43-pio", "defmt 0.3.100", @@ -1350,11 +1381,13 @@ dependencies = [ "embassy-rp 0.4.0", "embassy-sync 0.7.0", "embassy-time", + "embedded-alloc", "embedded-graphics", "embedded-hal 0.2.7", "embedded-hal-async", "embedded-hal-bus", "embedded-sdmmc", + "fixed", "panic-probe", "portable-atomic", "st7365p-lcd", @@ -1611,6 +1644,18 @@ dependencies = [ "bytemuck", ] +[[package]] +name = "rlsf" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "222fb240c3286247ecdee6fa5341e7cdad0ffdf8e7e401d9937f2d58482a20bf" +dependencies = [ + "cfg-if", + "const-default", + "libc", + "svgbobdoc", +] + [[package]] name = "rp-binary-info" version = "0.1.1" @@ -1770,6 +1815,19 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "svgbobdoc" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2c04b93fc15d79b39c63218f15e3fdffaa4c227830686e3b7c5f41244eb3e50" +dependencies = [ + "base64", + "proc-macro2", + "quote", + "syn 1.0.109", + "unicode-width", +] + [[package]] name = "syn" version = "1.0.109" diff --git a/Cargo.toml b/Cargo.toml index 5a9425d..c6ffc9d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -65,6 +65,11 @@ cortex-m = { version = "0.7.7" } cortex-m-rt = "0.7.5" panic-probe = "0.3" portable-atomic = { version = "1.11", features = ["critical-section"] } +static_cell = "2.1.1" +bitflags = "2.9.1" +embedded-alloc = "0.6.0" +fixed = "1.29.0" +critical-section = "1.2.0" defmt = { version = "0.3", optional = true } defmt-rtt = "0.4.2" @@ -72,6 +77,3 @@ defmt-rtt = "0.4.2" embedded-graphics = { version = "0.8.1" } embedded-sdmmc = { git = "https://github.com/Be-ing/embedded-sdmmc-rs", branch = "bisync", default-features = false } st7365p-lcd = { git = "https://github.com/legitcamper/st7365p-lcd-rs" } - -static_cell = "2.1.1" -bitflags = "2.9.1" diff --git 
a/src/heap.rs b/src/heap.rs new file mode 100644 index 0000000..ee82966 --- /dev/null +++ b/src/heap.rs @@ -0,0 +1,133 @@ +// This whole file was taken from +// +// + +use core::alloc::{GlobalAlloc, Layout}; +use core::mem::MaybeUninit; +use core::sync::atomic::{AtomicUsize, Ordering}; +use embedded_alloc::LlffHeap as Heap; + +#[global_allocator] +pub static HEAP: DualHeap = DualHeap::empty(); +const HEAP_SIZE: usize = 64 * 1024; +static mut HEAP_MEM: [MaybeUninit<u8>; HEAP_SIZE] = [MaybeUninit::uninit(); HEAP_SIZE]; + +struct Region { + start: AtomicUsize, + size: AtomicUsize, +} + +impl Region { + const fn default() -> Self { + Self { + start: AtomicUsize::new(0), + size: AtomicUsize::new(0), + } + } + + fn contains(&self, address: usize) -> bool { + let start = self.start.load(Ordering::Relaxed); + let size = self.size.load(Ordering::Relaxed); // region length in bytes; must come from `size`, not a second load of `start` + (start..start + size).contains(&address) + } + + fn new(start: usize, size: usize) -> Self { + Self { + start: AtomicUsize::new(start), + size: AtomicUsize::new(size), + } + } +} + +/// This is an allocator that combines two regions of memory. +/// The intent is to use some of the directly connected RAM +/// for this, and if we find some XIP capable PSRAM, add that +/// as a secondary region. +/// Allocation from the primary region is always preferred, +/// as it is expected to be a bit faster than PSRAM. +/// FIXME: PSRAM-allocated memory isn't compatible with +/// CAS atomics, so we might need a bit of a think about this! 
+pub struct DualHeap { + primary: Heap, + primary_region: Region, + secondary: Heap, +} + +impl DualHeap { + pub const fn empty() -> Self { + Self { + primary: Heap::empty(), + primary_region: Region::default(), + secondary: Heap::empty(), + } + } + + unsafe fn add_primary(&self, region: Region) { + let start = region.start.load(Ordering::SeqCst); + let size = region.size.load(Ordering::SeqCst); + unsafe { + self.primary.init(start, size); + } + self.primary_region.start.store(start, Ordering::SeqCst); + self.primary_region.size.store(size, Ordering::SeqCst); + } + + unsafe fn add_secondary(&self, region: Region) { + let start = region.start.load(Ordering::SeqCst); + let size = region.size.load(Ordering::SeqCst); + unsafe { + self.secondary.init(start, size); + } + } + + pub fn used(&self) -> usize { + self.primary.used() + self.secondary.used() + } + + pub fn free(&self) -> usize { + self.primary.free() + self.secondary.free() + } +} + +unsafe impl GlobalAlloc for DualHeap { + unsafe fn alloc(&self, layout: Layout) -> *mut u8 { + unsafe { + let ptr = self.primary.alloc(layout); + if !ptr.is_null() { + return ptr; + } + // start using secondary area when primary heap is full + self.secondary.alloc(layout) + } + } + + unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { + unsafe { + let ptr_usize = ptr as usize; + if self.primary_region.contains(ptr_usize) { + self.primary.dealloc(ptr, layout); + } else { + self.secondary.dealloc(ptr, layout); + } + } + } +} + +pub fn init_heap() { + let primary_start = &raw mut HEAP_MEM as usize; + unsafe { HEAP.add_primary(Region::new(primary_start, HEAP_SIZE)) } +} + +pub fn init_qmi_psram_heap(size: u32) { + unsafe { HEAP.add_secondary(Region::new(0x11000000, size as usize)) } +} + +pub async fn free_command(_args: &[&str]) { + let ram_used = HEAP.primary.used(); + let ram_free = HEAP.primary.free(); + let ram_total = ram_used + ram_free; + + let qmi_used = HEAP.secondary.used(); + let qmi_free = HEAP.secondary.free(); + let 
qmi_total = qmi_used + qmi_free; +} diff --git a/src/main.rs b/src/main.rs index 56b63f8..921dc45 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,9 +6,10 @@ use defmt::*; use {defmt_rtt as _, panic_probe as _}; +extern crate alloc; + use embassy_executor::Spawner; -use embassy_rp::peripherals::I2C1; -use embassy_rp::spi::Spi; +use embassy_rp::peripherals::{I2C1, PIO1}; use embassy_rp::{ bind_interrupts, gpio::{Level, Output}, @@ -16,6 +17,7 @@ use embassy_rp::{ i2c::I2c, spi, }; +use embassy_rp::{pio, spi::Spi}; use embassy_sync::blocking_mutex::raw::NoopRawMutex; use embassy_sync::channel::Channel; use embassy_time::Timer; @@ -27,14 +29,21 @@ mod peripherals; use peripherals::{keyboard::KeyEvent, peripherals_task}; mod display; use display::display_task; +mod heap; +use heap::{HEAP, init_heap}; +mod psram; embassy_rp::bind_interrupts!(struct Irqs { I2C1_IRQ => i2c::InterruptHandler; + PIO1_IRQ_0 => pio::InterruptHandler; }); +const MAX_SPI_FREQ: u32 = 62_500_000; + #[embassy_executor::main] async fn main(spawner: Spawner) { let p = embassy_rp::init(Default::default()); + init_heap(); static KEYBOARD_EVENTS: StaticCell> = StaticCell::new(); let keyboard_events = KEYBOARD_EVENTS.init(Channel::new()); @@ -47,11 +56,11 @@ async fn main(spawner: Spawner) { .spawn(peripherals_task(i2c1, keyboard_events.sender())) .unwrap(); - // // configure display handler - // let mut config = spi::Config::default(); - // config.frequency = 16_000_000; - // let spi1 = spi::Spi::new_blocking(p.SPI1, p.PIN_10, p.PIN_11, p.PIN_12, config); - // spawner - // .spawn(display_task(spi1, p.PIN_13, p.PIN_14, p.PIN_15)) - // .unwrap(); + // configure display handler + let mut config = spi::Config::default(); + config.frequency = MAX_SPI_FREQ; + let spi1 = spi::Spi::new_blocking(p.SPI1, p.PIN_10, p.PIN_11, p.PIN_12, config); + spawner + .spawn(display_task(spi1, p.PIN_13, p.PIN_14, p.PIN_15)) + .unwrap(); } diff --git a/src/psram.rs b/src/psram.rs new file mode 100644 index 0000000..3debf78 --- 
/dev/null +++ b/src/psram.rs @@ -0,0 +1,615 @@ +// This whole file was taken from +// +// +use crate::Irqs; +use embassy_futures::yield_now; +use embassy_rp::PeripheralRef; +use embassy_rp::clocks::clk_peri_freq; +use embassy_rp::gpio::Drive; +use embassy_rp::peripherals::{DMA_CH1, DMA_CH2, PIN_2, PIN_3, PIN_20, PIN_21, PIO1}; +use embassy_rp::pio::program::pio_asm; +use embassy_rp::pio::{Config, Direction, Pio, ShiftDirection}; +use embassy_time::{Duration, Instant, Timer}; +use fixed::FixedU32; +use fixed::types::extra::U8; + +// The physical connections in the picocalc schematic are: +// LABEL PICO ESP-PSRAM64H +// RAM_CS - PIN_20 CE (pulled up to 3v3 via 10kOhm) +// RAM_SCK - PIN_21 SCLK +// RAM_TX - PIN_2 SI/SIO0 +// RAM_RX - PIN_3 SO/SIO1 +// RAM_IO2 - PIN_4 SIO2 (QPI Mode) +// RAM_IO3 - PIN_5 SIO3 (QPI Mode) + +#[allow(unused)] +const PSRAM_CMD_QUAD_END: u8 = 0xf5; +#[allow(unused)] +const PSRAM_CMD_QUAD_ENABLE: u8 = 0x35; +#[allow(unused)] +const PSRAM_CMD_READ_ID: u8 = 0x9F; +const PSRAM_CMD_RSTEN: u8 = 0x66; +const PSRAM_CMD_RST: u8 = 0x99; +const PSRAM_CMD_WRITE: u8 = 0x02; +const PSRAM_CMD_FAST_READ: u8 = 0x0B; +#[allow(unused)] +const PSRAM_CMD_QUAD_READ: u8 = 0xEB; +#[allow(unused)] +const PSRAM_CMD_QUAD_WRITE: u8 = 0x38; +#[allow(unused)] +const PSRAM_CMD_NOOP: u8 = 0xFF; +#[allow(unused)] +const PSRAM_KNOWN_GOOD_DIE_PASS: u8 = 0x5d; + +pub struct PsRam { + sm: embassy_rp::pio::StateMachine<'static, PIO1, 0>, + tx_ch: PeripheralRef<'static, DMA_CH1>, + rx_ch: PeripheralRef<'static, DMA_CH2>, + pub size: u32, +} + +impl PsRam { + pub async fn send_command(&mut self, cmd: &[u8], out: &mut [u8]) { + if out.is_empty() { + self.sm + .tx() + .dma_push(self.tx_ch.reborrow(), cmd, false) + .await; + } else { + let (rx, tx) = self.sm.rx_tx(); + tx.dma_push(self.tx_ch.reborrow(), cmd, false).await; + rx.dma_pull(self.rx_ch.reborrow(), out, false).await; + } + } + + pub async fn write(&mut self, mut addr: u32, mut data: &[u8]) { + // I haven't seen this work 
reliably over 24 bytes + const MAX_CHUNK: usize = 24; + while data.len() > 0 { + let to_write = data.len().min(MAX_CHUNK); + //defmt::info!("writing {to_write} @ {addr}"); + + #[rustfmt::skip] + let mut to_send = [ + 32 + (to_write as u8 * 8), // write address + data + 0, // read 0 bits + PSRAM_CMD_WRITE, + ((addr >> 16) & 0xff) as u8, + ((addr >> 8) & 0xff) as u8, + (addr & 0xff) as u8, + // This sequence must be MAX_CHUNK in length + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + ]; + + for (src, dst) in data.iter().zip(to_send.iter_mut().skip(6)) { + *dst = *src; + } + + self.send_command(&to_send[0..6 + to_write], &mut []).await; + addr += to_write as u32; + data = &data[to_write..]; + } + } + + pub async fn read_id(&mut self) -> [u8; 3] { + let mut id = [0u8; 3]; + #[rustfmt::skip] + self.send_command( + &[ + 32, // write 32 bits + 3 * 8, // read 3 bytes = 24 bits + PSRAM_CMD_READ_ID, + // don't care: 24-bit "address" + 0, 0, 0, + ], + &mut id, + ) + .await; + id + } + + pub async fn read(&mut self, mut addr: u32, mut out: &mut [u8]) { + // Cannot get reliable reads above 4 bytes at a time. 
+ // out[4] will always have a bit error + const MAX_CHUNK: usize = 4; + while out.len() > 0 { + let to_read = out.len().min(MAX_CHUNK); + //defmt::info!("reading {to_read} @ {addr}"); + self.send_command( + &[ + 40, // write 40 bits + to_read as u8 * 8, // read n bytes + PSRAM_CMD_FAST_READ, + ((addr >> 16) & 0xff) as u8, + ((addr >> 8) & 0xff) as u8, + (addr & 0xff) as u8, + 0, // 8 cycle delay by sending 8 bits of don't care data + ], + &mut out[0..to_read], + ) + .await; + addr += to_read as u32; + out = &mut out[to_read..]; + } + } + + #[allow(unused)] + pub async fn write8(&mut self, addr: u32, data: u8) { + //defmt::info!("write8 addr {addr} <- {data:x}"); + self.send_command( + &[ + 40, // write 40 bits + 0, // read 0 bits + PSRAM_CMD_WRITE, + ((addr >> 16) & 0xff) as u8, + ((addr >> 8) & 0xff) as u8, + (addr & 0xff) as u8, + data, + ], + &mut [], + ) + .await; + } + + #[allow(unused)] + pub async fn read8(&mut self, addr: u32) -> u8 { + let mut buf = [0u8]; + self.send_command( + &[ + 40, // write 40 bits + 8, // read 8 bits + PSRAM_CMD_FAST_READ, + ((addr >> 16) & 0xff) as u8, + ((addr >> 8) & 0xff) as u8, + (addr & 0xff) as u8, + 0, // 8 cycle delay + ], + &mut buf, + ) + .await; + buf[0] + } +} + +pub async fn init_psram( + pio_1: PIO1, + sclk: PIN_21, + mosi: PIN_2, + miso: PIN_3, + cs: PIN_20, + dma_ch1: DMA_CH1, + dma_ch2: DMA_CH2, +) -> PsRam { + let mut pio = Pio::new(pio_1, Irqs); + + let clock_hz = FixedU32::from_num(embassy_rp::clocks::clk_sys_freq()); + let max_psram_freq: FixedU32<U8> = FixedU32::from_num(100_000_000); + + let divider = if clock_hz <= max_psram_freq { + FixedU32::from_num(1) + } else { + clock_hz / max_psram_freq + }; + let effective_clock = clock_hz / divider; + use embassy_rp::clocks::*; + defmt::info!( + "pll_sys_freq={} rosc_freq={} xosc_freq={}", + pll_sys_freq(), + rosc_freq(), + xosc_freq() + ); + + // This pio program was taken from + // + // which is Copyright © 2023 Ian Scott, reproduced here under the MIT license + + 
let p = pio_asm!( + r#" +.side_set 2 ; sideset bit 1 is SCK, bit 0 is CS +begin: + out x, 8 side 0b01 ; x = number of bits to output. CS deasserted + out y, 8 side 0b01 ; y = number of bits to input + jmp x--, writeloop side 0b01 ; Pre-decement x by 1 so loop has correct number of iterations +writeloop: + out pins, 1 side 0b00 ; Write value on pin, lower clock. CS asserted + jmp x--, writeloop side 0b10 ; Raise clock: this is when PSRAM reads the value. Loop if we have more to write + jmp !y, done side 0b00 ; If this is a write-only operation, jump back to beginning + nop side 0b10 ; Fudge factor of extra clock cycle; the PSRAM needs 1 extra for output to start appearing + jmp readloop_mid side 0b00 ; Jump to middle of readloop to decrement y and get right clock phase +readloop: + in pins, 1 side 0b00 ; Read value on pin, lower clock. Datasheet says to read on falling edge > 83MHz +readloop_mid: + jmp y--, readloop side 0b10 ; Raise clock. Loop if we have more to read +done: + nop side 0b11 ; CS deasserted + "# + ); + let prog = pio.common.load_program(&p.program); + + let mut cfg = Config::default(); + + let mut cs = pio.common.make_pio_pin(cs); + let mut sclk = pio.common.make_pio_pin(sclk); + let mut mosi = pio.common.make_pio_pin(mosi); + let mut miso = pio.common.make_pio_pin(miso); + + cs.set_drive_strength(Drive::_4mA); + sclk.set_drive_strength(Drive::_4mA); + mosi.set_drive_strength(Drive::_4mA); + miso.set_drive_strength(Drive::_4mA); + + cfg.use_program(&prog, &[&cs, &sclk]); + cfg.set_out_pins(&[&mosi]); + cfg.set_in_pins(&[&miso]); + + cfg.shift_out.direction = ShiftDirection::Left; + cfg.shift_out.auto_fill = true; + cfg.shift_out.threshold = 8; + + cfg.shift_in = cfg.shift_out; + cfg.clock_divider = divider; + + let mut sm = pio.sm0; + sm.set_pin_dirs(Direction::Out, &[&cs, &sclk]); + sm.set_pin_dirs(Direction::Out, &[&mosi]); + sm.set_pin_dirs(Direction::In, &[&miso]); + miso.set_input_sync_bypass(true); + + sm.set_config(&cfg); + 
sm.set_enable(true); + + let dma_ch1 = PeripheralRef::new(dma_ch1); + let dma_ch2 = PeripheralRef::new(dma_ch2); + + let mut psram = PsRam { + sm, + tx_ch: dma_ch1, + rx_ch: dma_ch2, + size: 0, + }; + + // Issue a reset command + psram.send_command(&[8, 0, PSRAM_CMD_RSTEN], &mut []).await; + Timer::after(Duration::from_micros(50)).await; + psram.send_command(&[8, 0, PSRAM_CMD_RST], &mut []).await; + Timer::after(Duration::from_micros(100)).await; + + defmt::info!("Verifying 1 byte write and read..."); + for i in 0..10u8 { + psram.write8(i as u32, i).await; + } + for i in 0..10u32 { + let n = psram.read8(i as u32).await; + if n as u32 != i {} + } + defmt::info!("testing read again @ 0"); + let mut got = [0u8; 8]; + psram.read(0, &mut got).await; + const EXPECT: &[u8] = &[0, 1, 2, 3, 4, 5, 6, 7]; + if got != EXPECT {} + + const DEADBEEF: &[u8] = &[0xd, 0xe, 0xa, 0xd, 0xb, 0xe, 0xe, 0xf]; + defmt::info!("testing write of deadbeef at 0"); + psram.write(0, DEADBEEF).await; + + defmt::info!("testing read of deadbeef from 0"); + psram.read(0, &mut got).await; + if got != DEADBEEF { + for addr in 0..DEADBEEF.len() { + let bad = got[addr]; + if bad != DEADBEEF[addr] { + let x = psram.read8(addr as u32).await; + } + } + } + + const TEST_STRING: &[u8] = b"hello there, this is a test, how is it?"; + psram.write(16, TEST_STRING).await; + + let mut buffer = [0u8; 42]; + psram.read(16, &mut buffer).await; + + let got = &buffer[0..TEST_STRING.len()]; + + if got != TEST_STRING {} + + defmt::info!("PSRAM test complete"); + + let id = psram.read_id().await; + // id: [d, 5d, 53, 15, 49, e3, 7c, 7b] + // id[0] -- manufacturer id + // id[1] -- "known good die" status + if id[1] == PSRAM_KNOWN_GOOD_DIE_PASS { + // See + // for information on deciding the size of ESP PSRAM chips, + // such as the one used in the picocalc + let size = match (id[2] >> 5) & 0x7 { + 0 => 16, + 1 => 32, + 2 => 64, + _ => 0, + }; + psram.size = size * 1024 * 1024 / 8; + } + + psram +} + +#[allow(unused)] +async 
fn test_psram(psram: &mut PsRam) -> bool { + const REPORT_CHUNK: u32 = 256 * 1024; + const BLOCK_SIZE: usize = 8; + let limit = psram.size; //.min(4 * 1024 * 1024); + + let start = Instant::now(); + + fn expect(addr: u32) -> [u8; BLOCK_SIZE] { + [ + !((addr >> 24 & 0xff) as u8), + !((addr >> 16 & 0xff) as u8), + !((addr >> 8 & 0xff) as u8), + !((addr & 0xff) as u8), + ((addr >> 24 & 0xff) as u8), + ((addr >> 16 & 0xff) as u8), + ((addr >> 8 & 0xff) as u8), + ((addr & 0xff) as u8), + ] + } + + for i in 0..limit / BLOCK_SIZE as u32 { + let addr = i * BLOCK_SIZE as u32; + let data = expect(addr); + psram.write(addr, &data).await; + if addr > 0 && addr % REPORT_CHUNK == 0 { + if start.elapsed() > Duration::from_secs(5) {} + } + // Yield so that the watchdog doesn't kick in + yield_now().await; + } + let writes_took = start.elapsed(); + + defmt::info!("Starting reads..."); + Timer::after(Duration::from_millis(200)).await; + + let start = Instant::now(); + let mut bad_count = 0; + let mut data = [0u8; BLOCK_SIZE]; + for i in 0..limit / BLOCK_SIZE as u32 { + let addr = i * BLOCK_SIZE as u32; + let expect = expect(addr); + psram.read(addr, &mut data).await; + if addr == 0 { + Timer::after(Duration::from_millis(200)).await; + } + if data != expect { + bad_count += 1; + if bad_count < 50 {} + } + if addr > 0 && addr % REPORT_CHUNK == 0 { + if start.elapsed() > Duration::from_secs(5) {} + } + + // Yield so that the watchdog doesn't kick in + yield_now().await; + } + let reads_took = start.elapsed(); + + bad_count == 0 +} + +// The origin of the code in this file is: +// +// which is MIT/Apache-2 licensed. 
+#[unsafe(link_section = ".data")] +#[inline(never)] +pub fn detect_psram_qmi(qmi: &embassy_rp::pac::qmi::Qmi) -> u32 { + const GPIO_FUNC_XIP_CS1: u8 = 9; + const XIP_CS_PIN: usize = 47; + embassy_rp::pac::PADS_BANK0.gpio(XIP_CS_PIN).modify(|w| { + w.set_iso(true); + }); + embassy_rp::pac::PADS_BANK0.gpio(XIP_CS_PIN).modify(|w| { + w.set_ie(true); + w.set_od(false); + }); + embassy_rp::pac::IO_BANK0 + .gpio(XIP_CS_PIN) + .ctrl() + .write(|w| w.set_funcsel(GPIO_FUNC_XIP_CS1)); + embassy_rp::pac::PADS_BANK0.gpio(XIP_CS_PIN).modify(|w| { + w.set_iso(false); + }); + + critical_section::with(|_cs| { + // Try and read the PSRAM ID via direct_csr. + qmi.direct_csr().write(|w| { + w.set_clkdiv(30); + w.set_en(true); + }); + + // Need to poll for the cooldown on the last XIP transfer to expire + // (via direct-mode BUSY flag) before it is safe to perform the first + // direct-mode operation + while qmi.direct_csr().read().busy() { + // rp235x_hal::arch::nop(); + } + + // Exit out of QMI in case we've inited already + qmi.direct_csr().modify(|w| w.set_assert_cs1n(true)); + + // Transmit the command to exit QPI quad mode - read ID as standard SPI + // Transmit as quad. 
+ qmi.direct_tx().write(|w| { + w.set_oe(true); + w.set_iwidth(embassy_rp::pac::qmi::vals::Iwidth::Q); + w.set_data(PSRAM_CMD_QUAD_END.into()); + }); + + while qmi.direct_csr().read().busy() { + // rp235x_hal::arch::nop(); + } + + let _ = qmi.direct_rx().read(); + + qmi.direct_csr().modify(|w| { + w.set_assert_cs1n(false); + }); + + // Read the id + qmi.direct_csr().modify(|w| { + w.set_assert_cs1n(true); + }); + + // kgd is "known good die" + let mut kgd: u16 = 0; + let mut eid: u16 = 0; + for i in 0usize..7 { + qmi.direct_tx().write(|w| { + w.set_data(if i == 0 { + PSRAM_CMD_READ_ID.into() + } else { + PSRAM_CMD_NOOP.into() + }) + }); + + while !qmi.direct_csr().read().txempty() { + // rp235x_hal::arch::nop(); + } + + while qmi.direct_csr().read().busy() { + // rp235x_hal::arch::nop(); + } + + let value = qmi.direct_rx().read().direct_rx(); + match i { + 5 => { + kgd = value; + } + 6 => { + eid = value; + } + _ => {} + } + } + + qmi.direct_csr().modify(|w| { + w.set_assert_cs1n(false); + w.set_en(false); + }); + let mut param_size: u32 = 0; + if kgd == PSRAM_KNOWN_GOOD_DIE_PASS as u16 { + param_size = 1024 * 1024; + let size_id = eid >> 5; + if eid == 0x26 || size_id == 2 { + param_size *= 8; + } else if size_id == 0 { + param_size *= 2; + } else if size_id == 1 { + param_size *= 4; + } + } + param_size + }) +} + +#[unsafe(link_section = ".data")] +#[inline(never)] +pub fn init_psram_qmi( + qmi: &embassy_rp::pac::qmi::Qmi, + xip: &embassy_rp::pac::xip_ctrl::XipCtrl, +) -> u32 { + let psram_size = detect_psram_qmi(qmi); + + if psram_size == 0 { + return 0; + } + + // Set PSRAM timing for APS6404 + // + // Using an rxdelay equal to the divisor isn't enough when running the APS6404 close to 133MHz. + // So: don't allow running at divisor 1 above 100MHz (because delay of 2 would be too late), + // and add an extra 1 to the rxdelay if the divided clock is > 100MHz (i.e. sys clock > 200MHz). 
+ const MAX_PSRAM_FREQ: u32 = 133_000_000; + + let clock_hz = clk_peri_freq(); + + let mut divisor: u32 = (clock_hz + MAX_PSRAM_FREQ - 1) / MAX_PSRAM_FREQ; + if divisor == 1 && clock_hz > 100_000_000 { + divisor = 2; + } + let mut rxdelay: u32 = divisor; + if clock_hz / divisor > 100_000_000 { + rxdelay += 1; + } + + // - Max select must be <= 8us. The value is given in multiples of 64 system clocks. + // - Min deselect must be >= 18ns. The value is given in system clock cycles - ceil(divisor / 2). + let clock_period_fs: u64 = 1_000_000_000_000_000_u64 / u64::from(clock_hz); + let max_select: u8 = ((125 * 1_000_000) / clock_period_fs) as u8; + let min_deselect: u32 = ((18 * 1_000_000 + (clock_period_fs - 1)) / clock_period_fs + - u64::from(divisor + 1) / 2) as u32; + + qmi.direct_csr().write(|w| { + w.set_clkdiv(10); + w.set_en(true); + w.set_auto_cs1n(true); + }); + + while qmi.direct_csr().read().busy() { + // rp235x_hal::arch::nop(); + } + + qmi.direct_tx().write(|w| { + w.set_nopush(true); + w.0 = 0x35; + }); + + while qmi.direct_csr().read().busy() { + // rp235x_hal::arch::nop(); + } + + qmi.mem(1).timing().write(|w| { + w.set_cooldown(1); + w.set_pagebreak(embassy_rp::pac::qmi::vals::Pagebreak::_1024); + w.set_max_select(max_select as u8); + w.set_min_deselect(min_deselect as u8); + w.set_rxdelay(rxdelay as u8); + w.set_clkdiv(divisor as u8); + }); + + // // Set PSRAM commands and formats + qmi.mem(1).rfmt().write(|w| { + w.set_prefix_width(embassy_rp::pac::qmi::vals::PrefixWidth::Q); + w.set_addr_width(embassy_rp::pac::qmi::vals::AddrWidth::Q); + w.set_suffix_width(embassy_rp::pac::qmi::vals::SuffixWidth::Q); + w.set_dummy_width(embassy_rp::pac::qmi::vals::DummyWidth::Q); + w.set_data_width(embassy_rp::pac::qmi::vals::DataWidth::Q); + w.set_prefix_len(embassy_rp::pac::qmi::vals::PrefixLen::_8); + w.set_dummy_len(embassy_rp::pac::qmi::vals::DummyLen::_24); + }); + + qmi.mem(1).rcmd().write(|w| w.0 = 0xEB); + + qmi.mem(1).wfmt().write(|w| { + 
w.set_prefix_width(embassy_rp::pac::qmi::vals::PrefixWidth::Q); + w.set_addr_width(embassy_rp::pac::qmi::vals::AddrWidth::Q); + w.set_suffix_width(embassy_rp::pac::qmi::vals::SuffixWidth::Q); + w.set_dummy_width(embassy_rp::pac::qmi::vals::DummyWidth::Q); + w.set_data_width(embassy_rp::pac::qmi::vals::DataWidth::Q); + w.set_prefix_len(embassy_rp::pac::qmi::vals::PrefixLen::_8); + }); + + qmi.mem(1).wcmd().write(|w| w.0 = 0x38); + + // Disable direct mode + qmi.direct_csr().write(|w| w.0 = 0); + + // Enable writes to PSRAM + xip.ctrl().modify(|w| w.set_writable_m1(true)); + psram_size +} From 31a3772bcb7548d7f7f7783f874ee4b6d3fdc778 Mon Sep 17 00:00:00 2001 From: sawyer bristol Date: Tue, 14 Oct 2025 14:07:58 -0600 Subject: [PATCH 2/3] spi psram works --- Cargo.lock | 1 + kernel/Cargo.toml | 1 + kernel/src/heap.rs | 133 -------------------- kernel/src/main.rs | 46 ++++++- kernel/src/psram.rs | 290 +++++--------------------------------------- 5 files changed, 75 insertions(+), 396 deletions(-) delete mode 100644 kernel/src/heap.rs diff --git a/Cargo.lock b/Cargo.lock index 5469205..56a52b2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1447,6 +1447,7 @@ dependencies = [ "bumpalo", "cortex-m", "cortex-m-rt", + "critical-section", "cyw43", "cyw43-pio", "defmt 0.3.100", diff --git a/kernel/Cargo.toml b/kernel/Cargo.toml index 2db6363..299866d 100644 --- a/kernel/Cargo.toml +++ b/kernel/Cargo.toml @@ -58,6 +58,7 @@ cyw43 = { version = "0.3.0", features = [ ], optional = true } cyw43-pio = { version = "0.3.0", optional = true } +critical-section = "1.2.0" embedded-hal-bus = { version = "0.3.0", features = ["async"] } embedded-hal = "0.2.7" embedded-hal_2 = { package = "embedded-hal", version = "1.0.0" } diff --git a/kernel/src/heap.rs b/kernel/src/heap.rs deleted file mode 100644 index ee82966..0000000 --- a/kernel/src/heap.rs +++ /dev/null @@ -1,133 +0,0 @@ -// This whole file was taken from -// -// - -use core::alloc::{GlobalAlloc, Layout}; -use core::mem::MaybeUninit; 
-use core::sync::atomic::{AtomicUsize, Ordering}; -use embedded_alloc::LlffHeap as Heap; - -#[global_allocator] -pub static HEAP: DualHeap = DualHeap::empty(); -const HEAP_SIZE: usize = 64 * 1024; -static mut HEAP_MEM: [MaybeUninit; HEAP_SIZE] = [MaybeUninit::uninit(); HEAP_SIZE]; - -struct Region { - start: AtomicUsize, - size: AtomicUsize, -} - -impl Region { - const fn default() -> Self { - Self { - start: AtomicUsize::new(0), - size: AtomicUsize::new(0), - } - } - - fn contains(&self, address: usize) -> bool { - let start = self.start.load(Ordering::Relaxed); - let end = self.start.load(Ordering::Relaxed); - (start..start + end).contains(&address) - } - - fn new(start: usize, size: usize) -> Self { - Self { - start: AtomicUsize::new(start), - size: AtomicUsize::new(size), - } - } -} - -/// This is an allocator that combines two regions of memory. -/// The intent is to use some of the directly connected RAM -/// for this, and if we find some XIP capable PSRAM, add that -/// as a secondary region. -/// Allocation from the primary region is always preferred, -/// as it is expected to be a bit faster than PSRAM. -/// FIXME: PSRAM-allocated memory isn't compatible with -/// CAS atomics, so we might need a bit of a think about this! 
-pub struct DualHeap { - primary: Heap, - primary_region: Region, - secondary: Heap, -} - -impl DualHeap { - pub const fn empty() -> Self { - Self { - primary: Heap::empty(), - primary_region: Region::default(), - secondary: Heap::empty(), - } - } - - unsafe fn add_primary(&self, region: Region) { - let start = region.start.load(Ordering::SeqCst); - let size = region.size.load(Ordering::SeqCst); - unsafe { - self.primary.init(start, size); - } - self.primary_region.start.store(start, Ordering::SeqCst); - self.primary_region.size.store(size, Ordering::SeqCst); - } - - unsafe fn add_secondary(&self, region: Region) { - let start = region.start.load(Ordering::SeqCst); - let size = region.size.load(Ordering::SeqCst); - unsafe { - self.secondary.init(start, size); - } - } - - pub fn used(&self) -> usize { - self.primary.used() + self.secondary.used() - } - - pub fn free(&self) -> usize { - self.primary.free() + self.secondary.free() - } -} - -unsafe impl GlobalAlloc for DualHeap { - unsafe fn alloc(&self, layout: Layout) -> *mut u8 { - unsafe { - let ptr = self.primary.alloc(layout); - if !ptr.is_null() { - return ptr; - } - // start using secondary area when primary heap is full - self.secondary.alloc(layout) - } - } - - unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { - unsafe { - let ptr_usize = ptr as usize; - if self.primary_region.contains(ptr_usize) { - self.primary.dealloc(ptr, layout); - } else { - self.secondary.dealloc(ptr, layout); - } - } - } -} - -pub fn init_heap() { - let primary_start = &raw mut HEAP_MEM as usize; - unsafe { HEAP.add_primary(Region::new(primary_start, HEAP_SIZE)) } -} - -pub fn init_qmi_psram_heap(size: u32) { - unsafe { HEAP.add_secondary(Region::new(0x11000000, size as usize)) } -} - -pub async fn free_command(_args: &[&str]) { - let ram_used = HEAP.primary.used(); - let ram_free = HEAP.primary.free(); - let ram_total = ram_used + ram_free; - - let qmi_used = HEAP.secondary.used(); - let qmi_free = HEAP.secondary.free(); - let 
qmi_total = qmi_used + qmi_free; -} diff --git a/kernel/src/main.rs b/kernel/src/main.rs index 57490ec..a98bd02 100644 --- a/kernel/src/main.rs +++ b/kernel/src/main.rs @@ -11,6 +11,7 @@ mod display; mod elf; mod framebuffer; mod peripherals; +mod psram; mod scsi; mod storage; mod ui; @@ -24,6 +25,7 @@ use crate::{ conf_peripherals, keyboard::{KeyState, read_keyboard_fifo}, }, + psram::init_psram, scsi::MSC_SHUTDOWN, storage::{SDCARD, SdCard}, ui::{SELECTIONS, clear_selection, ui_handler}, @@ -47,9 +49,11 @@ use embassy_rp::{ i2c::{self, I2c}, multicore::{Stack, spawn_core1}, peripherals::{ - DMA_CH0, DMA_CH1, I2C1, PIN_6, PIN_7, PIN_10, PIN_11, PIN_12, PIN_13, PIN_14, PIN_15, - PIN_16, PIN_17, PIN_18, PIN_19, PIN_22, SPI0, SPI1, USB, + DMA_CH0, DMA_CH1, DMA_CH3, DMA_CH4, I2C1, PIN_2, PIN_3, PIN_6, PIN_7, PIN_10, PIN_11, + PIN_12, PIN_13, PIN_14, PIN_15, PIN_16, PIN_17, PIN_18, PIN_19, PIN_20, PIN_21, PIN_22, + PIO0, SPI0, SPI1, USB, }, + pio, spi::{self, Spi}, usb as embassy_rp_usb, }; @@ -65,6 +69,7 @@ use talc::*; embassy_rp::bind_interrupts!(struct Irqs { I2C1_IRQ => i2c::InterruptHandler; USBCTRL_IRQ => embassy_rp_usb::InterruptHandler; + PIO0_IRQ_0 => pio::InterruptHandler; }); static mut CORE1_STACK: Stack<16384> = Stack::new(); @@ -113,13 +118,24 @@ async fn main(_spawner: Spawner) { cs: p.PIN_17, det: p.PIN_22, }; + let psram = Psram { + pio: p.PIO0, + sclk: p.PIN_21, + mosi: p.PIN_2, + miso: p.PIN_3, + cs: p.PIN_20, + dma1: p.DMA_CH3, + dma2: p.DMA_CH4, + }; let mcu = Mcu { i2c: p.I2C1, clk: p.PIN_7, data: p.PIN_6, }; let executor0 = EXECUTOR0.init(Executor::new()); - executor0.run(|spawner| unwrap!(spawner.spawn(kernel_task(spawner, display, sd, mcu, p.USB)))); + executor0.run(|spawner| { + unwrap!(spawner.spawn(kernel_task(spawner, display, sd, psram, mcu, p.USB))) + }); } // One-slot channel to pass EntryFn from core1 @@ -177,6 +193,15 @@ struct Sd { cs: Peri<'static, PIN_17>, det: Peri<'static, PIN_22>, } +struct Psram { + pio: Peri<'static, PIO0>, + 
sclk: Peri<'static, PIN_21>, + mosi: Peri<'static, PIN_2>, + miso: Peri<'static, PIN_3>, + cs: Peri<'static, PIN_20>, + dma1: Peri<'static, DMA_CH3>, + dma2: Peri<'static, DMA_CH4>, +} struct Mcu { i2c: Peri<'static, I2C1>, clk: Peri<'static, PIN_7>, @@ -207,6 +232,19 @@ async fn setup_display(display: Display, spawner: Spawner) { spawner.spawn(display_handler(display)).unwrap(); } +async fn setup_psram(psram: Psram) { + let psram = init_psram( + psram.pio, psram.sclk, psram.mosi, psram.miso, psram.cs, psram.dma1, psram.dma2, + ) + .await; + + defmt::info!("psram size: {}", psram.size); + + if psram.size == 0 { + defmt::info!("\u{1b}[1mExternal PSRAM was NOT found!\u{1b}[0m"); + } +} + async fn setup_sd(sd: Sd) { let mut config = spi::Config::default(); config.frequency = 400_000; @@ -227,12 +265,14 @@ async fn kernel_task( spawner: Spawner, display: Display, sd: Sd, + psram: Psram, mcu: Mcu, usb: Peri<'static, USB>, ) { setup_mcu(mcu).await; Timer::after_millis(250).await; setup_display(display, spawner).await; + setup_psram(psram).await; setup_sd(sd).await; let _usb = embassy_rp_usb::Driver::new(usb, Irqs); diff --git a/kernel/src/psram.rs b/kernel/src/psram.rs index 3debf78..881b3d7 100644 --- a/kernel/src/psram.rs +++ b/kernel/src/psram.rs @@ -3,15 +3,13 @@ // use crate::Irqs; use embassy_futures::yield_now; -use embassy_rp::PeripheralRef; -use embassy_rp::clocks::clk_peri_freq; -use embassy_rp::gpio::Drive; -use embassy_rp::peripherals::{DMA_CH1, DMA_CH2, PIN_2, PIN_3, PIN_20, PIN_21, PIO1}; +use embassy_rp::Peri; +use embassy_rp::gpio::{Drive, SlewRate}; +use embassy_rp::peripherals::{DMA_CH3, DMA_CH4, PIN_2, PIN_3, PIN_20, PIN_21, PIO0}; use embassy_rp::pio::program::pio_asm; use embassy_rp::pio::{Config, Direction, Pio, ShiftDirection}; +use embassy_rp::pio_programs::clock_divider::calculate_pio_clock_divider; use embassy_time::{Duration, Instant, Timer}; -use fixed::FixedU32; -use fixed::types::extra::U8; // The physical connections in the picocalc 
schematic are: // LABEL PICO ESP-PSRAM64H @@ -22,10 +20,6 @@ use fixed::types::extra::U8; // RAM_IO2 - PIN_4 SIO2 (QPI Mode) // RAM_IO3 - PIN_5 SIO3 (QPI Mode) -#[allow(unused)] -const PSRAM_CMD_QUAD_END: u8 = 0xf5; -#[allow(unused)] -const PSRAM_CMD_QUAD_ENABLE: u8 = 0x35; #[allow(unused)] const PSRAM_CMD_READ_ID: u8 = 0x9F; const PSRAM_CMD_RSTEN: u8 = 0x66; @@ -33,18 +27,16 @@ const PSRAM_CMD_RST: u8 = 0x99; const PSRAM_CMD_WRITE: u8 = 0x02; const PSRAM_CMD_FAST_READ: u8 = 0x0B; #[allow(unused)] -const PSRAM_CMD_QUAD_READ: u8 = 0xEB; -#[allow(unused)] -const PSRAM_CMD_QUAD_WRITE: u8 = 0x38; -#[allow(unused)] const PSRAM_CMD_NOOP: u8 = 0xFF; #[allow(unused)] const PSRAM_KNOWN_GOOD_DIE_PASS: u8 = 0x5d; +const SPEED: u32 = 133_000_000; + pub struct PsRam { - sm: embassy_rp::pio::StateMachine<'static, PIO1, 0>, - tx_ch: PeripheralRef<'static, DMA_CH1>, - rx_ch: PeripheralRef<'static, DMA_CH2>, + sm: embassy_rp::pio::StateMachine<'static, PIO0, 0>, + tx_ch: Peri<'static, DMA_CH3>, + rx_ch: Peri<'static, DMA_CH4>, pub size: u32, } @@ -177,32 +169,17 @@ impl PsRam { } pub async fn init_psram( - pio_1: PIO1, - sclk: PIN_21, - mosi: PIN_2, - miso: PIN_3, - cs: PIN_20, - dma_ch1: DMA_CH1, - dma_ch2: DMA_CH2, + pio: Peri<'static, PIO0>, + sclk: Peri<'static, PIN_21>, + mosi: Peri<'static, PIN_2>, + miso: Peri<'static, PIN_3>, + cs: Peri<'static, PIN_20>, + dma1: Peri<'static, DMA_CH3>, + dma2: Peri<'static, DMA_CH4>, ) -> PsRam { - let mut pio = Pio::new(pio_1, Irqs); + let mut pio = Pio::new(pio, Irqs); - let clock_hz = FixedU32::from_num(embassy_rp::clocks::clk_sys_freq()); - let max_psram_freq: FixedU32 = FixedU32::from_num(100_000_000); - - let divider = if clock_hz <= max_psram_freq { - FixedU32::from_num(1) - } else { - clock_hz / max_psram_freq - }; - let effective_clock = clock_hz / divider; - use embassy_rp::clocks::*; - defmt::info!( - "pll_sys_freq={} rosc_freq={} xosc_freq={}", - pll_sys_freq(), - rosc_freq(), - xosc_freq() - ); + let divider = 
calculate_pio_clock_divider(SPEED); // This pio program was taken from // @@ -238,6 +215,9 @@ done: let mut mosi = pio.common.make_pio_pin(mosi); let mut miso = pio.common.make_pio_pin(miso); + sclk.set_slew_rate(SlewRate::Fast); + mosi.set_slew_rate(SlewRate::Fast); + cs.set_drive_strength(Drive::_4mA); sclk.set_drive_strength(Drive::_4mA); mosi.set_drive_strength(Drive::_4mA); @@ -255,6 +235,7 @@ done: cfg.clock_divider = divider; let mut sm = pio.sm0; + sm.restart(); sm.set_pin_dirs(Direction::Out, &[&cs, &sclk]); sm.set_pin_dirs(Direction::Out, &[&mosi]); sm.set_pin_dirs(Direction::In, &[&miso]); @@ -263,13 +244,10 @@ done: sm.set_config(&cfg); sm.set_enable(true); - let dma_ch1 = PeripheralRef::new(dma_ch1); - let dma_ch2 = PeripheralRef::new(dma_ch2); - let mut psram = PsRam { sm, - tx_ch: dma_ch1, - rx_ch: dma_ch2, + tx_ch: dma1, + rx_ch: dma2, size: 0, }; @@ -291,7 +269,9 @@ done: let mut got = [0u8; 8]; psram.read(0, &mut got).await; const EXPECT: &[u8] = &[0, 1, 2, 3, 4, 5, 6, 7]; - if got != EXPECT {} + if got != EXPECT { + defmt::warn!("Got Read error"); + } const DEADBEEF: &[u8] = &[0xd, 0xe, 0xa, 0xd, 0xb, 0xe, 0xe, 0xf]; defmt::info!("testing write of deadbeef at 0"); @@ -304,6 +284,7 @@ done: let bad = got[addr]; if bad != DEADBEEF[addr] { let x = psram.read8(addr as u32).await; + defmt::info!("read addr: {}, got: {:X}", addr, x); } } } @@ -321,6 +302,7 @@ done: defmt::info!("PSRAM test complete"); let id = psram.read_id().await; + defmt::info!("psram id: {}", id); // id: [d, 5d, 53, 15, 49, e3, 7c, 7b] // id[0] -- manufacturer id // id[1] -- "known good die" status @@ -341,7 +323,7 @@ done: } #[allow(unused)] -async fn test_psram(psram: &mut PsRam) -> bool { +pub async fn test_psram(psram: &mut PsRam) -> bool { const REPORT_CHUNK: u32 = 256 * 1024; const BLOCK_SIZE: usize = 8; let limit = psram.size; //.min(4 * 1024 * 1024); @@ -401,215 +383,3 @@ async fn test_psram(psram: &mut PsRam) -> bool { bad_count == 0 } - -// The origin of the code in this 
file is: -// -// which is MIT/Apache-2 licensed. -#[unsafe(link_section = ".data")] -#[inline(never)] -pub fn detect_psram_qmi(qmi: &embassy_rp::pac::qmi::Qmi) -> u32 { - const GPIO_FUNC_XIP_CS1: u8 = 9; - const XIP_CS_PIN: usize = 47; - embassy_rp::pac::PADS_BANK0.gpio(XIP_CS_PIN).modify(|w| { - w.set_iso(true); - }); - embassy_rp::pac::PADS_BANK0.gpio(XIP_CS_PIN).modify(|w| { - w.set_ie(true); - w.set_od(false); - }); - embassy_rp::pac::IO_BANK0 - .gpio(XIP_CS_PIN) - .ctrl() - .write(|w| w.set_funcsel(GPIO_FUNC_XIP_CS1)); - embassy_rp::pac::PADS_BANK0.gpio(XIP_CS_PIN).modify(|w| { - w.set_iso(false); - }); - - critical_section::with(|_cs| { - // Try and read the PSRAM ID via direct_csr. - qmi.direct_csr().write(|w| { - w.set_clkdiv(30); - w.set_en(true); - }); - - // Need to poll for the cooldown on the last XIP transfer to expire - // (via direct-mode BUSY flag) before it is safe to perform the first - // direct-mode operation - while qmi.direct_csr().read().busy() { - // rp235x_hal::arch::nop(); - } - - // Exit out of QMI in case we've inited already - qmi.direct_csr().modify(|w| w.set_assert_cs1n(true)); - - // Transmit the command to exit QPI quad mode - read ID as standard SPI - // Transmit as quad. 
- qmi.direct_tx().write(|w| { - w.set_oe(true); - w.set_iwidth(embassy_rp::pac::qmi::vals::Iwidth::Q); - w.set_data(PSRAM_CMD_QUAD_END.into()); - }); - - while qmi.direct_csr().read().busy() { - // rp235x_hal::arch::nop(); - } - - let _ = qmi.direct_rx().read(); - - qmi.direct_csr().modify(|w| { - w.set_assert_cs1n(false); - }); - - // Read the id - qmi.direct_csr().modify(|w| { - w.set_assert_cs1n(true); - }); - - // kgd is "known good die" - let mut kgd: u16 = 0; - let mut eid: u16 = 0; - for i in 0usize..7 { - qmi.direct_tx().write(|w| { - w.set_data(if i == 0 { - PSRAM_CMD_READ_ID.into() - } else { - PSRAM_CMD_NOOP.into() - }) - }); - - while !qmi.direct_csr().read().txempty() { - // rp235x_hal::arch::nop(); - } - - while qmi.direct_csr().read().busy() { - // rp235x_hal::arch::nop(); - } - - let value = qmi.direct_rx().read().direct_rx(); - match i { - 5 => { - kgd = value; - } - 6 => { - eid = value; - } - _ => {} - } - } - - qmi.direct_csr().modify(|w| { - w.set_assert_cs1n(false); - w.set_en(false); - }); - let mut param_size: u32 = 0; - if kgd == PSRAM_KNOWN_GOOD_DIE_PASS as u16 { - param_size = 1024 * 1024; - let size_id = eid >> 5; - if eid == 0x26 || size_id == 2 { - param_size *= 8; - } else if size_id == 0 { - param_size *= 2; - } else if size_id == 1 { - param_size *= 4; - } - } - param_size - }) -} - -#[unsafe(link_section = ".data")] -#[inline(never)] -pub fn init_psram_qmi( - qmi: &embassy_rp::pac::qmi::Qmi, - xip: &embassy_rp::pac::xip_ctrl::XipCtrl, -) -> u32 { - let psram_size = detect_psram_qmi(qmi); - - if psram_size == 0 { - return 0; - } - - // Set PSRAM timing for APS6404 - // - // Using an rxdelay equal to the divisor isn't enough when running the APS6404 close to 133MHz. - // So: don't allow running at divisor 1 above 100MHz (because delay of 2 would be too late), - // and add an extra 1 to the rxdelay if the divided clock is > 100MHz (i.e. sys clock > 200MHz). 
- const MAX_PSRAM_FREQ: u32 = 133_000_000; - - let clock_hz = clk_peri_freq(); - - let mut divisor: u32 = (clock_hz + MAX_PSRAM_FREQ - 1) / MAX_PSRAM_FREQ; - if divisor == 1 && clock_hz > 100_000_000 { - divisor = 2; - } - let mut rxdelay: u32 = divisor; - if clock_hz / divisor > 100_000_000 { - rxdelay += 1; - } - - // - Max select must be <= 8us. The value is given in multiples of 64 system clocks. - // - Min deselect must be >= 18ns. The value is given in system clock cycles - ceil(divisor / 2). - let clock_period_fs: u64 = 1_000_000_000_000_000_u64 / u64::from(clock_hz); - let max_select: u8 = ((125 * 1_000_000) / clock_period_fs) as u8; - let min_deselect: u32 = ((18 * 1_000_000 + (clock_period_fs - 1)) / clock_period_fs - - u64::from(divisor + 1) / 2) as u32; - - qmi.direct_csr().write(|w| { - w.set_clkdiv(10); - w.set_en(true); - w.set_auto_cs1n(true); - }); - - while qmi.direct_csr().read().busy() { - // rp235x_hal::arch::nop(); - } - - qmi.direct_tx().write(|w| { - w.set_nopush(true); - w.0 = 0x35; - }); - - while qmi.direct_csr().read().busy() { - // rp235x_hal::arch::nop(); - } - - qmi.mem(1).timing().write(|w| { - w.set_cooldown(1); - w.set_pagebreak(embassy_rp::pac::qmi::vals::Pagebreak::_1024); - w.set_max_select(max_select as u8); - w.set_min_deselect(min_deselect as u8); - w.set_rxdelay(rxdelay as u8); - w.set_clkdiv(divisor as u8); - }); - - // // Set PSRAM commands and formats - qmi.mem(1).rfmt().write(|w| { - w.set_prefix_width(embassy_rp::pac::qmi::vals::PrefixWidth::Q); - w.set_addr_width(embassy_rp::pac::qmi::vals::AddrWidth::Q); - w.set_suffix_width(embassy_rp::pac::qmi::vals::SuffixWidth::Q); - w.set_dummy_width(embassy_rp::pac::qmi::vals::DummyWidth::Q); - w.set_data_width(embassy_rp::pac::qmi::vals::DataWidth::Q); - w.set_prefix_len(embassy_rp::pac::qmi::vals::PrefixLen::_8); - w.set_dummy_len(embassy_rp::pac::qmi::vals::DummyLen::_24); - }); - - qmi.mem(1).rcmd().write(|w| w.0 = 0xEB); - - qmi.mem(1).wfmt().write(|w| { - 
w.set_prefix_width(embassy_rp::pac::qmi::vals::PrefixWidth::Q); - w.set_addr_width(embassy_rp::pac::qmi::vals::AddrWidth::Q); - w.set_suffix_width(embassy_rp::pac::qmi::vals::SuffixWidth::Q); - w.set_dummy_width(embassy_rp::pac::qmi::vals::DummyWidth::Q); - w.set_data_width(embassy_rp::pac::qmi::vals::DataWidth::Q); - w.set_prefix_len(embassy_rp::pac::qmi::vals::PrefixLen::_8); - }); - - qmi.mem(1).wcmd().write(|w| w.0 = 0x38); - - // Disable direct mode - qmi.direct_csr().write(|w| w.0 = 0); - - // Enable writes to PSRAM - xip.ctrl().modify(|w| w.set_writable_m1(true)); - psram_size -} From 5414725241fa1738effe2d9e99d9986dc8a0af9f Mon Sep 17 00:00:00 2001 From: sawyer bristol Date: Sun, 26 Oct 2025 20:21:34 -0600 Subject: [PATCH 3/3] add pimoroni2w support --- Cargo.lock | 56 ++++++++ Cargo.toml | 1 + README.md | 4 +- justfile | 8 +- kernel/Cargo.toml | 2 + kernel/build.rs | 7 +- kernel/pimoroni2w.x | 60 +++++++++ kernel/{memory.x => rp2350.x} | 0 kernel/src/abi.rs | 6 +- kernel/src/heap.rs | 132 +++++++++++++++++++ kernel/src/main.rs | 25 +++- kernel/src/psram.rs | 242 +++++++++++++++++++++++++++++++++- kernel/src/scsi/mod.rs | 3 + 13 files changed, 533 insertions(+), 13 deletions(-) create mode 100644 kernel/pimoroni2w.x rename kernel/{memory.x => rp2350.x} (100%) create mode 100644 kernel/src/heap.rs diff --git a/Cargo.lock b/Cargo.lock index 56a52b2..b405680 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -128,6 +128,12 @@ dependencies = [ "rustc_version", ] +[[package]] +name = "base64" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" + [[package]] name = "bit-set" version = "0.5.3" @@ -310,6 +316,12 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "const-default" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b396d1f76d455557e1218ec8066ae14bba60b4b36ecd55577ba979f5db7ecaa" + [[package]] 
name = "cortex-m" version = "0.7.7" @@ -880,6 +892,18 @@ dependencies = [ "embedded-io-async", ] +[[package]] +name = "embedded-alloc" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f2de9133f68db0d4627ad69db767726c99ff8585272716708227008d3f1bddd" +dependencies = [ + "const-default", + "critical-section", + "linked_list_allocator", + "rlsf", +] + [[package]] name = "embedded-graphics" version = "0.8.1" @@ -1459,6 +1483,7 @@ dependencies = [ "embassy-sync 0.7.2", "embassy-time 0.5.0", "embassy-usb", + "embedded-alloc", "embedded-graphics", "embedded-hal 0.2.7", "embedded-hal 1.0.0", @@ -1580,6 +1605,12 @@ dependencies = [ "libc", ] +[[package]] +name = "linked_list_allocator" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afa463f5405ee81cdb9cc2baf37e08ec7e4c8209442b5d72c04cfb2cd6e6286" + [[package]] name = "linux-raw-sys" version = "0.11.0" @@ -2065,6 +2096,18 @@ dependencies = [ "bytemuck", ] +[[package]] +name = "rlsf" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "222fb240c3286247ecdee6fa5341e7cdad0ffdf8e7e401d9937f2d58482a20bf" +dependencies = [ + "cfg-if", + "const-default", + "libc", + "svgbobdoc", +] + [[package]] name = "rp-pac" version = "7.0.0" @@ -2351,6 +2394,19 @@ dependencies = [ "syn 2.0.104", ] +[[package]] +name = "svgbobdoc" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2c04b93fc15d79b39c63218f15e3fdffaa4c227830686e3b7c5f41244eb3e50" +dependencies = [ + "base64", + "proc-macro2", + "quote", + "syn 1.0.109", + "unicode-width", +] + [[package]] name = "syn" version = "1.0.109" diff --git a/Cargo.toml b/Cargo.toml index 5f3e4be..ef81558 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,6 +2,7 @@ resolver = "3" members = [ "kernel", + "abi_sys", "abi", "user-apps/calculator", "user-apps/snake", diff --git a/README.md b/README.md index 9bffec0..2d4d4e0 
100644 --- a/README.md +++ b/README.md @@ -29,4 +29,6 @@ git clone https://github.com/LegitCamper/picocalc-os-rs.git cd picocalc-os-rs just userapps # copy the build applications from target/thumbv8m.main-none-eabihf/release-binary/application to the sdcard and rename them to app.bin -just kernel-release # keep in mind that https://github.com/StripedMonkey/elf2uf2-rs version is required until https://github.com/JoNil/elf2uf2-rs/pull/41 is merged + +# has builds for the official rp2350 board and the pimoroni2w board +just kernel-release rp235x # keep in mind that https://github.com/StripedMonkey/elf2uf2-rs version is required until https://github.com/JoNil/elf2uf2-rs/pull/41 is merged diff --git a/justfile b/justfile index 0f1a419..2175ae6 100644 --- a/justfile +++ b/justfile @@ -1,7 +1,7 @@ -kernel-dev: - cargo run --bin kernel -kernel-release: - cargo build --bin kernel --release +kernel-dev board: + cargo run --bin kernel --features {{board}} +kernel-release board: + cargo build --bin kernel --release --no-default-features --features {{board}} elf2uf2-rs -d target/thumbv8m.main-none-eabihf/release/kernel binary-args := "RUSTFLAGS=\"-C link-arg=-pie -C relocation-model=pic\"" diff --git a/kernel/Cargo.toml b/kernel/Cargo.toml index 299866d..b2f237a 100644 --- a/kernel/Cargo.toml +++ b/kernel/Cargo.toml @@ -12,6 +12,7 @@ bench = false [features] default = ["rp235x", "defmt"] +pimoroni2w = ["rp235x"] rp2040 = ["embassy-rp/rp2040"] rp235x = ["embassy-rp/rp235xb"] trouble = ["dep:bt-hci", "dep:cyw43", "dep:cyw43-pio", "dep:trouble-host"] @@ -89,6 +90,7 @@ spin = "0.10.0" num_enum = { version = "0.7.4", default-features = false } goblin = { version = "0.10.1", default-features = false, features = ["elf32"] } talc = "4.4.3" +embedded-alloc = "0.6.0" bumpalo = "3.19.0" abi_sys = { path = "../abi_sys" } diff --git a/kernel/build.rs b/kernel/build.rs index 30691aa..8220df0 100644 --- a/kernel/build.rs +++ b/kernel/build.rs @@ -13,13 +13,18 @@ use std::fs::File; use 
std::io::Write; use std::path::PathBuf; +#[cfg(all(feature = "rp235x", not(feature = "pimoroni2w")))] +const MEMORY: &'static [u8] = include_bytes!("rp2350.x"); +#[cfg(feature = "pimoroni2w")] +const MEMORY: &'static [u8] = include_bytes!("pimoroni2w.x"); + fn main() { // Put `memory.x` in our output directory and ensure it's // on the linker search path. let out = &PathBuf::from(env::var_os("OUT_DIR").unwrap()); File::create(out.join("memory.x")) .unwrap() - .write_all(include_bytes!("memory.x")) + .write_all(MEMORY) .unwrap(); println!("cargo:rustc-link-search={}", out.display()); diff --git a/kernel/pimoroni2w.x b/kernel/pimoroni2w.x new file mode 100644 index 0000000..56e741f --- /dev/null +++ b/kernel/pimoroni2w.x @@ -0,0 +1,60 @@ +MEMORY { + FLASH : ORIGIN = 0x10000000, LENGTH = 16M - 4K + + RAM : ORIGIN = 0x20000000, LENGTH = 512K + SRAM4 : ORIGIN = 0x20080000, LENGTH = 4K + SRAM5 : ORIGIN = 0x20081000, LENGTH = 4K +} + +SECTIONS { + /* ### Boot ROM info + * + * Goes after .vector_table, to keep it in the first 4K of flash + * where the Boot ROM (and picotool) can find it + */ + .start_block : ALIGN(4) + { + __start_block_addr = .; + KEEP(*(.start_block)); + KEEP(*(.boot_info)); + } > FLASH + +} INSERT AFTER .vector_table; + +/* move .text to start /after/ the boot info */ +_stext = ADDR(.start_block) + SIZEOF(.start_block); + +SECTIONS { + /* ### Picotool 'Binary Info' Entries + * + * Picotool looks through this block (as we have pointers to it in our + * header) to find interesting information. + */ + .bi_entries : ALIGN(4) + { + /* We put this in the header */ + __bi_entries_start = .; + /* Here are the entries */ + KEEP(*(.bi_entries)); + /* Keep this block a nice round size */ + . = ALIGN(4); + /* We put this in the header */ + __bi_entries_end = .; + } > FLASH +} INSERT AFTER .text; + +SECTIONS { + /* ### Boot ROM extra info + * + * Goes after everything in our program, so it can contain a signature.
+ */ + .end_block : ALIGN(4) + { + __end_block_addr = .; + KEEP(*(.end_block)); + } > FLASH + +} INSERT AFTER .uninit; + +PROVIDE(start_to_end = __end_block_addr - __start_block_addr); +PROVIDE(end_to_start = __start_block_addr - __end_block_addr); diff --git a/kernel/memory.x b/kernel/rp2350.x similarity index 100% rename from kernel/memory.x rename to kernel/rp2350.x diff --git a/kernel/src/abi.rs b/kernel/src/abi.rs index 3bed7c0..027b021 100644 --- a/kernel/src/abi.rs +++ b/kernel/src/abi.rs @@ -20,9 +20,11 @@ pub extern "C" fn print(ptr: *const u8, len: usize) { // SAFETY: caller guarantees `ptr` is valid for `len` bytes let slice = unsafe { core::slice::from_raw_parts(ptr, len) }; - if let Ok(msg) = core::str::from_utf8(slice) { - defmt::info!("print: {}", msg); + if let Ok(_msg) = core::str::from_utf8(slice) { + #[cfg(feature = "defmt")] + defmt::info!("print: {}", _msg); } else { + #[cfg(feature = "defmt")] defmt::warn!("print: "); } } diff --git a/kernel/src/heap.rs b/kernel/src/heap.rs new file mode 100644 index 0000000..c4f444c --- /dev/null +++ b/kernel/src/heap.rs @@ -0,0 +1,132 @@ +// This whole file was taken from: +// https://github.com/wezterm/picocalc-wezterm/blob/main/src/heap.rs + +use core::alloc::{GlobalAlloc, Layout}; +use core::mem::MaybeUninit; +use core::sync::atomic::{AtomicUsize, Ordering}; +use embedded_alloc::LlffHeap as Heap; + +#[global_allocator] +pub static HEAP: DualHeap = DualHeap::empty(); +const HEAP_SIZE: usize = 64 * 1024; +static mut HEAP_MEM: [MaybeUninit<u8>; HEAP_SIZE] = [MaybeUninit::uninit(); HEAP_SIZE]; + +struct Region { + start: AtomicUsize, + size: AtomicUsize, +} + +impl Region { + const fn default() -> Self { + Self { + start: AtomicUsize::new(0), + size: AtomicUsize::new(0), + } + } + + fn contains(&self, address: usize) -> bool { + let start = self.start.load(Ordering::Relaxed); + let size = self.size.load(Ordering::Relaxed); + (start..start + size).contains(&address) + } + + fn new(start: usize, size: usize) -> Self
{ + Self { + start: AtomicUsize::new(start), + size: AtomicUsize::new(size), + } + } +} + +/// This is an allocator that combines two regions of memory. +/// The intent is to use some of the directly connected RAM +/// for this, and if we find some XIP capable PSRAM, add that +/// as a secondary region. +/// Allocation from the primary region is always preferred, +/// as it is expected to be a bit faster than PSRAM. +/// FIXME: PSRAM-allocated memory isn't compatible with +/// CAS atomics, so we might need a bit of a think about this! +pub struct DualHeap { + primary: Heap, + primary_region: Region, + secondary: Heap, +} + +impl DualHeap { + pub const fn empty() -> Self { + Self { + primary: Heap::empty(), + primary_region: Region::default(), + secondary: Heap::empty(), + } + } + + unsafe fn add_primary(&self, region: Region) { + let start = region.start.load(Ordering::SeqCst); + let size = region.size.load(Ordering::SeqCst); + unsafe { + self.primary.init(start, size); + } + self.primary_region.start.store(start, Ordering::SeqCst); + self.primary_region.size.store(size, Ordering::SeqCst); + } + + unsafe fn add_secondary(&self, region: Region) { + let start = region.start.load(Ordering::SeqCst); + let size = region.size.load(Ordering::SeqCst); + unsafe { + self.secondary.init(start, size); + } + } + + pub fn used(&self) -> usize { + self.primary.used() + self.secondary.used() + } + + pub fn free(&self) -> usize { + self.primary.free() + self.secondary.free() + } +} + +unsafe impl GlobalAlloc for DualHeap { + unsafe fn alloc(&self, layout: Layout) -> *mut u8 { + unsafe { + let ptr = self.primary.alloc(layout); + if !ptr.is_null() { + return ptr; + } + // start using secondary area when primary heap is full + self.secondary.alloc(layout) + } + } + + unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { + unsafe { + let ptr_usize = ptr as usize; + if self.primary_region.contains(ptr_usize) { + self.primary.dealloc(ptr, layout); + } else { + 
self.secondary.dealloc(ptr, layout); + } + } + } +} + +pub fn init_heap() { + let primary_start = &raw mut HEAP_MEM as usize; + unsafe { HEAP.add_primary(Region::new(primary_start, HEAP_SIZE)) } +} + +pub fn init_qmi_psram_heap(size: u32) { + unsafe { HEAP.add_secondary(Region::new(0x11000000, size as usize)) } +} + +pub async fn free_command(_args: &[&str]) { + let ram_used = HEAP.primary.used(); + let ram_free = HEAP.primary.free(); + let ram_total = ram_used + ram_free; + + let qmi_used = HEAP.secondary.used(); + let qmi_free = HEAP.secondary.free(); + let qmi_total = qmi_used + qmi_free; +} diff --git a/kernel/src/main.rs b/kernel/src/main.rs index a98bd02..887b65d 100644 --- a/kernel/src/main.rs +++ b/kernel/src/main.rs @@ -10,6 +10,8 @@ mod abi; mod display; mod elf; mod framebuffer; +#[cfg(feature = "pimoroni2w")] +mod heap; mod peripherals; mod psram; mod scsi; @@ -18,6 +20,9 @@ mod ui; mod usb; mod utils; +#[cfg(feature = "pimoroni2w")] +use crate::{heap::init_qmi_psram_heap, psram::init_psram_qmi}; + use crate::{ abi::{KEY_CACHE, MS_SINCE_LAUNCH}, display::{FRAMEBUFFER, display_handler, init_display}, @@ -40,7 +45,6 @@ use embedded_graphics::{ use {defmt_rtt as _, panic_probe as _}; use core::sync::atomic::{AtomicBool, Ordering}; -use defmt::unwrap; use embassy_executor::{Executor, Spawner}; use embassy_futures::{join::join, select::select}; use embassy_rp::{ @@ -76,8 +80,10 @@ static mut CORE1_STACK: Stack<16384> = Stack::new(); static EXECUTOR0: StaticCell = StaticCell::new(); static EXECUTOR1: StaticCell = StaticCell::new(); +#[cfg(not(feature = "pimoroni2w"))] static mut ARENA: [u8; 200 * 1024] = [0; 200 * 1024]; +#[cfg(not(feature = "pimoroni2w"))] #[global_allocator] static ALLOCATOR: Talck, ClaimOnOom> = Talc::new(unsafe { ClaimOnOom::new(Span::from_array(core::ptr::addr_of!(ARENA).cast_mut())) }) @@ -95,7 +101,7 @@ async fn main(_spawner: Spawner) { unsafe { &mut *core::ptr::addr_of_mut!(CORE1_STACK) }, move || { let executor1 = 
EXECUTOR1.init(Executor::new()); - executor1.run(|spawner| unwrap!(spawner.spawn(userland_task()))); + executor1.run(|spawner| spawner.spawn(userland_task()).unwrap()); }, ); @@ -134,7 +140,9 @@ async fn main(_spawner: Spawner) { }; let executor0 = EXECUTOR0.init(Executor::new()); executor0.run(|spawner| { - unwrap!(spawner.spawn(kernel_task(spawner, display, sd, psram, mcu, p.USB))) + spawner + .spawn(kernel_task(spawner, display, sd, psram, mcu, p.USB)) + .unwrap() }); } @@ -159,6 +167,7 @@ async fn userland_task() { } unsafe { MS_SINCE_LAUNCH = Some(Instant::now()) }; + #[cfg(feature = "defmt")] defmt::info!("Executing Binary"); entry(); @@ -238,11 +247,21 @@ async fn setup_psram(psram: Psram) { ) .await; + #[cfg(feature = "defmt")] defmt::info!("psram size: {}", psram.size); if psram.size == 0 { + #[cfg(feature = "defmt")] defmt::info!("\u{1b}[1mExternal PSRAM was NOT found!\u{1b}[0m"); } + + #[cfg(feature = "pimoroni2w")] + { + let psram_qmi_size = init_psram_qmi(&embassy_rp::pac::QMI, &embassy_rp::pac::XIP_CTRL); + if psram_qmi_size > 0 { + init_qmi_psram_heap(psram_qmi_size); + } + } } async fn setup_sd(sd: Sd) { diff --git a/kernel/src/psram.rs b/kernel/src/psram.rs index 881b3d7..9145090 100644 --- a/kernel/src/psram.rs +++ b/kernel/src/psram.rs @@ -4,6 +4,7 @@ use crate::Irqs; use embassy_futures::yield_now; use embassy_rp::Peri; +use embassy_rp::clocks::clk_peri_freq; use embassy_rp::gpio::{Drive, SlewRate}; use embassy_rp::peripherals::{DMA_CH3, DMA_CH4, PIN_2, PIN_3, PIN_20, PIN_21, PIO0}; use embassy_rp::pio::program::pio_asm; @@ -20,6 +21,10 @@ use embassy_time::{Duration, Instant, Timer}; // RAM_IO2 - PIN_4 SIO2 (QPI Mode) // RAM_IO3 - PIN_5 SIO3 (QPI Mode) +#[allow(unused)] +const PSRAM_CMD_QUAD_END: u8 = 0xf5; +#[allow(unused)] +const PSRAM_CMD_QUAD_ENABLE: u8 = 0x35; #[allow(unused)] const PSRAM_CMD_READ_ID: u8 = 0x9F; const PSRAM_CMD_RSTEN: u8 = 0x66; @@ -27,11 +32,15 @@ const PSRAM_CMD_RST: u8 = 0x99; const PSRAM_CMD_WRITE: u8 = 0x02; const 
PSRAM_CMD_FAST_READ: u8 = 0x0B; #[allow(unused)] +const PSRAM_CMD_QUAD_READ: u8 = 0xEB; +#[allow(unused)] +const PSRAM_CMD_QUAD_WRITE: u8 = 0x38; +#[allow(unused)] const PSRAM_CMD_NOOP: u8 = 0xFF; #[allow(unused)] const PSRAM_KNOWN_GOOD_DIE_PASS: u8 = 0x5d; -const SPEED: u32 = 133_000_000; +const MAX_PSRAM_FREQ: u32 = 133_000_000; pub struct PsRam { sm: embassy_rp::pio::StateMachine<'static, PIO0, 0>, @@ -179,7 +188,7 @@ pub async fn init_psram( ) -> PsRam { let mut pio = Pio::new(pio, Irqs); - let divider = calculate_pio_clock_divider(SPEED); + let divider = calculate_pio_clock_divider(MAX_PSRAM_FREQ); // This pio program was taken from // @@ -257,6 +266,7 @@ done: psram.send_command(&[8, 0, PSRAM_CMD_RST], &mut []).await; Timer::after(Duration::from_micros(100)).await; + #[cfg(feature = "defmt")] defmt::info!("Verifying 1 byte write and read..."); for i in 0..10u8 { psram.write8(i as u32, i).await; @@ -265,18 +275,22 @@ done: let n = psram.read8(i as u32).await; if n as u32 != i {} } + #[cfg(feature = "defmt")] defmt::info!("testing read again @ 0"); let mut got = [0u8; 8]; psram.read(0, &mut got).await; const EXPECT: &[u8] = &[0, 1, 2, 3, 4, 5, 6, 7]; if got != EXPECT { + #[cfg(feature = "defmt")] defmt::warn!("Got Read error"); } const DEADBEEF: &[u8] = &[0xd, 0xe, 0xa, 0xd, 0xb, 0xe, 0xe, 0xf]; + #[cfg(feature = "defmt")] defmt::info!("testing write of deadbeef at 0"); psram.write(0, DEADBEEF).await; + #[cfg(feature = "defmt")] defmt::info!("testing read of deadbeef from 0"); psram.read(0, &mut got).await; if got != DEADBEEF { @@ -284,6 +298,7 @@ done: let bad = got[addr]; if bad != DEADBEEF[addr] { let x = psram.read8(addr as u32).await; + #[cfg(feature = "defmt")] defmt::info!("read addr: {}, got: {:X}", addr, x); } } @@ -299,9 +314,11 @@ done: if got != TEST_STRING {} + #[cfg(feature = "defmt")] defmt::info!("PSRAM test complete"); let id = psram.read_id().await; + #[cfg(feature = "defmt")] defmt::info!("psram id: {}", id); // id: [d, 5d, 53, 15, 49, e3, 
7c, 7b] // id[0] -- manufacturer id @@ -355,6 +372,7 @@ pub async fn test_psram(psram: &mut PsRam) -> bool { } let writes_took = start.elapsed(); + #[cfg(feature = "defmt")] defmt::info!("Starting reads..."); Timer::after(Duration::from_millis(200)).await; @@ -383,3 +401,252 @@ pub async fn test_psram(psram: &mut PsRam) -> bool { bad_count == 0 }
+
+// The origin of the code in this file is:
+//
+// which is MIT/Apache-2 licensed.
+/// Probes QMI chip-select 1 for a PSRAM device and returns its size in bytes.
+///
+/// Routes GPIO 47 to the XIP CS1 function and then, inside a critical section,
+/// drives QMI direct mode by hand: it sends `PSRAM_CMD_QUAD_END` at quad width,
+/// issues `PSRAM_CMD_READ_ID`, and keeps the values received on the sixth and
+/// seventh transfers as the KGD ("known good die") and EID values.
+///
+/// Returns `0` if KGD does not equal `PSRAM_KNOWN_GOOD_DIE_PASS`. Otherwise the
+/// result is 1 MiB multiplied by 2, 4 or 8 according to the EID (an EID that
+/// matches none of the handled cases leaves it at 1 MiB).
+///
+/// The function is linked into `.data` and never inlined; presumably so that
+/// it executes from RAM while direct mode is active (NOTE(review): confirm).
+#[unsafe(link_section = ".data")]
+#[inline(never)]
+pub fn detect_psram_qmi(qmi: &embassy_rp::pac::qmi::Qmi) -> u32 {
+    const GPIO_FUNC_XIP_CS1: u8 = 9;
+    const XIP_CS_PIN: usize = 47;
+    embassy_rp::pac::PADS_BANK0.gpio(XIP_CS_PIN).modify(|w| {
+        w.set_iso(true);
+    });
+    embassy_rp::pac::PADS_BANK0.gpio(XIP_CS_PIN).modify(|w| {
+        w.set_ie(true);
+        w.set_od(false);
+    });
+    embassy_rp::pac::IO_BANK0
+        .gpio(XIP_CS_PIN)
+        .ctrl()
+        .write(|w| w.set_funcsel(GPIO_FUNC_XIP_CS1));
+    embassy_rp::pac::PADS_BANK0.gpio(XIP_CS_PIN).modify(|w| {
+        w.set_iso(false);
+    });
+
+    critical_section::with(|_cs| {
+        // Try and read the PSRAM ID via direct_csr.
+        qmi.direct_csr().write(|w| {
+            w.set_clkdiv(30);
+            w.set_en(true);
+        });
+
+        // Need to poll for the cooldown on the last XIP transfer to expire
+        // (via direct-mode BUSY flag) before it is safe to perform the first
+        // direct-mode operation
+        while qmi.direct_csr().read().busy() {
+            // rp235x_hal::arch::nop();
+        }
+
+        // Exit out of QMI in case we've inited already
+        qmi.direct_csr().modify(|w| w.set_assert_cs1n(true));
+
+        // Transmit the command to exit QPI quad mode - read ID as standard SPI
+        // Transmit as quad.
+        qmi.direct_tx().write(|w| {
+            w.set_oe(true);
+            w.set_iwidth(embassy_rp::pac::qmi::vals::Iwidth::Q);
+            w.set_data(PSRAM_CMD_QUAD_END.into());
+        });
+
+        while qmi.direct_csr().read().busy() {
+            // rp235x_hal::arch::nop();
+        }
+
+        let _ = qmi.direct_rx().read();
+
+        qmi.direct_csr().modify(|w| {
+            w.set_assert_cs1n(false);
+        });
+
+        // Read the id
+        qmi.direct_csr().modify(|w| {
+            w.set_assert_cs1n(true);
+        });
+
+        // kgd is "known good die"
+        let mut kgd: u16 = 0;
+        let mut eid: u16 = 0;
+        for i in 0usize..7 {
+            qmi.direct_tx().write(|w| {
+                w.set_data(if i == 0 {
+                    PSRAM_CMD_READ_ID.into()
+                } else {
+                    PSRAM_CMD_NOOP.into()
+                })
+            });
+
+            while !qmi.direct_csr().read().txempty() {
+                // rp235x_hal::arch::nop();
+            }
+
+            while qmi.direct_csr().read().busy() {
+                // rp235x_hal::arch::nop();
+            }
+
+            let value = qmi.direct_rx().read().direct_rx();
+            match i {
+                5 => {
+                    kgd = value;
+                }
+                6 => {
+                    eid = value;
+                }
+                _ => {}
+            }
+        }
+
+        qmi.direct_csr().modify(|w| {
+            w.set_assert_cs1n(false);
+            w.set_en(false);
+        });
+        let mut param_size: u32 = 0;
+        if kgd == PSRAM_KNOWN_GOOD_DIE_PASS as u16 {
+            param_size = 1024 * 1024;
+            let size_id = eid >> 5;
+            if eid == 0x26 || size_id == 2 {
+                param_size *= 8;
+            } else if size_id == 0 {
+                param_size *= 2;
+            } else if size_id == 1 {
+                param_size *= 4;
+            }
+        }
+        param_size
+    })
+}
+
+/// Detects the PSRAM on QMI chip-select 1 and sets up memory window 1 for it.
+///
+/// Calls [`detect_psram_qmi`] first and returns `0` straight away when no
+/// device is reported. Otherwise it derives the clock divisor, RX delay,
+/// max-select and min-deselect values from `clk_peri_freq()`, sends command
+/// `0x35` through direct mode, programs the `mem(1)` timing, read/write format
+/// and read/write command registers, switches direct mode off again and
+/// finally sets `writable_m1` in `xip`.
+///
+/// Returns the size in bytes reported by [`detect_psram_qmi`].
+#[unsafe(link_section = ".data")]
+#[inline(never)]
+pub fn init_psram_qmi(
+    qmi: &embassy_rp::pac::qmi::Qmi,
+    xip: &embassy_rp::pac::xip_ctrl::XipCtrl,
+) -> u32 {
+    let psram_size = detect_psram_qmi(qmi);
+
+    if psram_size == 0 {
+        return 0;
+    }
+
+    // Set PSRAM timing for APS6404
+    //
+    // Using an rxdelay equal to the divisor isn't enough when running the APS6404 close to 133MHz.
+    // So: don't allow running at divisor 1 above 100MHz (because delay of 2 would be too late),
+    // and add an extra 1 to the rxdelay if the divided clock is > 100MHz (i.e. sys clock > 200MHz).
+    const MAX_PSRAM_FREQ: u32 = 133_000_000;
+
+    // NOTE(review): the comments here talk about the system clock, but this reads
+    // clk_peri; confirm the two are configured to the same frequency.
+    let clock_hz = clk_peri_freq();
+
+    let mut divisor: u32 = (clock_hz + MAX_PSRAM_FREQ - 1) / MAX_PSRAM_FREQ;
+    if divisor == 1 && clock_hz > 100_000_000 {
+        divisor = 2;
+    }
+    let mut rxdelay: u32 = divisor;
+    if clock_hz / divisor > 100_000_000 {
+        rxdelay += 1;
+    }
+
+    // - Max select must be <= 8us. The value is given in multiples of 64 system clocks.
+    // - Min deselect must be >= 18ns. The value is given in system clock cycles - ceil(divisor / 2).
+    // clock_period_fs is in femtoseconds, so 125 * 1_000_000 fs = 125 ns = 8 us / 64.
+    let clock_period_fs: u64 = 1_000_000_000_000_000_u64 / u64::from(clock_hz);
+    let max_select: u8 = ((125 * 1_000_000) / clock_period_fs) as u8;
+    let min_deselect: u32 = ((18 * 1_000_000 + (clock_period_fs - 1)) / clock_period_fs
+        - u64::from(divisor + 1) / 2) as u32;
+
+    #[cfg(feature = "defmt")]
+    defmt::info!(
+        "clock_period_fs={} max_select={} min_deselect={}",
+        clock_period_fs,
+        max_select,
+        min_deselect
+    );
+
+    qmi.direct_csr().write(|w| {
+        w.set_clkdiv(10);
+        w.set_en(true);
+        w.set_auto_cs1n(true);
+    });
+
+    while qmi.direct_csr().read().busy() {
+        // rp235x_hal::arch::nop();
+    }
+
+    // Send command 0x35 with NOPUSH so no reply word lands in the RX FIFO.
+    // The command must go through `set_data`: assigning `w.0` directly would
+    // replace the whole register value and clear the NOPUSH bit set above.
+    qmi.direct_tx().write(|w| {
+        w.set_nopush(true);
+        w.set_data(0x35);
+    });
+
+    while qmi.direct_csr().read().busy() {
+        // rp235x_hal::arch::nop();
+    }
+
+    qmi.mem(1).timing().write(|w| {
+        w.set_cooldown(1);
+        w.set_pagebreak(embassy_rp::pac::qmi::vals::Pagebreak::_1024);
+        w.set_max_select(max_select);
+        w.set_min_deselect(min_deselect as u8);
+        w.set_rxdelay(rxdelay as u8);
+        w.set_clkdiv(divisor as u8);
+    });
+
+    // Set PSRAM commands and formats
+    qmi.mem(1).rfmt().write(|w| {
+        w.set_prefix_width(embassy_rp::pac::qmi::vals::PrefixWidth::Q);
+        w.set_addr_width(embassy_rp::pac::qmi::vals::AddrWidth::Q);
+        w.set_suffix_width(embassy_rp::pac::qmi::vals::SuffixWidth::Q);
+        w.set_dummy_width(embassy_rp::pac::qmi::vals::DummyWidth::Q);
+        w.set_data_width(embassy_rp::pac::qmi::vals::DataWidth::Q);
+        w.set_prefix_len(embassy_rp::pac::qmi::vals::PrefixLen::_8);
+        w.set_dummy_len(embassy_rp::pac::qmi::vals::DummyLen::_24);
+    });
+
+    qmi.mem(1).rcmd().write(|w| w.0 = 0xEB);
+
+    qmi.mem(1).wfmt().write(|w| {
+        w.set_prefix_width(embassy_rp::pac::qmi::vals::PrefixWidth::Q);
+        w.set_addr_width(embassy_rp::pac::qmi::vals::AddrWidth::Q);
+        w.set_suffix_width(embassy_rp::pac::qmi::vals::SuffixWidth::Q);
+        w.set_dummy_width(embassy_rp::pac::qmi::vals::DummyWidth::Q);
+        w.set_data_width(embassy_rp::pac::qmi::vals::DataWidth::Q);
+        w.set_prefix_len(embassy_rp::pac::qmi::vals::PrefixLen::_8);
+    });
+
+    qmi.mem(1).wcmd().write(|w| w.0 = 0x38);
+
+    // Disable direct mode
+    qmi.direct_csr().write(|w| w.0 = 0);
+
+    // Enable writes to PSRAM
+    xip.ctrl().modify(|w| w.set_writable_m1(true));
+    psram_size
+}
diff --git a/kernel/src/scsi/mod.rs b/kernel/src/scsi/mod.rs index b61acee..9618563 100644 --- a/kernel/src/scsi/mod.rs +++ b/kernel/src/scsi/mod.rs @@ -54,6 +54,7 @@ impl<'d, 's, D: Driver<'d>> MassStorageClass<'d, D> { select(self.handle_cbw(), MSC_SHUTDOWN.wait()).await; if MSC_SHUTDOWN.signaled() { + #[cfg(feature = "defmt")] defmt::info!("MSC shutting down"); if self.temp_sd.is_some() { @@ -80,6 +81,7 @@ impl<'d, 's, D: Driver<'d>> MassStorageClass<'d, D> { if let Some(sd) = guard.take() { self.temp_sd = Some(sd); } else { + #[cfg(feature = "defmt")] defmt::warn!("Tried to take SDCARD but it was already taken"); return; } @@ -363,6 +365,7 @@ impl<'d, 's, D: Driver<'d>> MassStorageClass<'d, D> { } pub async fn send_csw_fail(&mut self, tag: u32) { + #[cfg(feature = "defmt")] defmt::error!("Command Failed: {}", tag); self.send_csw(tag, 0x01, 0).await; // 0x01 = Command Failed }