diff --git a/.gitignore b/.gitignore index 4d03431..25f82e7 100644 --- a/.gitignore +++ b/.gitignore @@ -56,3 +56,4 @@ vga.pio.h Makefile CMakeDoxyfile.in CMakeDoxygenDefaults.cmake +picotool/ diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index c8c0fa7..b469971 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,30 +1,30 @@ -add_subdirectory(vga_ants) -add_subdirectory(vga_balloons) -add_subdirectory(vga_draw) -add_subdirectory(vga_earth) -add_subdirectory(vga_eggs) -add_subdirectory(vga_fifteen) -add_subdirectory(vga_flag) -add_subdirectory(vga_ghostracing) -add_subdirectory(vga_gingerhouse) +#add_subdirectory(vga_ants) +#add_subdirectory(vga_balloons) +#add_subdirectory(vga_draw) +#add_subdirectory(vga_earth) +#add_subdirectory(vga_eggs) +#add_subdirectory(vga_fifteen) +#add_subdirectory(vga_flag) +#add_subdirectory(vga_ghostracing) +#add_subdirectory(vga_gingerhouse) add_subdirectory(vga_hello) -add_subdirectory(vga_hypno) -add_subdirectory(vga_levelmeter) -add_subdirectory(vga_life) -add_subdirectory(vga_lines) -add_subdirectory(vga_mandelbrot) -add_subdirectory(vga_matrixrain) -add_subdirectory(vga_maze) -add_subdirectory(vga_monoscope) -add_subdirectory(vga_oscilloscope) -add_subdirectory(vga_pacman) -add_subdirectory(vga_pi) -add_subdirectory(vga_pixels) -add_subdirectory(vga_raytrace) -add_subdirectory(vga_sokoban) -add_subdirectory(vga_spheres) -add_subdirectory(vga_spots) -add_subdirectory(vga_tetris) -add_subdirectory(vga_train) -add_subdirectory(vga_twister) -add_subdirectory(vga_watersurface) +#add_subdirectory(vga_hypno) +#add_subdirectory(vga_levelmeter) +#add_subdirectory(vga_life) +#add_subdirectory(vga_lines) +#add_subdirectory(vga_mandelbrot) +#add_subdirectory(vga_matrixrain) +#add_subdirectory(vga_maze) +#add_subdirectory(vga_monoscope) +#add_subdirectory(vga_oscilloscope) +#add_subdirectory(vga_pacman) +#add_subdirectory(vga_pi) +#add_subdirectory(vga_pixels) +#add_subdirectory(vga_raytrace) 
+#add_subdirectory(vga_sokoban) +#add_subdirectory(vga_spheres) +#add_subdirectory(vga_spots) +#add_subdirectory(vga_tetris) +#add_subdirectory(vga_train) +#add_subdirectory(vga_twister) +#add_subdirectory(vga_watersurface) diff --git a/examples/vga_hello/CMakeLists.txt b/examples/vga_hello/CMakeLists.txt index c01079b..46f2a14 100644 --- a/examples/vga_hello/CMakeLists.txt +++ b/examples/vga_hello/CMakeLists.txt @@ -12,5 +12,8 @@ target_include_directories(vga_hello PRIVATE ${CMAKE_CURRENT_LIST_DIR}/src ) +pico_enable_stdio_usb(vga_hello 1) + # create map/bin/hex file etc. pico_add_extra_outputs(vga_hello) + diff --git a/examples/vga_hello/src/main.cpp b/examples/vga_hello/src/main.cpp index a155163..4f946d9 100644 --- a/examples/vga_hello/src/main.cpp +++ b/examples/vga_hello/src/main.cpp @@ -7,16 +7,82 @@ #include "picovga.h" +#include "pico/stdlib.h" +#include "pico/time.h" +#include <stdio.h> +#include <stdint.h> +#include "vga_config.h" +#include "hardware/pio.h" + +#ifdef VGA_DEBUG // src/vga.cpp only defines these counters when VGA_DEBUG is set +extern volatile uint32_t g_isr_count; // how many times VgaLine() fired +extern volatile uint32_t g_last_scanline; // last ScanLine observed in ISR +extern volatile uint32_t g_last_linetype; // last linetype observed in ISR +#endif // Draw box ALIGNED u8 Box[512*400]; int main() { - // initialize videomode + stdio_init_all(); + sleep_ms(10000); // allow USB CDC to connect (helpful on some hosts) + + printf("\n\nPicoVGA test\n"); + + // initialize videomode Video(DEV_VGA, RES_EGA, FORM_8BIT, Box); + sleep_ms(1000); + // draw text DrawText(&Canvas, "Hello World!", (512-12*8*4)/2, (400-8*4)/2, COL_WHITE, FontBoldB8x16, 16, 4, 4); + +#ifdef VGA_DEBUG + uint32_t last = 0; +#endif + absolute_time_t next = make_timeout_time_ms(1000); + + while (true) { + if (absolute_time_diff_us(get_absolute_time(), next) <= 0) { +#ifdef VGA_DEBUG + uint32_t now = g_isr_count; + uint32_t delta = now - last; + last = now; + + printf("ISR/s=%lu last_scanline=%lu last_linetype=%lu\n", + (unsigned long)delta, + (unsigned long)g_last_scanline, + (unsigned long)g_last_linetype); +#endif + + uint32_t c = 
dma_hw->ch[VGA_DMA_PIO0].ctrl_trig; + uint32_t i = dma_hw->ints0; + printf("DMA_PIO0: BUSY=%u AHB_ERR=%u READ_ERR=%u WRITE_ERR=%u ints0=0x%08lx\n", + (unsigned)((c >> DMA_CH0_CTRL_TRIG_BUSY_LSB) & 1), + (unsigned)((c >> DMA_CH0_CTRL_TRIG_AHB_ERROR_LSB) & 1), + (unsigned)((c >> DMA_CH0_CTRL_TRIG_READ_ERROR_LSB) & 1), + (unsigned)((c >> DMA_CH0_CTRL_TRIG_WRITE_ERROR_LSB) & 1), + (unsigned long)i); + + printf("DATA: cnt=%u rd=%08x wr=%08x ctrl=%08x\n", + (unsigned)dma_hw->ch[VGA_DMA_PIO0].transfer_count, + (unsigned)dma_hw->ch[VGA_DMA_PIO0].read_addr, + (unsigned)&VGA_PIO->txf[VGA_SM(0)], + (unsigned)dma_hw->ch[VGA_DMA_PIO0].ctrl_trig); + printf("CB0: cnt=%u rd=%08x wr=%08x ctrl=%08x\n", + (unsigned)dma_hw->ch[VGA_DMA_CB0].transfer_count, + (unsigned)dma_hw->ch[VGA_DMA_CB0].read_addr, + (unsigned)&dma_hw->ch[VGA_DMA_PIO0].al3_transfer_count, + (unsigned)dma_hw->ch[VGA_DMA_CB0].ctrl_trig); + printf("PIO: fstat=%08x txlvl=%u sm0.addr=%02x\n", + (unsigned)VGA_PIO->fstat, + (unsigned)((VGA_PIO->flevel >> (VGA_SM0*8)) & 0xf), // TX level is a 4-bit field; bit 4 already belongs to the RX level + (unsigned)(VGA_PIO->sm[VGA_SM0].addr & 0x1f)); + + next = make_timeout_time_ms(1000); + } + tight_loop_contents(); + } } diff --git a/pico_flash_region.ld b/pico_flash_region.ld new file mode 100644 index 0000000..df5e392 --- /dev/null +++ b/pico_flash_region.ld @@ -0,0 +1 @@ +FLASH(rx) : ORIGIN = 0x10000000, LENGTH = (2 * 1024 * 1024) diff --git a/src/vga.cpp b/src/vga.cpp index 75b8903..b6d46bd 100644 --- a/src/vga.cpp +++ b/src/vga.cpp @@ -14,8 +14,12 @@ #include #include "hardware/divider.h" -#include "hardware/dma.h" #include "hardware/sync.h" +#include "hardware/pio.h" +#include "hardware/dma.h" +#include "hardware/irq.h" +#include "hardware/clocks.h" // SDK 2.x: ensure clock helpers available +#include "pico/platform.h" // memory barriers (__dmb/__dsb) // scanline type u8 ScanlineType[MAXLINE]; @@ -48,8 +52,8 @@ u32 LineBufSync[10]; // vertical synchronization ALIGNED u8 LineBuf0[BLACK_MAX]; // line buffer with black color (used to clear rest of scanline) // control buffers (BufInx = 0 running CtrlBuf1 and preparing CtrlBuf2, BufInx = 1 running 
CtrlBuf2 and preparing CtrlBuf1) -u32 CtrlBuf1[CBUF_MAX]; // base layer control pairs: u32 count, read address (must be terminated with [0,0]) -u32 CtrlBuf2[CBUF_MAX]; // base layer control pairs: u32 count, read address (must be terminated with [0,0]) +__attribute__((aligned(8))) u32 CtrlBuf1[CBUF_MAX]; // base layer control pairs: u32 count, read address (must be terminated with [0,0]) +__attribute__((aligned(8))) u32 CtrlBuf2[CBUF_MAX]; // base layer control pairs: u32 count, read address (must be terminated with [0,0]) int CtrlBufSize[LAYERS_MAX] = { CBUF0_MAX, CBUF1_MAX, CBUF2_MAX, CBUF3_MAX }; // size of control buffers @@ -62,11 +66,20 @@ u32 RenderTextMask[512]; // saved integer divider state hw_divider_state_t DividerState; +// --- DEBUG (USB serial) --- +// Safe to read from main loop; written by ISR. +// Define VGA_DEBUG in your build to enable these counters. +#ifdef VGA_DEBUG +volatile uint32_t g_isr_count = 0; // how many times VgaLine() fired +volatile uint32_t g_last_scanline = 0; // last ScanLine observed in ISR +volatile uint32_t g_last_linetype = 0; // last linetype observed in ISR +#endif + // process scanline buffers (will save integer divider state into DividerState) int __not_in_flash_func(VgaBufProcess)() { // Clear the interrupt request for DMA control channel - dma_hw->ints0 = (1u << VGA_DMA_PIO0); + dma_channel_acknowledge_irq0(VGA_DMA_PIO0); // switch current buffer index // BufInx = 0 running CtrlBuf1 and preparing CtrlBuf2, BufInx = 1 running CtrlBuf2 and preparing CtrlBuf1 @@ -75,6 +88,7 @@ int __not_in_flash_func(VgaBufProcess)() BufInx = bufinx ^ 1; // update DMA control channels of base layer, and run it + __dmb(); // ensure control words are visible before arming CB0 (SDK 2.x) dma_channel_set_read_addr(VGA_DMA_CB0, CtrlBufNext[0], true); // save integer divider state @@ -182,6 +196,7 @@ int __not_in_flash_func(VgaBufProcess)() pio_sm_exec(VGA_PIO, sm, pio_encode_jmp(CurLayerProg.entry+LAYER_OFFSET)); // start DMA + __dmb(); 
dma_channel_set_read_addr(VGA_DMA_CB(layer), CtrlBufNext[layer], true); } } @@ -389,6 +404,10 @@ u32* __not_in_flash_func(VgaBufRender)(u32* cbuf, u32* cbuf0, u8* dbuf, int y0) // VGA DMA handler - called on end of every scanline extern "C" void __not_in_flash_func(VgaLine)() { + #ifdef VGA_DEBUG + g_isr_count++; + #endif + // process scanline buffers (will save integer divider state into DividerState) int bufinx = VgaBufProcess(); @@ -485,6 +504,12 @@ extern "C" void __not_in_flash_func(VgaLine)() *cbuf++ = 0; // end mark *cbuf++ = 0; // end mark + #ifdef VGA_DEBUG + // Capture last-known values for printing from main (do NOT printf in ISR) + g_last_linetype = linetype; + g_last_scanline = (uint32_t)ScanLine; // ScanLine updated in VgaBufProcess() + #endif + // restore integer divider state hw_divider_restore_state(&DividerState); } @@ -514,6 +539,15 @@ void VgaDmaInit() // increment address on read from memory channel_config_set_read_increment(&cfg, true); + // In SDK 2.x do NOT rely on default pacing; force unpaced writes so the two + // 32-bit writes (TRANS_COUNT -> READ_ADDR_TRIG) happen back-to-back. + // This mirrors the effective behavior under 1.5.1 and avoids stalls where + // the control stream blocks waiting for a peripheral TREQ. WV + channel_config_set_dreq(&cfg, DREQ_FORCE); + + // Keep CB channel quiet (we don't enable its IRQ anyway). 
WV + channel_config_set_irq_quiet(&cfg, true); + // increment address on write to DMA port channel_config_set_write_increment(&cfg, true); @@ -571,12 +605,16 @@ void VgaDmaInit() 0, // number of transfers in u32 false // do not start immediately ); + + uint32_t c = dma_hw->ch[VGA_DMA_PIO0].al1_ctrl; + printf("DATA.al1_ctrl=0x%08x treq_sel=%u\n", (unsigned)c, (unsigned)((c >> DMA_CH0_CTRL_TRIG_TREQ_SEL_LSB) & 0x3f)); } // ==== initialize IRQ0, raised from base layer 0 // enable DMA channel IRQ0 - dma_channel_set_irq0_enabled(VGA_DMA_PIO0, true); + // dma_channel_set_irq0_enabled(VGA_DMA_PIO0, true); + dma_set_irq0_channel_mask_enabled(1u << VGA_DMA_PIO0, true); // set DMA IRQ handler irq_set_exclusive_handler(DMA_IRQ_0, VgaLine); @@ -605,7 +643,18 @@ void VgaPioInit() prg.instructions = ins; prg.length = vga_program.length; prg.origin = BASE_OFFSET; - pio_add_program(VGA_PIO, &prg); + prg.pio_version = vga_program.pio_version; + #if PICO_PIO_VERSION > 0 + prg.used_gpio_ranges = vga_program.used_gpio_ranges; + #endif + + // pio_add_program(VGA_PIO, &prg); // WV + int load_offset = pio_add_program_at_offset(VGA_PIO, &prg, BASE_OFFSET); + if (load_offset < 0) { + panic("vga_program won't fit at BASE_OFFSET=%d\n", BASE_OFFSET); + } else { + printf("vga_program loaded at %d\n", load_offset); + } // load layer program if (LayerProgInx != LAYERPROG_BASE) @@ -623,7 +672,18 @@ void VgaPioInit() prg.instructions = ins; prg.length = CurLayerProg.length; prg.origin = LAYER_OFFSET; - pio_add_program(VGA_PIO, &prg); + prg.pio_version = CurLayerProg.prg->pio_version; + #if PICO_PIO_VERSION > 0 + prg.used_gpio_ranges = CurLayerProg.prg->used_gpio_ranges; + #endif + + // pio_add_program(VGA_PIO, &prg); // WV + int load_offset = pio_add_program_at_offset(VGA_PIO, &prg, LAYER_OFFSET); + if (load_offset < 0) { + panic("layer program won't fit at LAYER_OFFSET=%d\n", LAYER_OFFSET); + } else { + printf("layer program loaded at %d\n", load_offset); + } } // connect PIO to the pad @@ -666,6 +726,15 @@ void 
VgaPioInit() sm_config_set_sideset(&cfg, 1, false, false); sm_config_set_sideset_pins(&cfg, VGA_GPIO_SYNC); + // --- TEMP: sanity-check clock divider --- + float div = CurVmode.div; + printf("clk_sys=%u Hz CurVmode.div=%f CurVmode.cpp=%u\n", + (unsigned)clock_get_hz(clk_sys), div, (unsigned)CurVmode.cpp); + + // PIO dividers below 1.0 are invalid, so clamp anything outside [1, 65536); the negated test also catches NaN/inf + if (!(div >= 1.0f && div < 65536.0f)) div = 1.0f; + sm_config_set_clkdiv(&cfg, div); + // initialize state machine pio_sm_init(VGA_PIO, VGA_SM0, vga_offset_entry+BASE_OFFSET, &cfg); } @@ -680,6 +749,16 @@ void VgaPioInit() } } +static inline uint32_t cw_jump(uint32_t cw_be) { + uint32_t cw = __builtin_bswap32(cw_be); + return (cw >> 27) & 0x1f; +} + +static inline uint32_t cw_count(uint32_t cw_be) { + uint32_t cw = __builtin_bswap32(cw_be); + return cw & 0x07ffffff; +} + // initialize scanline buffers void VgaBufInit() { @@ -752,6 +831,13 @@ void VgaBufInit() CtrlBuf2[2] = 0; // stop mark CtrlBuf2[3] = 0; // stop mark + + printf("JUMPS: HsBp[0]=%u HsBp[3]=%u Dark[0]=%u Sync[0]=%u Sync[1]=%u\n", + (unsigned)cw_jump(LineBufHsBp[0]), // expect 17 (sync) + (unsigned)cw_jump(LineBufHsBp[3]), // expect 28 (output) + (unsigned)cw_jump(LineBufDark[0]), // expect 17 (sync) + (unsigned)cw_jump(LineBufSync[0]), // VGA: expect 17 (sync) + (unsigned)cw_jump(LineBufSync[1])); // VGA: expect 20 (dark) } // terminate VGA service @@ -773,7 +859,7 @@ void VgaTerm() dma_channel_set_irq0_enabled(VGA_DMA_PIO0, false); // Clear the interrupt request for DMA control channel - dma_hw->ints0 = (1u << VGA_DMA_PIO0); + dma_channel_acknowledge_irq0(VGA_DMA_PIO0); // stop all state machines pio_set_sm_mask_enabled(VGA_PIO, VGA_SMALL, false); @@ -1008,14 +1094,59 @@ void VgaInit(const sVmode* vmode) // initialize DMA VgaDmaInit(); + // // -- PRIME THE STATE MACHINE -- + + // // 1) Make sure SM0 FIFOs are empty + // pio_sm_clear_fifos(VGA_PIO, VGA_SM0); + + // // 2) Take the first control word from the stream DMA will send. 
+ // // CtrlBuf1[1] holds the pointer to the first control-word array. + // uint32_t *first_stream = (uint32_t*)CtrlBuf1[1]; + + // // The control words in RAM are stored BYTESWAP(...), and the data-DMA + // // has bswap enabled, so the SM normally sees the *un*-swapped value. + // // Since we’re bypassing DMA here, swap it back once. + // uint32_t first_cw = __builtin_bswap32(first_stream[0]); + + // // 3) Stuff that word into TX, then execute a PULL (even while SM disabled) + // // so OSR is primed before the first `out pc,5`. + // pio_sm_put_blocking(VGA_PIO, VGA_SM0, first_cw); + // pio_sm_exec(VGA_PIO, VGA_SM0, pio_encode_pull(/*block*/false, /*if_empty*/false)); + + // // -- ------------------------- + // enable DMA IRQ - irq_set_enabled(DMA_IRQ_0, true); + // irq_set_enabled(DMA_IRQ_0, true); // WV: moved below, after the state machines are started - // start DMA with base layer 0 - dma_channel_start(VGA_DMA_CB0); + // Clear any stale IRQ before enabling and starting + dma_channel_acknowledge_irq0(VGA_DMA_PIO0); - // run state machines + __dmb(); + dma_channel_start(VGA_DMA_CB0); + + // Start the state machines right after the control DMA so the PIO drains TX as soon as it is fed. + // NOTE(review): DMA is started before the SMs here; confirm this is the order that avoids the SDK 2.x start-of-frame DREQ/chain race. 
pio_enable_sm_mask_in_sync(VGA_PIO, LayerMask); + + pio_sm_hw_t *sm = &VGA_PIO->sm[VGA_SM0]; + printf("EXECCTRL=0x%08x SHIFTCTRL=0x%08x CLKDIV=0x%08x PINCTRL=0x%08x\n", + (unsigned)sm->execctrl, (unsigned)sm->shiftctrl, (unsigned)sm->clkdiv, (unsigned)sm->pinctrl); + + // Force the very first control word into OSR, then jump + // (the TX FIFO is being fed by the DMA started above) + pio_sm_exec(VGA_PIO, VGA_SM0, pio_encode_pull(false, false)); // OSR := first CW (non-blocking; FIFO has data) + pio_sm_exec(VGA_PIO, VGA_SM0, pio_encode_out(pio_pc, 5)); // jump to first handler (sync/dark/output) + + uint32_t ctrl = VGA_PIO->ctrl; + bool sm0_en = !!(ctrl & (1u << (PIO_CTRL_SM_ENABLE_LSB + VGA_SM0))); + printf("SM0 enabled=%d pc=%u fstat=0x%08x txlvl=%u\n", + sm0_en, + (unsigned)pio_sm_get_pc(VGA_PIO, VGA_SM0), + (unsigned)VGA_PIO->fstat, + (unsigned)pio_sm_get_tx_fifo_level(VGA_PIO, VGA_SM0)); + + // Now enable the DMA IRQ (the control DMA channel was already started above) + irq_set_enabled(DMA_IRQ_0, true); } const sVmode* volatile VgaVmodeReq = NULL; // request to reinitialize videomode, 1=only stop driver