From 0ae9764e77a0d6ee26df6e594d17a31291296a22 Mon Sep 17 00:00:00 2001 From: Philip Smart Date: Thu, 30 Apr 2026 10:00:15 +0100 Subject: [PATCH] Optimisations for debug and hotpaths --- projects/tzpuPico/src/drivers/PIT8253.c | 8 +- projects/tzpuPico/src/drivers/PPI8255.c | 7 +- projects/tzpuPico/src/include/debug.h | 3 +- .../tzpuPico/src/model/BaseZ80/version.txt | 2 +- projects/tzpuPico/src/usb_bridge.c | 195 ++++++++++++++++-- projects/tzpuPico/version.txt | 2 +- 6 files changed, 191 insertions(+), 26 deletions(-) diff --git a/projects/tzpuPico/src/drivers/PIT8253.c b/projects/tzpuPico/src/drivers/PIT8253.c index 755e957..cd15cf0 100644 --- a/projects/tzpuPico/src/drivers/PIT8253.c +++ b/projects/tzpuPico/src/drivers/PIT8253.c @@ -38,7 +38,7 @@ // ----------------------------------------------------------------------- // Decrement a 16-bit value by 1 in BCD mode (4-digit BCD: 0000-9999). -static uint16_t pitBcdDecrement(uint16_t val) +static uint16_t __not_in_flash_func(pitBcdDecrement)(uint16_t val) { uint16_t d0 = (val) & 0xF; uint16_t d1 = (val >> 4) & 0xF; @@ -57,7 +57,7 @@ static uint16_t pitBcdDecrement(uint16_t val) // Get the effective count value for terminal count detection. // A programmed count of 0 means 0x10000 (65536) in binary or 10000 in BCD. -static uint32_t pitEffectiveCount(t_PIT8253Counter *ctr) +static uint32_t __not_in_flash_func(pitEffectiveCount)(t_PIT8253Counter *ctr) { if (ctr->reload == 0) return ctr->bcd ? 10000 : 0x10000; @@ -65,7 +65,7 @@ static uint32_t pitEffectiveCount(t_PIT8253Counter *ctr) } // Update a single counter based on elapsed wall-clock time. -static void pitUpdateCounter(t_PIT8253 *pit, int ch) +static void __not_in_flash_func(pitUpdateCounter)(t_PIT8253 *pit, int ch) { t_PIT8253Counter *ctr = &pit->counter[ch]; @@ -262,7 +262,7 @@ void PIT8253_reset(t_PIT8253 *pit) pit->initialized = false; } -uint8_t PIT8253_IO(t_PIT8253 *pit, bool read, uint8_t port, uint8_t data) +uint8_t __not_in_flash_func(PIT8253_IO)(t_PIT8253 *pit, bool read, uint8_t port, uint8_t data) { if (!pit->initialized) { diff --git a/projects/tzpuPico/src/drivers/PPI8255.c b/projects/tzpuPico/src/drivers/PPI8255.c index fd303f2..e331d9d 100644 --- a/projects/tzpuPico/src/drivers/PPI8255.c +++ b/projects/tzpuPico/src/drivers/PPI8255.c @@ -30,6 +30,7 @@ // ----------------------------------------------------------------------- #include +#include #include "drivers/PPI8255.h" // ----------------------------------------------------------------------- @@ -37,7 +38,7 @@ // ----------------------------------------------------------------------- // Decode the control word (D7=1) and update direction/mode fields. -static void ppiDecodeControlWord(t_PPI8255 *ppi, uint8_t cw) +static void __not_in_flash_func(ppiDecodeControlWord)(t_PPI8255 *ppi, uint8_t cw) { ppi->controlWord = cw; ppi->groupAMode = (cw >> 5) & 0x03; // D6:D5 @@ -54,7 +55,7 @@ static void ppiDecodeControlWord(t_PPI8255 *ppi, uint8_t cw) } // Build the Port C read value, combining output latch bits with input bits. -static uint8_t ppiBuildPortCRead(t_PPI8255 *ppi) +static uint8_t __not_in_flash_func(ppiBuildPortCRead)(t_PPI8255 *ppi) { uint8_t value = 0; @@ -149,7 +150,7 @@ void PPI8255_setInputCallback(t_PPI8255 *ppi, t_PPIPort port, t_PPIInputCallback } } -uint8_t PPI8255_IO(t_PPI8255 *ppi, bool read, uint8_t port, uint8_t data) +uint8_t __not_in_flash_func(PPI8255_IO)(t_PPI8255 *ppi, bool read, uint8_t port, uint8_t data) { switch (port) { diff --git a/projects/tzpuPico/src/include/debug.h b/projects/tzpuPico/src/include/debug.h index b148f57..cd0fa1a 100644 --- a/projects/tzpuPico/src/include/debug.h +++ b/projects/tzpuPico/src/include/debug.h @@ -58,7 +58,7 @@ extern mutex_t debugMutex; mutex_enter_blocking(&debugMutex); \ if (debugBuffer != NULL && (debugBufferSize + strlen(a)) < MAX_DEBUG_BUFFER_SIZE - 256) \ { \ - debugBufferSize += sprintf(&debugBuffer[debugBufferSize], a, ##__VA_ARGS__); \ + debugBufferSize += dbg_sprintf(&debugBuffer[debugBufferSize], a, ##__VA_ARGS__); \ ((volatile uint32_t *) 0x117EF004)[0] = (uint32_t) debugBufferSize; \ } \ mutex_exit(&debugMutex); \ @@ -117,6 +117,7 @@ extern mutex_t debugMutex; // Public Prototypes, defined in usb_bridge module. int debugWrite(char *, int); int debugRead(char *, int); +int dbg_sprintf(char *buf, const char *fmt, ...) __attribute__((format(printf, 2, 3))); #else diff --git a/projects/tzpuPico/src/model/BaseZ80/version.txt b/projects/tzpuPico/src/model/BaseZ80/version.txt index 7cbbdcc..bb62338 100644 --- a/projects/tzpuPico/src/model/BaseZ80/version.txt +++ b/projects/tzpuPico/src/model/BaseZ80/version.txt @@ -1 +1 @@ -2.221 +2.222 diff --git a/projects/tzpuPico/src/usb_bridge.c b/projects/tzpuPico/src/usb_bridge.c index be295a8..b613748 100644 --- a/projects/tzpuPico/src/usb_bridge.c +++ b/projects/tzpuPico/src/usb_bridge.c @@ -36,6 +36,7 @@ ///////////////////////////////////////////////////////////////////////////////////////////////////////// #include +#include #include #include #include @@ -421,7 +422,7 @@ int initUSBBridge(void) // Read from the USB channel (itf) and store any data into the fromUsbBuffer. This data is then extracted as needed and sent on // the UART channel. -void readUSB(uint8_t itf) +void __not_in_flash_func(readUSB)(uint8_t itf) { // Check value. if (itf >= CFG_TUD_CDC) @@ -447,7 +448,7 @@ void readUSB(uint8_t itf) // Data sent over UART to pico for forwarding to USB. // Data is stored in fromUartBuffer for channel (itf) and is written directly to the USB handle for this channel. // Pointer in fromUartBuffer is updated by number of bytes written. -void writeUSB(uint8_t itf) +void __not_in_flash_func(writeUSB)(uint8_t itf) { // Locals. t_UartData *ud = &UART_DATA[itf]; @@ -488,9 +489,171 @@ void writeUSB(uint8_t itf) #endif } +// --------------------------------------------------------------------------- +// RAM-resident sprintf — avoids XIP flash stalls when called from debugf on +// Core 1. Supports the format specifiers actually used in this project: +// %d %i %u %x %X %s %c %p %% %f with 0/-/width, l/ll/z length modifiers. +// ~1.5 KB compiled — much smaller than pulling pico_printf into RAM. +// --------------------------------------------------------------------------- +#if defined(DEBUG) +static void __not_in_flash_func(dbg_out_char)(char **dst, char *end, char c) +{ + if (*dst < end) *(*dst)++ = c; +} + +static void __not_in_flash_func(dbg_out_str)(char **dst, char *end, const char *s) +{ + while (*s && *dst < end) *(*dst)++ = *s++; +} + +static int __not_in_flash_func(dbg_out_int)(char **dst, char *end, unsigned long long val, int base, int upper, int width, char pad, int neg) +{ + char buf[24]; + const char *digits = upper ? "0123456789ABCDEF" : "0123456789abcdef"; + int pos = 0; + int written = 0; + + if (val == 0) buf[pos++] = '0'; + while (val) { buf[pos++] = digits[val % base]; val /= base; } + if (neg) buf[pos++] = '-'; + + int fill = width - pos; + while (fill-- > 0) { dbg_out_char(dst, end, pad); written++; } + while (pos-- > 0) { dbg_out_char(dst, end, buf[pos]); written++; } + return written; +} + +int __not_in_flash_func(dbg_sprintf)(char *buf, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + + char *dst = buf; + char *end = buf + MAX_DEBUG_BUFFER_SIZE; + + while (*fmt && dst < end) + { + if (*fmt != '%') { *dst++ = *fmt++; continue; } + fmt++; // skip '%' + + // Flags + char pad = ' '; + bool leftAlign = false; + while (*fmt == '0' || *fmt == '-') { + if (*fmt == '0') pad = '0'; + if (*fmt == '-') leftAlign = true; + fmt++; + } + (void)leftAlign; // left-align not commonly used in debugf + + // Width + int width = 0; + if (*fmt == '*') { width = va_arg(ap, int); fmt++; } + else while (*fmt >= '0' && *fmt <= '9') { width = width * 10 + (*fmt++ - '0'); } + + // Precision (consume but only apply to %s and %f) + int prec = -1; + if (*fmt == '.') { + fmt++; + prec = 0; + if (*fmt == '*') { prec = va_arg(ap, int); fmt++; } + else while (*fmt >= '0' && *fmt <= '9') { prec = prec * 10 + (*fmt++ - '0'); } + } + + // Length modifier + int len = 0; // 0=int, 1=long, 2=long long, 3=size_t + if (*fmt == 'l') { len = 1; fmt++; if (*fmt == 'l') { len = 2; fmt++; } } + else if (*fmt == 'z') { len = 3; fmt++; } + else if (*fmt == 'h') { fmt++; if (*fmt == 'h') fmt++; } // consume hh/h + + // Conversion + switch (*fmt) + { + case 'd': case 'i': { + long long v; + if (len == 2) v = va_arg(ap, long long); + else if (len >= 1) v = va_arg(ap, long); + else v = va_arg(ap, int); + bool neg = (v < 0); + unsigned long long uv = neg ? (unsigned long long)(-v) : (unsigned long long)v; + dbg_out_int(&dst, end, uv, 10, 0, width, pad, neg); + break; + } + case 'u': { + unsigned long long v; + if (len == 2) v = va_arg(ap, unsigned long long); + else if (len >= 1) v = va_arg(ap, unsigned long); + else v = va_arg(ap, unsigned int); + dbg_out_int(&dst, end, v, 10, 0, width, pad, 0); + break; + } + case 'x': case 'X': { + unsigned long long v; + if (len == 2) v = va_arg(ap, unsigned long long); + else if (len >= 1) v = va_arg(ap, unsigned long); + else v = va_arg(ap, unsigned int); + dbg_out_int(&dst, end, v, 16, (*fmt == 'X'), width, pad, 0); + break; + } + case 's': { + const char *s = va_arg(ap, const char *); + if (!s) s = "(null)"; + if (prec >= 0) { + int n = 0; + while (n < prec && s[n]) { dbg_out_char(&dst, end, s[n]); n++; } + } else { + dbg_out_str(&dst, end, s); + } + break; + } + case 'c': + dbg_out_char(&dst, end, (char)va_arg(ap, int)); + break; + case 'p': { + uintptr_t v = (uintptr_t)va_arg(ap, void *); + dbg_out_int(&dst, end, v, 16, 0, 8, '0', 0); + break; + } + case 'f': { + double v = va_arg(ap, double); + if (v < 0) { dbg_out_char(&dst, end, '-'); v = -v; } + unsigned long ipart = (unsigned long)v; + dbg_out_int(&dst, end, ipart, 10, 0, 0, ' ', 0); + dbg_out_char(&dst, end, '.'); + int dp = (prec >= 0) ? prec : 6; + double frac = v - (double)ipart; + for (int i = 0; i < dp; i++) { + frac *= 10.0; + int digit = (int)frac; + dbg_out_char(&dst, end, '0' + digit); + frac -= digit; + } + break; + } + case '%': + *dst++ = '%'; + break; + case '\0': + goto done; + default: + dbg_out_char(&dst, end, '%'); + dbg_out_char(&dst, end, *fmt); + break; + } + fmt++; + } +done: + if (dst < end) *dst = '\0'; + else *(end - 1) = '\0'; + + va_end(ap); + return (int)(dst - buf); +} +#endif + // Debug channel - read data from the USB device purely for debugging. #if defined(DEBUG) -int debugRead(char *buf, int max_len) +int __not_in_flash_func(debugRead)(char *buf, int max_len) { t_UartData *ud = &UART_DATA[DEBUG_CHANNEL]; uint32_t len = tud_cdc_n_available(DEBUG_CHANNEL); @@ -507,12 +670,12 @@ int debugRead(char *buf, int max_len) #endif // Forward declaration. -void usb_cdc_process(uint8_t itf); +void __not_in_flash_func(usb_cdc_process)(uint8_t itf); // Debug channel - a channel within the USB device purely for sending debug information. // #if defined(DEBUG) -int debugWrite(char *dbgMsg, int len) +int __not_in_flash_func(debugWrite)(char *dbgMsg, int len) { // Locals. static int syncCnt = 1; @@ -589,7 +752,7 @@ int debugWrite(char *dbgMsg, int len) #ifdef EMBEDDED_LOG // Method to route a log message received from the ESP_CHANNEL to any other channel for output. // If a log message isnt routed and another arrives, it will be wasted. -void logRoute(int channel) +void __not_in_flash_func(logRoute)(int channel) { // Locals. t_UartData *ud = &UART_DATA[channel]; @@ -642,7 +805,7 @@ void logRoute(int channel) // Method to read a received embedded frame, used by the ESP32 to send responses, which it embeds // within log/console messages, as responses to previously issued commands. -int uartReadFrame(char *msg) +int __not_in_flash_func(uartReadFrame)(char *msg) { // Locals. int result = 0; @@ -692,7 +855,7 @@ int uartReadFrame(char *msg) // Method to write a command frame into the data stream being sent to the ESP32. // -bool uartWriteFrame(const char *msg, int len) +bool __not_in_flash_func(uartWriteFrame)(const char *msg, int len) { // Locals. bool result = false; @@ -796,7 +959,7 @@ bool uartWriteOOB(uint32_t oobCmd) } // Method to return a command sent by the ESP32 which was embedded within the UART data flow. -int uartReadCmd(char *msg) +int __not_in_flash_func(uartReadCmd)(char *msg) { // Locals. int result = 0; @@ -831,7 +994,7 @@ int uartReadCmd(char *msg) return (result); } -void usb_cdc_process(uint8_t itf) +void __not_in_flash_func(usb_cdc_process)(uint8_t itf) { t_UartData *ud = &UART_DATA[itf]; @@ -843,7 +1006,7 @@ void usb_cdc_process(uint8_t itf) writeUSB(itf); } -void readUART(uint8_t itf) +void __not_in_flash_func(readUART)(uint8_t itf) { t_UartData *ud = &UART_DATA[itf]; uint8_t rxChar; @@ -1013,13 +1176,13 @@ void readUART(uint8_t itf) } #if defined(USE_UART0) -void irqUART0(void) +void __not_in_flash_func(irqUART0)(void) { readUART(0); } #endif -void irqUART1(void) +void __not_in_flash_func(irqUART1)(void) { // Default action is to process all incoming data as ESP32->USB. If a pattern is matched during receipt, which indicates the ESP32 is // attempting to update the rp2350, then all uart transactions are sent to the fw update handler. @@ -1035,7 +1198,7 @@ void irqUART1(void) #endif } -bool putUART(uart_inst_t *const uart, uint8_t txChar) +bool __not_in_flash_func(putUART)(uart_inst_t *const uart, uint8_t txChar) { // Locals. int timeout = 500; @@ -1050,7 +1213,7 @@ bool putUART(uart_inst_t *const uart, uint8_t txChar) } // Write from the UART to a USB virtual channel. -void writeUART(uint8_t itf) +void __not_in_flash_func(writeUART)(uint8_t itf) { // Check value. if (itf >= CFG_TUD_CDC) @@ -1212,7 +1375,7 @@ int changeActivePartition(int partition) // Method to poll the USB/UART buffers/devices. If using bootloader, process any incoming firmware update requests and data. // -void pollUSBtoUART(void) +void __not_in_flash_func(pollUSBtoUART)(void) { // Locals. #if defined(BOOTLOADER) diff --git a/projects/tzpuPico/version.txt b/projects/tzpuPico/version.txt index 437e27c..aacb309 100644 --- a/projects/tzpuPico/version.txt +++ b/projects/tzpuPico/version.txt @@ -1 +1 @@ -2.208 +2.209