Files
pico/projects/tzpuPico/esp32/main/CommandProcessor.cpp
2026-03-24 22:22:37 +00:00

479 lines
20 KiB
C++

/////////////////////////////////////////////////////////////////////////////////////////////////////////
//
// Name: CommandProcessor.cpp
// Created: Jan 2025
// Version: v1.1
// Author(s): Philip Smart
// Description: Command processor — receives binary IPC commands from the RP2350 over SPI
// and dispatches to the appropriate handler.
// v1.1: Binary SPI IPC replaces UART ASCII command/response round-trips.
// vTaskDelay(1) polling loop eliminated; SPI slave receive blocks
// indefinitely (portMAX_DELAY) — no wasted tick delays on idle bus.
// UART path retained for OOB handling by IO_stdinReaderTask.
// Binary opcode dispatch (O(1) switch) replaces std::map lookup.
// Credits:
// Copyright: (c) 2019-2026 Philip Smart <philip.smart@net2net.org>
//
// History: v1.00 Jan 2025 - Initial write.
// v1.10 Mar 2026 - Binary SPI IPC, opcode dispatch, semaphore-free blocking wait.
//
// Notes: See Makefile to enable/disable conditional components
//
/////////////////////////////////////////////////////////////////////////////////////////////////////////
// This source file is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This source file is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
/////////////////////////////////////////////////////////////////////////////////////////////////////////
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "freertos/queue.h"
#include "freertos/semphr.h"
#include "driver/spi_slave.h"
#include "esp_system.h"
#include "esp_log.h"
#include "sdmmc_cmd.h"
#include "driver/sdspi_host.h"
#include "driver/spi_common.h"
#include "driver/uart.h"
#include "driver/gpio.h"
#include "esp_vfs_fat.h"
#include <atomic>
#include <cstring>
#include <functional>
#include <string>
#include <unordered_map>
#include <vector>
#include "IO.h"
#include "CommandProcessor.h"
#include "ipc_protocol.h"
#include "esp_attr.h" // RTC_NOINIT_ATTR
// Magic value stored in g_oob_restart_magic (the variable itself is declared in
// IO.cpp) before esp_restart() in the OOB handler, so that after the reset we know
// this SW reset was clean (no mid-transaction state) and can use the fast path.
#define OOB_RESTART_MAGIC 0xAA55CC33u
// Declared in IO.cpp — set by OOB handler to request SPI slave reinitialization.
extern volatile bool g_spi_clear_requested;
// ---------------------------------------------------------------------------
// ESP32→RP2350 reverse command queue.
//
// Commands are delivered to the RP2350 via the NOP poll T3 response filename
// field. Two dispatch modes are supported:
//
// CP_queueCmd(cmd) — async: push and return immediately.
// Use when the result is not needed.
//
// CP_sendCmd(cmd, timeoutMs) — sync: push and block until the NOP poll
// has delivered the command to the RP2350
// (i.e. it appeared in a T3 response).
// Returns true if delivered within timeout.
//
// Multiple commands can be queued simultaneously up to CP_CMD_QUEUE_DEPTH.
// Async commands beyond the queue depth are silently dropped (non-blocking
// xQueueSend with timeout 0). Sync commands wait up to 100 ms to enqueue.
// ---------------------------------------------------------------------------
#define CP_CMD_QUEUE_DEPTH 8

// One reverse (ESP32→RP2350) command as it travels through the queue.
// Entries are copied by value into and out of the FreeRTOS queue, so the
// struct is kept a plain aggregate (it is also cleared with memset()).
struct t_CpCmd
{
    char              cmd[IPCF_FILENAME_LEN]; // Command text; producers always leave a terminating NUL
    SemaphoreHandle_t doneSem;                // NULL = async; semaphore = sync (signalled on delivery)
};

// Queue of commands waiting to be handed to the RP2350; created by CommandProcessor::init().
static QueueHandle_t s_cmdQueue = nullptr;

// Command currently being delivered to the RP2350 via NOP; cmd[0] == '\0' means the slot is empty.
static t_CpCmd s_pendingCmd{};
// ---------------------------------------------------------------------------
// Initial SPI-done flag.
//
// Set to true the first time storeRP2350Info() succeeds (i.e. the RP2350 has
// sent an INF command, which it only does after completing its own startup
// file-load sequence). app_main() polls this via CP_initialSpiDone() to
// delay WiFi startup until after the RP2350's initial SPI exchanges are
// complete — prevents high-priority WiFi FreeRTOS tasks from preempting the
// CommandProcessor during the critical startup window and causing T3 HS
// timeouts on the INF exchange.
// ---------------------------------------------------------------------------
// Written by the command-processing side and polled from app_main(), i.e. shared
// between tasks. std::atomic gives a well-defined cross-task (and cross-core)
// hand-over; 'volatile' only prevents the compiler from caching the value.
static std::atomic<bool> s_initialSpiDone{false};

// Returns true once the initial SPI exchange has been marked as complete.
bool CP_initialSpiDone(void)
{
    return s_initialSpiDone.load();
}

// Marks the initial SPI exchange as complete. Idempotent: the flag is only ever
// set here, never cleared.
void CP_markInitialSpiDone(void)
{
    s_initialSpiDone.store(true);
}
// Queue a command for asynchronous delivery to the RP2350 (fire and forget).
//
// cmd: NUL-terminated command text. Text longer than IPCF_FILENAME_LEN - 1
//      characters is truncated. NULL or empty commands are ignored.
//
// The call never blocks: if the queue has not been created yet, or is full,
// the command is dropped silently (deliberate best-effort behaviour).
void CP_queueCmd(const char *cmd)
{
    // An empty command is indistinguishable from "no command" on the delivery
    // side (cmd[0] == '\0' marks an empty slot), so queuing it would only waste a
    // queue entry; a NULL pointer would crash strncpy().
    if (!s_cmdQueue || !cmd || cmd[0] == '\0')
    {
        return;
    }
    t_CpCmd entry = {}; // zero-filled, so the copy below is always NUL-terminated
    strncpy(entry.cmd, cmd, IPCF_FILENAME_LEN - 1);
    entry.doneSem = NULL; // async: nobody waits for delivery
    (void) xQueueSend(s_cmdQueue, &entry, 0); // drop silently if queue full
}
bool CP_sendCmd(const char *cmd, uint32_t timeoutMs)
{
if (!s_cmdQueue)
{
return false;
}
SemaphoreHandle_t done = xSemaphoreCreateBinary();
if (!done)
{
return false;
}
t_CpCmd entry = {};
strncpy(entry.cmd, cmd, IPCF_FILENAME_LEN - 1);
entry.doneSem = done;
// Wait up to 100 ms to enqueue (queue full is unlikely but possible).
if (xQueueSend(s_cmdQueue, &entry, pdMS_TO_TICKS(100)) != pdTRUE)
{
vSemaphoreDelete(done);
return false;
}
// Block until cmdNop delivers the command to RP2350 (or timeout).
bool delivered = (xSemaphoreTake(done, pdMS_TO_TICKS(timeoutMs)) == pdTRUE);
vSemaphoreDelete(done);
return delivered;
}
extern uint32_t g_oob_restart_magic;
#define CMDPROCTAG "CMDPROC"
// ---------------------------------------------------------------------------
// String splitter (unchanged — used for legacy / debug paths if needed)
// ---------------------------------------------------------------------------
// Splits 's' into the substrings separated by 'delimiter'.
//
// s:         text to split.
// delimiter: separator sequence; it is not included in the returned tokens.
//
// Returns the tokens in order of appearance. Adjacent, leading or trailing
// delimiters yield empty tokens, so the result always holds at least one
// element. An empty delimiter cannot separate anything: the whole of 's' is
// returned as a single token (without this guard find() would match at the
// same position forever and the loop would never terminate).
std::vector<std::string> CommandProcessor::split(const std::string &s, const std::string &delimiter)
{
    std::vector<std::string> tokens;
    if (delimiter.empty())
    {
        tokens.push_back(s);
        return tokens;
    }

    const size_t delimLen   = delimiter.length();
    size_t       tokenStart = 0;
    for (size_t hit = s.find(delimiter, tokenStart); hit != std::string::npos; hit = s.find(delimiter, tokenStart))
    {
        tokens.push_back(s.substr(tokenStart, hit - tokenStart));
        tokenStart = hit + delimLen; // resume the search just past the delimiter
    }
    tokens.push_back(s.substr(tokenStart)); // remainder after the last delimiter (may be empty)
    return tokens;
}
// ---------------------------------------------------------------------------
// waitForCommand — main command receive loop.
//
// The RP2350 now sends all disk I/O commands as binary IPC frames over SPI.
// This loop blocks on fspi.receiveBinaryCmd() with no timeout
// (portMAX_DELAY), so exactly one receive transaction is armed at a time.
// The loop itself does not poll the UART; an OOB request raised elsewhere is
// noticed via g_spi_clear_requested once the blocking receive returns.
//
// The SPI receive post_setup_cb asserts HS HIGH each time a receive is
// queued, signalling the RP2350 that the ESP32 is ready for a command.
// This replaces the old vTaskDelay(1) polling loop entirely.
// ---------------------------------------------------------------------------
void CommandProcessor::waitForCommand(void)
{
// Task body: never returns. Sequence is
//   1. reset-reason dependent startup delay,
//   2. SPI slave initialisation,
//   3. endless receive → dispatch loop.
//
// Startup delay — behaviour depends on reset reason:
//
// POWERON: fresh boot; SPI slave is uninitialised, HS stays LOW, so the
// RP2350 simply waits. The delay lets FreeRTOS settle and the RP2350 come
// up before the first spi_slave_transmit() call.
//
// PANIC / SW restart: the ESP32 may have crashed mid-transaction while the
// RP2350 was blocked in waitForHandShake() for T2/T3 (timeout = 3000 ms).
// If we raise HS (via post_setup_cb) before that timeout expires, the
// RP2350 mistakes the T1-ready pulse for a T2/T3 HS and sends the wrong
// payload into the 64-byte T1 DMA window, breaking frame synchronisation
// for the session. 3500 ms gives 500 ms margin beyond the RP2350 timeout.
{
// Proven fixed delays — simple and reliable:
//
// POWERON (2000 ms): Both RP2350 and ESP32 boot from scratch. The delay
// gives the RP2350 time to reach FSPI_init() and drive
// CS HIGH before we register the SPI slave ISR.
//
// SW reset + OOB magic (100 ms): The RP2350 is already running with CS
// firmly driven HIGH. Only used when OOB explicitly
// sent (currently disabled, but kept for future use).
//
// SW reset, no OOB (3500 ms): spihost corruption from RP2350 reset.
// The RP2350 is rebooting — 3500 ms gives it time to
// complete boot + FSPI_init + stabilize SPI bus.
//
// The OOB magic is only honoured on a non-power-on reset and is cleared
// immediately after being read, so it applies to one restart only.
bool isOobRestart = (esp_reset_reason() != ESP_RST_POWERON) && (g_oob_restart_magic == OOB_RESTART_MAGIC);
g_oob_restart_magic = 0u; // consume flag
uint32_t delayMs;
if (esp_reset_reason() == ESP_RST_POWERON)
{
delayMs = 2000u;
}
else if (isOobRestart)
{
delayMs = 100u;
}
else
{
delayMs = 3500u;
}
ESP_LOGI(CMDPROCTAG, "Startup delay %lu ms (reset reason %d, oob=%d).", delayMs, (int) esp_reset_reason(), (int) isOobRestart);
vTaskDelay(pdMS_TO_TICKS(delayMs));
}
// Initialize the SPI slave hardware after the delay.
// By this point the RP2350 has had time to call FSPI_init() which drives
// CS (GPIO 45) HIGH — no more floating CS noise from SPI pin glitches.
// HS was configured LOW in setupEarly() so RP2350 knew to wait.
if (!fspi.init())
{
// SPI init failed — log but do NOT restart. The ESP32 must stay online
// for WiFi/web interface (firmware updates, config). Execution falls
// through into the receive loop below regardless.
// NOTE(review): fspi.init() is only re-run on an OOB reinit request
// (g_spi_clear_requested); the error path of the loop retries the
// receive call but not the initialisation — confirm this is intended.
ESP_LOGE(CMDPROCTAG, "SPI slave init failed — SPI disabled, WiFi/web still active.");
}
t_IpcFrameHdr cmdFrame;
// Consecutive-failure counters, used only to rate-limit the log output below.
int badFrameCount = 0;
int spiErrorCount = 0;
for (;;)
{
// --- Binary SPI command receive ---
// Block indefinitely (portMAX_DELAY) waiting for the RP2350 to send a
// command frame. Using a finite timeout is UNSAFE: a timed-out call
// leaves the spi_slave_transaction_t on the stack (which goes out of
// scope) still referenced by the SPI slave ISR queue. When a real
// command later arrives, the ISR processes the stale (now dangling)
// transaction, puts its result in the queue, and the next call's
// spi_slave_get_trans_result() gets the wrong pointer → assert failure
// (spi_slave.c:524 "ret_trans == trans_desc").
//
// A secondary symptom: the stale pre-arm fires post_setup_cb (HS HIGH)
// immediately after a completed transaction's post_trans_cb (HS LOW),
// so the LOW→HIGH gap is sub-100µs and invisible to the RP2350's 100µs
// poll in waitForHandShake(false) → T3 starts while ESP32 still has a
// recv transaction armed → RP2350 receives zeros → bad frameType=00.
//
// portMAX_DELAY guarantees exactly one transaction in flight at all times.
//
// The header is cleared first so that a failed or short receive cannot
// leave a previous frame's contents behind.
memset(&cmdFrame, 0, IPCF_HEADER_SIZE);
esp_err_t spiRet = fspi.receiveBinaryCmd((uint8_t *) &cmdFrame, portMAX_DELAY);
// Check if OOB requested SPI slave reinit. This runs AFTER
// spi_slave_transmit returns (transaction complete, no pending DMA),
// so spi_slave_free is safe — no GDMA deadlock risk.
// NOTE(review): because of the 'continue' below, whatever frame was
// received in this iteration is discarded without being dispatched.
if (g_spi_clear_requested)
{
g_spi_clear_requested = false;
ESP_LOGW(CMDPROCTAG, "Reinitializing SPI slave (OOB request)...");
spi_slave_free(FSPI_HOST);
if (fspi.init())
{
ESP_LOGI(CMDPROCTAG, "SPI slave reinit OK.");
}
else
{
ESP_LOGE(CMDPROCTAG, "SPI slave reinit FAILED.");
}
badFrameCount = 0;
spiErrorCount = 0;
continue; // Re-arm spi_slave_transmit with fresh state
}
if (spiRet == ESP_OK)
{
spiErrorCount = 0; // SPI slave is working — reset error counter
if (cmdFrame.frameType == IPCF_TYPE_COMMAND)
{
// Valid binary command — dispatch immediately.
badFrameCount = 0;
processCommand(cmdFrame);
}
else if (cmdFrame.frameType == IPCF_TYPE_NOP || cmdFrame.frameType == 0)
{
// NOP or zero frame — normal idle. Reset bad frame counter.
badFrameCount = 0;
}
else
{
// Unknown/garbage frameType — protocol desync from RP2350 reset.
// The frame is ignored; consecutive occurrences are only counted and
// logged (the first 10, then every 100th). No restart or SPI reinit
// is triggered from here.
badFrameCount++;
if (badFrameCount <= 10 || (badFrameCount % 100) == 0)
ESP_LOGW(CMDPROCTAG, "Bad frameType=0x%02X (count=%d)", cmdFrame.frameType, badFrameCount);
}
}
else
{
// receiveBinaryCmd() reported an error — per the original notes, likely
// SPI slave driver corruption (e.g. spihost NULL after a spurious
// CS/SCK glitch during an RP2350 reboot).
//
// Policy: do NOT restart and do NOT tear anything down. Count the
// failure, log it (the first 5, then every 100th), wait 2 s and retry.
// If spihost is truly NULL every retry will fail, but WiFi and the web
// interface remain functional for firmware updates and config
// management. (Older notes here described calling esp_restart()
// immediately; the code below no longer does that.)
//
// DO NOT call spi_slave_free() here:
// If spihost[FSPI_HOST] is NULL (corrupted by SCK/CS glitches during
// RP2350 reset), spi_slave_free() returns ESP_ERR_INVALID_ARG without
// deregistering the ISR. If spihost is non-NULL but GDMA is in a bad
// state, spi_slave_free() holds the GDMA spinlock, esp_intr_free()
// re-enables interrupts, the SPI ISR fires and tries to acquire the
// same spinlock → deadlock → interrupt WDT.
//
// DO NOT call esp_wifi_stop() here:
// When WiFi is in a connection-retry cycle (as seen in the log), the
// WiFi state machine mutex is already held by the WiFi task.
// esp_wifi_stop() tries to take that mutex recursively →
// assert failed: xQueueTakeMutexRecursive queue.c:821 (pxMutex).
// Even outside a retry cycle, calling esp_wifi_stop() during WiFi
// driver init (<2 s) blocks on an internal driver lock → IWDT.
spiErrorCount++;
if (spiErrorCount <= 5 || (spiErrorCount % 100) == 0)
ESP_LOGE(CMDPROCTAG, "receiveBinaryCmd failed (%s), count=%d — retrying", esp_err_to_name(spiRet), spiErrorCount);
vTaskDelay(pdMS_TO_TICKS(2000));
continue;
}
}
}
// ---------------------------------------------------------------------------
// Initialisation (called by start()).
// ---------------------------------------------------------------------------
void CommandProcessor::init(void)
{
s_cmdQueue = xQueueCreate(CP_CMD_QUEUE_DEPTH, sizeof(t_CpCmd));
if (!s_cmdQueue)
{
ESP_LOGE(CMDPROCTAG, "Failed to create reverse-command queue.");
}
}
void CommandProcessor::cmdNop(const t_IpcFrameHdr &frame)
{
// NOP poll from RP2350 — deliver pending ESP32→RP2350 commands via the
// T3 response filename field.
//
// Reliability: the previous NOP's command is kept in s_pendingCmd until
// THIS NOP arrives — proving the RP2350 is alive and processed it (or
// at least received the frame). Only then do we dequeue the next entry.
// This prevents command loss when a CRC mismatch causes the RP2350 to
// discard the response: the next NOP re-sends the same command.
// --- Retire the previously-sent command (RP2350 got it) ---
if (s_pendingCmd.cmd[0] != '\0')
{
if (s_pendingCmd.doneSem)
xSemaphoreGive(s_pendingCmd.doneSem); // unblock CP_sendCmd caller
memset(&s_pendingCmd, 0, sizeof(s_pendingCmd));
}
// --- Dequeue the next command (if any) into the pending slot ---
if (s_cmdQueue && s_pendingCmd.cmd[0] == '\0')
{
xQueueReceive(s_cmdQueue, &s_pendingCmd, 0);
}
// --- Build NOP response with any pending command embedded ---
t_IpcFrameHdr hdr = {};
hdr.frameType = IPCF_TYPE_RESPONSE;
hdr.command = IPCF_CMD_NOP;
hdr.status = IPCF_STATUS_OK;
hdr.payloadLen = 0;
if (s_pendingCmd.cmd[0] != '\0')
{
strncpy(hdr.filename, s_pendingCmd.cmd, IPCF_FILENAME_LEN - 1);
}
uint32_t respSize = IPCF_HEADER_SIZE + IPCF_CRC_SIZE;
fspi.sendBinaryResp(&hdr, NULL, 0, respSize, portMAX_DELAY);
}
// ---------------------------------------------------------------------------
// start — spawn the waitForCommand task.
// ---------------------------------------------------------------------------
void CommandProcessor::start(void)
{
init();
xTaskCreate([](void *param) { static_cast<CommandProcessor *>(param)->waitForCommand(); }, "waitForCommand", TASK_STACK_SIZE, this, 12, NULL);
}
// ---------------------------------------------------------------------------
// processCommand — binary opcode dispatch.
//
// O(1) switch dispatch on frame.command (uint8_t), replacing the previous
// O(log n) std::map lookup on 3-char ASCII strings.
// ---------------------------------------------------------------------------
// Routes one received command frame to its handler, selected by frame.command.
//
// Before dispatching, any reverse command still awaiting acknowledgement is
// retired when the incoming opcode is not a NOP: such a command shows the
// RP2350 is alive and has moved past the last NOP exchange. NOP frames leave
// the pending command alone — cmdNop() does its own retire step.
//
// frame: header of the frame that was just received.
void CommandProcessor::processCommand(const t_IpcFrameHdr &frame)
{
    ESP_LOGD(CMDPROCTAG, "CMD opcode=%02X seq=%u file=%.32s", frame.command, frame.seqNum, frame.filename);

    const bool isNop = (frame.command == IPCF_CMD_NOP);
    if (!isNop && s_pendingCmd.cmd[0] != '\0')
    {
        if (s_pendingCmd.doneSem != NULL)
        {
            xSemaphoreGive(s_pendingCmd.doneSem); // wake a blocked CP_sendCmd() caller
        }
        memset(&s_pendingCmd, 0, sizeof(s_pendingCmd));
    }

    switch (frame.command)
    {
        case IPCF_CMD_NOP:
            cmdNop(frame);
            return;

        // Sector-level disk access.
        case IPCF_CMD_RDS:
            cmdReadSector(frame);
            return;
        case IPCF_CMD_RBURST:
            cmdReadBurst(frame);
            return;
        case IPCF_CMD_WRS:
            cmdWriteSector(frame);
            return;
        case IPCF_CMD_WBURST:
            cmdWriteBurst(frame);
            return;

        // All file-read opcodes share one handler.
        case IPCF_CMD_RFILE:
        case IPCF_CMD_RFD:
        case IPCF_CMD_RQD:
        case IPCF_CMD_RRF:
            cmdReadFile(frame);
            return;

        case IPCF_CMD_WFILE:
            cmdWriteFile(frame);
            return;
        case IPCF_CMD_INF:
            cmdReadInfo(frame);
            return;

        default:
            break;
    }

    // Opcode not recognised.
    cmdUnknown(frame);
}