diff --git a/Makefile b/Makefile
index b8d3ef6..c8b7e1a 100644
--- a/Makefile
+++ b/Makefile
@@ -48,6 +48,10 @@ DFLAGS = $(INCLUDE) -D_7ZIP_ST -DPACKAGE_VERSION=\"1.3.3\" -DFLAC_API_EXPORTS -D
CFLAGS = $(DFLAGS) -Wall -Wextra -Wno-strict-aliasing -Wno-stringop-overflow -Wno-stringop-truncation -Wno-format-truncation -Wno-psabi -Wno-restrict -c -O3
LFLAGS = -lc -lstdc++ -lm -lrt $(IMLIB2_LIB) -Llib/bluetooth -lbluetooth
+ifeq ($(PROFILING),1)
+ DFLAGS += -DPROFILING
+endif
+
$(PRJ): $(OBJ)
$(Q)$(info $@)
$(Q)$(CC) -o $@ $+ $(LFLAGS)
diff --git a/input.cpp b/input.cpp
index 62cbd03..b679132 100644
--- a/input.cpp
+++ b/input.cpp
@@ -27,6 +27,7 @@
#include "video.h"
#include "joymapping.h"
#include "support.h"
+#include "profiling.h"
#define NUMDEV 30
#define NUMPLAYERS 6
@@ -5178,6 +5179,8 @@ int input_test(int getchar)
int input_poll(int getchar)
{
+ PROFILE_FUNCTION();
+
static int af[NUMPLAYERS] = {};
static uint32_t time[NUMPLAYERS] = {};
static uint32_t joy_prev[NUMPLAYERS] = {};
diff --git a/menu.cpp b/menu.cpp
index 589ae20..3ad06e3 100644
--- a/menu.cpp
+++ b/menu.cpp
@@ -63,6 +63,7 @@ along with this program. If not, see .
#include "support.h"
#include "bootcore.h"
#include "ide.h"
+#include "profiling.h"
/*menu states*/
enum MENU
@@ -899,6 +900,8 @@ static int page = 0;
void HandleUI(void)
{
+ PROFILE_FUNCTION();
+
if (bt_timer >= 0)
{
if (!bt_timer) bt_timer = (int32_t)GetTimer(6000);
diff --git a/osd.cpp b/osd.cpp
index 6cc7a38..cabd1ac 100644
--- a/osd.cpp
+++ b/osd.cpp
@@ -45,6 +45,7 @@ as rotated copies of the first 128 entries. -- AMR
#include "logo.h"
#include "user_io.h"
#include "hardware.h"
+#include "profiling.h"
#include "support.h"
@@ -661,6 +662,7 @@ char* OsdCoreNameGet()
void OsdUpdate()
{
+ PROFILE_FUNCTION();
int n = is_menu() ? 19 : osd_size;
for (int i = 0; i < n; i++)
{
diff --git a/profiling.cpp b/profiling.cpp
new file mode 100644
index 0000000..1943cb8
--- /dev/null
+++ b/profiling.cpp
@@ -0,0 +1,130 @@
+#ifdef PROFILING
+
+#include "profiling.h"
+
+#include "str_util.h"
+
+#include
+#include
+#include
+
+struct Event
+{
+ const char *name;
+ uint32_t begin_idx;
+ struct timespec ts;
+};
+
+static constexpr int MAX_EVENTS = 512; // must be pow2
+static Event s_events[MAX_EVENTS]; // circular buffer
+static uint32_t s_event_tail = 0;
+
+static constexpr Event *get_event(uint32_t idx)
+{
+ return &s_events[idx % MAX_EVENTS];
+}
+
+uint32_t profiling_event_begin(const char *name)
+{
+ Event *newEvent = get_event(s_event_tail);
+ newEvent->begin_idx = s_event_tail;
+ newEvent->name = name;
+ clock_gettime(CLOCK_MONOTONIC, &newEvent->ts);
+
+ uint32_t r = s_event_tail;
+ s_event_tail++;
+ return r;
+}
+
+void profiling_event_end(uint32_t begin_idx, const char *name)
+{
+ Event *newEvent = get_event(s_event_tail);
+ newEvent->begin_idx = begin_idx;
+ newEvent->name = name;
+ clock_gettime(CLOCK_MONOTONIC, &newEvent->ts);
+ s_event_tail++;
+}
+
+// result_ns = a - b
+static uint64_t delta_ns(const struct timespec *a, const struct timespec *b)
+{
+ struct timespec ts;
+
+ ts.tv_sec = a->tv_sec - b->tv_sec;
+ ts.tv_nsec = a->tv_nsec - b->tv_nsec;
+ if (ts.tv_nsec < 0)
+ {
+ ts.tv_nsec += 1000000000;
+ ts.tv_sec -= 1;
+ }
+
+ uint64_t delta = ts.tv_sec * 1000000000ULL;
+ delta += ts.tv_nsec;
+ return delta;
+}
+
+
+// Bookkeeping data for spike report
+static uint64_t inclusive_times[MAX_EVENTS];
+static uint64_t other_times[MAX_EVENTS];
+static uint32_t pair_stack[MAX_EVENTS / 2];
+
+void profiling_spike_report(uint32_t begin_idx, uint32_t spike_us)
+{
+ int stack_pos = 0;
+
+ if ((s_event_tail - begin_idx) < 2) return; // not enough events
+ if ((s_event_tail - begin_idx) > MAX_EVENTS) return; // too many events
+
+ const uint64_t total_ns = delta_ns(&get_event(s_event_tail - 1)->ts, &get_event(begin_idx)->ts);
+
+ if (total_ns < (spike_us * 1000ULL)) return; // below threshold
+
+ for (uint32_t idx = begin_idx; idx != s_event_tail; idx++)
+ {
+ const uint32_t cyc_idx = idx % MAX_EVENTS;
+ Event *event = get_event(idx);
+
+ if (event->begin_idx == idx)
+ {
+ pair_stack[stack_pos] = cyc_idx;
+ inclusive_times[cyc_idx] = 0;
+ other_times[cyc_idx] = 0;
+ stack_pos++;
+ }
+ else
+ {
+ stack_pos--;
+ uint32_t span_idx = pair_stack[stack_pos];
+ const uint64_t inclusive_ns = delta_ns(&event->ts, &get_event(span_idx)->ts);
+ inclusive_times[span_idx] = inclusive_ns;
+ if (stack_pos > 0) other_times[pair_stack[stack_pos-1]] += inclusive_ns;
+ }
+ }
+
+ char label[256];
+ int indent = 0;
+ printf("\n%lluus spike over %uus limit.\n", total_ns / 1000ULL, spike_us);
+ printf("+----- Name -----------------------------------------+ Inc(us) + Exc(us) +\n");
+ for (uint32_t idx = begin_idx; idx != s_event_tail; idx++)
+ {
+ const uint32_t cyc_idx = idx % MAX_EVENTS;
+ Event *event = get_event(idx);
+
+ if (event->begin_idx == idx)
+ {
+ memset(label, ' ', indent);
+ strcpyz(label + indent, sizeof(label) - indent, event->name);
+ printf("| %-50s | %7llu | %7llu |\n", label, inclusive_times[cyc_idx] / 1000ULL, (inclusive_times[cyc_idx] - other_times[cyc_idx]) / 1000ULL);
+ indent += 2;
+ }
+ else
+ {
+ indent -= 2;
+ }
+ }
+ printf("+----------------------------------------------------+---------+---------+\n\n");
+ fflush(stdout);
+}
+
+#endif // PROFILING
\ No newline at end of file
diff --git a/profiling.h b/profiling.h
new file mode 100644
index 0000000..4374580
--- /dev/null
+++ b/profiling.h
@@ -0,0 +1,53 @@
+#ifndef PROFILING_H
+#define PROFILING_H 1
+
+#include
+
+#ifdef PROFILING
+
+uint32_t profiling_event_begin(const char *name);
+void profiling_event_end(uint32_t begin_idx, const char *name);
+void profiling_spike_report(uint32_t begin_idx, uint32_t spike_us);
+
+struct ProfilingScopedEvent
+{
+ const char *name;
+ uint32_t spike_us;
+ uint32_t begin_idx;
+
+ ProfilingScopedEvent(const char *name)
+ : name(name)
+ , spike_us(0)
+ {
+ begin_idx = profiling_event_begin(name);
+ }
+
+ ProfilingScopedEvent(const char *name, uint32_t spike_us)
+ : name(name)
+ , spike_us(spike_us)
+ {
+ begin_idx = profiling_event_begin(name);
+ }
+
+ ~ProfilingScopedEvent()
+ {
+ profiling_event_end(begin_idx, name);
+ if (spike_us > 0) profiling_spike_report(begin_idx, spike_us);
+ }
+};
+
+#define PROFILE_SCOPE(name) ProfilingScopedEvent __scope_timer(name)
+#define PROFILE_FUNCTION() ProfilingScopedEvent __scope_timer(__FUNCTION__)
+#define SPIKE_SCOPE(name, us) ProfilingScopedEvent __scope_timer(name, us)
+#define SPIKE_FUNCTION(us) ProfilingScopedEvent __scope_timer(__FUNCTION__, us)
+
+#else // PROFILING
+
+#define PROFILE_SCOPE(name)
+#define PROFILE_FUNCTION()
+#define SPIKE_SCOPE(name, us)
+#define SPIKE_FUNCTION(us)
+
+#endif // PROFILING
+
+#endif // PROFILING_H
diff --git a/scheduler.cpp b/scheduler.cpp
index 26979d6..919d31a 100644
--- a/scheduler.cpp
+++ b/scheduler.cpp
@@ -6,6 +6,7 @@
#include "input.h"
#include "fpga_io.h"
#include "osd.h"
+#include "profiling.h"
static cothread_t co_scheduler = nullptr;
static cothread_t co_poll = nullptr;
@@ -26,8 +27,11 @@ static void scheduler_co_poll(void)
{
scheduler_wait_fpga_ready();
- user_io_poll();
- input_poll(0);
+ {
+ SPIKE_SCOPE("co_poll", 1000);
+ user_io_poll();
+ input_poll(0);
+ }
scheduler_yield();
}
@@ -37,8 +41,11 @@ static void scheduler_co_ui(void)
{
for (;;)
{
- HandleUI();
- OsdUpdate();
+ {
+ SPIKE_SCOPE("co_ui", 1000);
+ HandleUI();
+ OsdUpdate();
+ }
scheduler_yield();
}
diff --git a/user_io.cpp b/user_io.cpp
index ba5dcd7..34f0aca 100644
--- a/user_io.cpp
+++ b/user_io.cpp
@@ -34,6 +34,7 @@
#include "audio.h"
#include "shmem.h"
#include "ide.h"
+#include "profiling.h"
#include "support.h"
@@ -2783,6 +2784,8 @@ static uint32_t res_timer = 0;
void user_io_poll()
{
+ PROFILE_FUNCTION();
+
if ((core_type != CORE_TYPE_SHARPMZ) &&
(core_type != CORE_TYPE_8BIT))
{
diff --git a/video.cpp b/video.cpp
index 59444d1..4642412 100644
--- a/video.cpp
+++ b/video.cpp
@@ -23,6 +23,7 @@
#include "shmem.h"
#include "smbus.h"
#include "str_util.h"
+#include "profiling.h"
#include "support.h"
#include "lib/imlib2/Imlib2.h"
@@ -237,6 +238,8 @@ static int findPLLpar(double Fout, uint32_t *pc, uint32_t *pm, double *pko)
static void setPLL(double Fout, vmode_custom_t *v)
{
+ PROFILE_FUNCTION();
+
double Fpix;
double fvco, ko;
uint32_t m, c;
@@ -436,6 +439,8 @@ static void send_phases(int addr, const FilterPhase phases[N_PHASES], bool full_
static void send_video_filters(const VideoFilter *horiz, const VideoFilter *vert, int ver)
{
+ PROFILE_FUNCTION();
+
spi_uio_cmd_cont(UIO_SET_FLTCOEF);
const bool full_precision = (ver & 0x4) != 0;
@@ -472,6 +477,8 @@ static void send_video_filters(const VideoFilter *horiz, const VideoFilter *vert
static void set_vfilter(int force)
{
+ PROFILE_FUNCTION();
+
static int last_flags = 0;
int flt_flags = spi_uio_cmd_cont(UIO_SET_FLTNUM);
@@ -514,6 +521,8 @@ static void set_vfilter(int force)
static void setScaler()
{
+ PROFILE_FUNCTION();
+
uint32_t arc[4] = {};
for (int i = 0; i < 2; i++)
{
@@ -569,6 +578,8 @@ void video_set_scaler_coeff(int type, const char *name)
static void loadScalerCfg()
{
+ PROFILE_FUNCTION();
+
sprintf(scaler_cfg, "%s_scaler.cfg", user_io_get_core_name());
memset(scaler_flt, 0, sizeof(scaler_cfg));
if (!FileLoadConfig(scaler_cfg, &scaler_flt, sizeof(scaler_flt)) || scaler_flt[0].mode > 1)
@@ -605,6 +616,8 @@ static char has_gamma = 0;
static void setGamma()
{
+ PROFILE_FUNCTION();
+
fileTextReader reader = {};
static char filename[1024];
@@ -686,6 +699,7 @@ void video_set_gamma_curve(const char *name)
static void loadGammaCfg()
{
+ PROFILE_FUNCTION();
sprintf(gamma_cfg_path, "%s_gamma.cfg", user_io_get_core_name());
if (!FileLoadConfig(gamma_cfg_path, &gamma_cfg, sizeof(gamma_cfg) - 1) || gamma_cfg[0]>1)
{
@@ -717,6 +731,8 @@ enum
static void setShadowMask()
{
+ PROFILE_FUNCTION();
+
static char filename[1024];
has_shadow_mask = 0;
@@ -860,6 +876,8 @@ void video_set_shadow_mask(const char *name)
static void loadShadowMaskCfg()
{
+ PROFILE_FUNCTION();
+
sprintf(shadow_mask_cfg_path, "%s_shmask.cfg", user_io_get_core_name());
if (!FileLoadConfig(shadow_mask_cfg_path, &shadow_mask_cfg, sizeof(shadow_mask_cfg) - 1))
{
@@ -1010,6 +1028,8 @@ static void hdmi_config_set_spare(bool val)
static void hdmi_config()
{
+ PROFILE_FUNCTION();
+
int ypbpr = cfg.ypbpr && cfg.direct_video;
const uint8_t vic_mode = (uint8_t)v_cur.param.vic;
uint8_t pr_flags;
@@ -1467,6 +1487,8 @@ static int get_edid_vmode(vmode_custom_t *v)
static void set_vrr_mode()
{
+ PROFILE_FUNCTION();
+
use_vrr = 0;
float vrateh = 100000000;
@@ -1636,6 +1658,8 @@ static void set_vrr_mode()
static char fb_reset_cmd[128] = {};
static void set_video(vmode_custom_t *v, double Fpix)
{
+ PROFILE_FUNCTION();
+
loadGammaCfg();
setGamma();
@@ -1759,8 +1783,12 @@ static void set_video(vmode_custom_t *v, double Fpix)
if (fb_enabled) video_fb_enable(1, fb_num);
- sprintf(fb_reset_cmd, "echo %d %d %d %d %d >/sys/module/MiSTer_fb/parameters/mode", 8888, 1, fb_width, fb_height, fb_width * 4);
- system(fb_reset_cmd);
+ {
+ PROFILE_SCOPE("Write MiSTer_fb");
+
+ sprintf(fb_reset_cmd, "echo %d %d %d %d %d >/sys/module/MiSTer_fb/parameters/mode", 8888, 1, fb_width, fb_height, fb_width * 4);
+ system(fb_reset_cmd);
+ }
loadShadowMaskCfg();
setShadowMask();
@@ -2307,6 +2335,8 @@ void video_mode_adjust()
void video_fb_enable(int enable, int n)
{
+ PROFILE_FUNCTION();
+
if (fb_base)
{
int res = spi_uio_cmd_cont(UIO_SET_FBUF);