diff --git a/Makefile b/Makefile index b8d3ef6..c8b7e1a 100644 --- a/Makefile +++ b/Makefile @@ -48,6 +48,10 @@ DFLAGS = $(INCLUDE) -D_7ZIP_ST -DPACKAGE_VERSION=\"1.3.3\" -DFLAC_API_EXPORTS -D CFLAGS = $(DFLAGS) -Wall -Wextra -Wno-strict-aliasing -Wno-stringop-overflow -Wno-stringop-truncation -Wno-format-truncation -Wno-psabi -Wno-restrict -c -O3 LFLAGS = -lc -lstdc++ -lm -lrt $(IMLIB2_LIB) -Llib/bluetooth -lbluetooth +ifeq ($(PROFILING),1) + DFLAGS += -DPROFILING +endif + $(PRJ): $(OBJ) $(Q)$(info $@) $(Q)$(CC) -o $@ $+ $(LFLAGS) diff --git a/input.cpp b/input.cpp index 62cbd03..b679132 100644 --- a/input.cpp +++ b/input.cpp @@ -27,6 +27,7 @@ #include "video.h" #include "joymapping.h" #include "support.h" +#include "profiling.h" #define NUMDEV 30 #define NUMPLAYERS 6 @@ -5178,6 +5179,8 @@ int input_test(int getchar) int input_poll(int getchar) { + PROFILE_FUNCTION(); + static int af[NUMPLAYERS] = {}; static uint32_t time[NUMPLAYERS] = {}; static uint32_t joy_prev[NUMPLAYERS] = {}; diff --git a/menu.cpp b/menu.cpp index 589ae20..3ad06e3 100644 --- a/menu.cpp +++ b/menu.cpp @@ -63,6 +63,7 @@ along with this program. If not, see . #include "support.h" #include "bootcore.h" #include "ide.h" +#include "profiling.h" /*menu states*/ enum MENU @@ -899,6 +900,8 @@ static int page = 0; void HandleUI(void) { + PROFILE_FUNCTION(); + if (bt_timer >= 0) { if (!bt_timer) bt_timer = (int32_t)GetTimer(6000); diff --git a/osd.cpp b/osd.cpp index 6cc7a38..cabd1ac 100644 --- a/osd.cpp +++ b/osd.cpp @@ -45,6 +45,7 @@ as rotated copies of the first 128 entries. -- AMR #include "logo.h" #include "user_io.h" #include "hardware.h" +#include "profiling.h" #include "support.h" @@ -661,6 +662,7 @@ char* OsdCoreNameGet() void OsdUpdate() { + PROFILE_FUNCTION(); int n = is_menu() ? 19 : osd_size; for (int i = 0; i < n; i++) { diff --git a/profiling.cpp b/profiling.cpp new file mode 100644 index 0000000..1943cb8 --- /dev/null +++ b/profiling.cpp @@ -0,0 +1,130 @@ +#ifdef PROFILING + +#include "profiling.h" + +#include "str_util.h" + +#include +#include +#include + +struct Event +{ + const char *name; + uint32_t begin_idx; + struct timespec ts; +}; + +static constexpr int MAX_EVENTS = 512; // must be pow2 +static Event s_events[MAX_EVENTS]; // circular buffer +static uint32_t s_event_tail = 0; + +static constexpr Event *get_event(uint32_t idx) +{ + return &s_events[idx % MAX_EVENTS]; +} + +uint32_t profiling_event_begin(const char *name) +{ + Event *newEvent = get_event(s_event_tail); + newEvent->begin_idx = s_event_tail; + newEvent->name = name; + clock_gettime(CLOCK_MONOTONIC, &newEvent->ts); + + uint32_t r = s_event_tail; + s_event_tail++; + return r; +} + +void profiling_event_end(uint32_t begin_idx, const char *name) +{ + Event *newEvent = get_event(s_event_tail); + newEvent->begin_idx = begin_idx; + newEvent->name = name; + clock_gettime(CLOCK_MONOTONIC, &newEvent->ts); + s_event_tail++; +} + +// result_ns = a - b +static uint64_t delta_ns(const struct timespec *a, const struct timespec *b) +{ + struct timespec ts; + + ts.tv_sec = a->tv_sec - b->tv_sec; + ts.tv_nsec = a->tv_nsec - b->tv_nsec; + if (ts.tv_nsec < 0) + { + ts.tv_nsec += 1000000000; + ts.tv_sec -= 1; + } + + uint64_t delta = ts.tv_sec * 1000000000ULL; + delta += ts.tv_nsec; + return delta; +} + + +// Bookkeeping data for spike report +static uint64_t inclusive_times[MAX_EVENTS]; +static uint64_t other_times[MAX_EVENTS]; +static uint32_t pair_stack[MAX_EVENTS / 2]; + +void profiling_spike_report(uint32_t begin_idx, uint32_t spike_us) +{ + int stack_pos = 0; + + if ((s_event_tail - begin_idx) < 2) return; // not enough events + if ((s_event_tail - begin_idx) > MAX_EVENTS) return; // too many events + + const uint64_t total_ns = delta_ns(&get_event(s_event_tail - 1)->ts, &get_event(begin_idx)->ts); + + if (total_ns < (spike_us * 1000ULL)) return; // below threshold + + for (uint32_t idx = begin_idx; idx != s_event_tail; idx++) + { + const uint32_t cyc_idx = idx % MAX_EVENTS; + Event *event = get_event(idx); + + if (event->begin_idx == idx) + { + pair_stack[stack_pos] = cyc_idx; + inclusive_times[cyc_idx] = 0; + other_times[cyc_idx] = 0; + stack_pos++; + } + else + { + stack_pos--; + uint32_t span_idx = pair_stack[stack_pos]; + const uint64_t inclusive_ns = delta_ns(&event->ts, &get_event(span_idx)->ts); + inclusive_times[span_idx] = inclusive_ns; + if (stack_pos > 0) other_times[pair_stack[stack_pos-1]] += inclusive_ns; + } + } + + char label[256]; + int indent = 0; + printf("\n%lluus spike over %uus limit.\n", total_ns / 1000ULL, spike_us); + printf("+----- Name -----------------------------------------+ Inc(us) + Exc(us) +\n"); + for (uint32_t idx = begin_idx; idx != s_event_tail; idx++) + { + const uint32_t cyc_idx = idx % MAX_EVENTS; + Event *event = get_event(idx); + + if (event->begin_idx == idx) + { + memset(label, ' ', indent); + strcpyz(label + indent, sizeof(label) - indent, event->name); + printf("| %-50s | %7llu | %7llu |\n", label, inclusive_times[cyc_idx] / 1000ULL, (inclusive_times[cyc_idx] - other_times[cyc_idx]) / 1000ULL); + indent += 2; + } + else + { + indent -= 2; + } + } + printf("+----------------------------------------------------+---------+---------+\n\n"); + fflush(stdout); +} + +#endif // PROFILING \ No newline at end of file diff --git a/profiling.h b/profiling.h new file mode 100644 index 0000000..4374580 --- /dev/null +++ b/profiling.h @@ -0,0 +1,53 @@ +#ifndef PROFILING_H +#define PROFILING_H 1 + +#include + +#ifdef PROFILING + +uint32_t profiling_event_begin(const char *name); +void profiling_event_end(uint32_t begin_idx, const char *name); +void profiling_spike_report(uint32_t begin_idx, uint32_t spike_us); + +struct ProfilingScopedEvent +{ + const char *name; + uint32_t spike_us; + uint32_t begin_idx; + + ProfilingScopedEvent(const char *name) + : name(name) + , spike_us(0) + { + begin_idx = profiling_event_begin(name); + } + + ProfilingScopedEvent(const char *name, uint32_t spike_us) + : name(name) + , spike_us(spike_us) + { + begin_idx = profiling_event_begin(name); + } + + ~ProfilingScopedEvent() + { + profiling_event_end(begin_idx, name); + if (spike_us > 0) profiling_spike_report(begin_idx, spike_us); + } +}; + +#define PROFILE_SCOPE(name) ProfilingScopedEvent __scope_timer(name) +#define PROFILE_FUNCTION() ProfilingScopedEvent __scope_timer(__FUNCTION__) +#define SPIKE_SCOPE(name, us) ProfilingScopedEvent __scope_timer(name, us) +#define SPIKE_FUNCTION(us) ProfilingScopedEvent __scope_timer(__FUNCTION__, us) + +#else // PROFILING + +#define PROFILE_SCOPE(name) +#define PROFILE_FUNCTION() +#define SPIKE_SCOPE(name, us) +#define SPIKE_FUNCTION(us) + +#endif // PROFILING + +#endif // PROFILING_H diff --git a/scheduler.cpp b/scheduler.cpp index 26979d6..919d31a 100644 --- a/scheduler.cpp +++ b/scheduler.cpp @@ -6,6 +6,7 @@ #include "input.h" #include "fpga_io.h" #include "osd.h" +#include "profiling.h" static cothread_t co_scheduler = nullptr; static cothread_t co_poll = nullptr; @@ -26,8 +27,11 @@ static void scheduler_co_poll(void) { scheduler_wait_fpga_ready(); - user_io_poll(); - input_poll(0); + { + SPIKE_SCOPE("co_poll", 1000); + user_io_poll(); + input_poll(0); + } scheduler_yield(); } @@ -37,8 +41,11 @@ static void scheduler_co_ui(void) { for (;;) { - HandleUI(); - OsdUpdate(); + { + SPIKE_SCOPE("co_ui", 1000); + HandleUI(); + OsdUpdate(); + } scheduler_yield(); } diff --git a/user_io.cpp b/user_io.cpp index ba5dcd7..34f0aca 100644 --- a/user_io.cpp +++ b/user_io.cpp @@ -34,6 +34,7 @@ #include "audio.h" #include "shmem.h" #include "ide.h" +#include "profiling.h" #include "support.h" @@ -2783,6 +2784,8 @@ static uint32_t res_timer = 0; void user_io_poll() { + PROFILE_FUNCTION(); + if ((core_type != CORE_TYPE_SHARPMZ) && (core_type != CORE_TYPE_8BIT)) { diff --git a/video.cpp b/video.cpp index 59444d1..4642412 100644 --- a/video.cpp +++ b/video.cpp @@ -23,6 +23,7 @@ #include "shmem.h" #include "smbus.h" #include "str_util.h" +#include "profiling.h" #include "support.h" #include "lib/imlib2/Imlib2.h" @@ -237,6 +238,8 @@ static int findPLLpar(double Fout, uint32_t *pc, uint32_t *pm, double *pko) static void setPLL(double Fout, vmode_custom_t *v) { + PROFILE_FUNCTION(); + double Fpix; double fvco, ko; uint32_t m, c; @@ -436,6 +439,8 @@ static void send_phases(int addr, const FilterPhase phases[N_PHASES], bool full_ static void send_video_filters(const VideoFilter *horiz, const VideoFilter *vert, int ver) { + PROFILE_FUNCTION(); + spi_uio_cmd_cont(UIO_SET_FLTCOEF); const bool full_precision = (ver & 0x4) != 0; @@ -472,6 +477,8 @@ static void send_video_filters(const VideoFilter *horiz, const VideoFilter *vert static void set_vfilter(int force) { + PROFILE_FUNCTION(); + static int last_flags = 0; int flt_flags = spi_uio_cmd_cont(UIO_SET_FLTNUM); @@ -514,6 +521,8 @@ static void set_vfilter(int force) static void setScaler() { + PROFILE_FUNCTION(); + uint32_t arc[4] = {}; for (int i = 0; i < 2; i++) { @@ -569,6 +578,8 @@ void video_set_scaler_coeff(int type, const char *name) static void loadScalerCfg() { + PROFILE_FUNCTION(); + sprintf(scaler_cfg, "%s_scaler.cfg", user_io_get_core_name()); memset(scaler_flt, 0, sizeof(scaler_cfg)); if (!FileLoadConfig(scaler_cfg, &scaler_flt, sizeof(scaler_flt)) || scaler_flt[0].mode > 1) @@ -605,6 +616,8 @@ static char has_gamma = 0; static void setGamma() { + PROFILE_FUNCTION(); + fileTextReader reader = {}; static char filename[1024]; @@ -686,6 +699,7 @@ void video_set_gamma_curve(const char *name) static void loadGammaCfg() { + PROFILE_FUNCTION(); sprintf(gamma_cfg_path, "%s_gamma.cfg", user_io_get_core_name()); if (!FileLoadConfig(gamma_cfg_path, &gamma_cfg, sizeof(gamma_cfg) - 1) || gamma_cfg[0]>1) { @@ -717,6 +731,8 @@ enum static void setShadowMask() { + PROFILE_FUNCTION(); + static char filename[1024]; has_shadow_mask = 0; @@ -860,6 +876,8 @@ void video_set_shadow_mask(const char *name) static void loadShadowMaskCfg() { + PROFILE_FUNCTION(); + sprintf(shadow_mask_cfg_path, "%s_shmask.cfg", user_io_get_core_name()); if (!FileLoadConfig(shadow_mask_cfg_path, &shadow_mask_cfg, sizeof(shadow_mask_cfg) - 1)) { @@ -1010,6 +1028,8 @@ static void hdmi_config_set_spare(bool val) static void hdmi_config() { + PROFILE_FUNCTION(); + int ypbpr = cfg.ypbpr && cfg.direct_video; const uint8_t vic_mode = (uint8_t)v_cur.param.vic; uint8_t pr_flags; @@ -1467,6 +1487,8 @@ static int get_edid_vmode(vmode_custom_t *v) static void set_vrr_mode() { + PROFILE_FUNCTION(); + use_vrr = 0; float vrateh = 100000000; @@ -1636,6 +1658,8 @@ static void set_vrr_mode() static char fb_reset_cmd[128] = {}; static void set_video(vmode_custom_t *v, double Fpix) { + PROFILE_FUNCTION(); + loadGammaCfg(); setGamma(); @@ -1759,8 +1783,12 @@ static void set_video(vmode_custom_t *v, double Fpix) if (fb_enabled) video_fb_enable(1, fb_num); - sprintf(fb_reset_cmd, "echo %d %d %d %d %d >/sys/module/MiSTer_fb/parameters/mode", 8888, 1, fb_width, fb_height, fb_width * 4); - system(fb_reset_cmd); + { + PROFILE_SCOPE("Write MiSTer_fb"); + + sprintf(fb_reset_cmd, "echo %d %d %d %d %d >/sys/module/MiSTer_fb/parameters/mode", 8888, 1, fb_width, fb_height, fb_width * 4); + system(fb_reset_cmd); + } loadShadowMaskCfg(); setShadowMask(); @@ -2307,6 +2335,8 @@ void video_mode_adjust() void video_fb_enable(int enable, int n) { + PROFILE_FUNCTION(); + if (fb_base) { int res = spi_uio_cmd_cont(UIO_SET_FBUF);