diff --git a/Makefile b/Makefile index b8d3ef6..de8048b 100644 --- a/Makefile +++ b/Makefile @@ -46,7 +46,11 @@ DEP = $(C_SRC:.c=.c.d) $(CPP_SRC:.cpp=.cpp.d) DFLAGS = $(INCLUDE) -D_7ZIP_ST -DPACKAGE_VERSION=\"1.3.3\" -DFLAC_API_EXPORTS -DFLAC__HAS_OGG=0 -DHAVE_LROUND -DHAVE_STDINT_H -DHAVE_STDLIB_H -DHAVE_SYS_PARAM_H -DENABLE_64_BIT_WORDS=0 -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE -DVDATE=\"`date +"%y%m%d"`\" CFLAGS = $(DFLAGS) -Wall -Wextra -Wno-strict-aliasing -Wno-stringop-overflow -Wno-stringop-truncation -Wno-format-truncation -Wno-psabi -Wno-restrict -c -O3 -LFLAGS = -lc -lstdc++ -lm -lrt $(IMLIB2_LIB) -Llib/bluetooth -lbluetooth +LFLAGS = -lc -lstdc++ -lm -lrt $(IMLIB2_LIB) -Llib/bluetooth -lbluetooth -lpthread + +ifeq ($(PROFILING),1) + DFLAGS += -DPROFILING +endif $(PRJ): $(OBJ) $(Q)$(info $@) diff --git a/MiSTer.vcxproj b/MiSTer.vcxproj index 74b14cc..b157e17 100644 --- a/MiSTer.vcxproj +++ b/MiSTer.vcxproj @@ -73,13 +73,16 @@ + + + @@ -142,13 +145,16 @@ + + + diff --git a/MiSTer.vcxproj.filters b/MiSTer.vcxproj.filters index c3e2751..7046df0 100644 --- a/MiSTer.vcxproj.filters +++ b/MiSTer.vcxproj.filters @@ -223,6 +223,15 @@ Source Files + + Source Files + + + Source Files + + + Source Files + @@ -432,5 +441,14 @@ Header Files + + Header Files + + + Header Files + + + Header Files + \ No newline at end of file diff --git a/file_io.cpp b/file_io.cpp index 464b4e1..7351734 100644 --- a/file_io.cpp +++ b/file_io.cpp @@ -1105,7 +1105,7 @@ void FindStorage(void) device = 0; cfg_parse(); device = saveddev; - video_mode_load(); + video_init(); user_io_send_buttons(1); printf("Waiting for USB...\n"); diff --git a/fpga_io.cpp b/fpga_io.cpp index 7045679..51c4811 100644 --- a/fpga_io.cpp +++ b/fpga_io.cpp @@ -15,6 +15,7 @@ #include "osd.h" #include "menu.h" #include "shmem.h" +#include "offload.h" #include "fpga_base_addr_ac5.h" #include "fpga_manager.h" @@ -617,6 +618,8 @@ void app_restart(const char *path, const char *xml) input_switch(0); input_uinp_destroy(); + offload_stop(); + char *appname = getappname(); printf("restarting the %s\n", appname); execl(appname, appname, path, xml, NULL); diff --git a/input.cpp b/input.cpp index 62cbd03..b679132 100644 --- a/input.cpp +++ b/input.cpp @@ -27,6 +27,7 @@ #include "video.h" #include "joymapping.h" #include "support.h" +#include "profiling.h" #define NUMDEV 30 #define NUMPLAYERS 6 @@ -5178,6 +5179,8 @@ int input_test(int getchar) int input_poll(int getchar) { + PROFILE_FUNCTION(); + static int af[NUMPLAYERS] = {}; static uint32_t time[NUMPLAYERS] = {}; static uint32_t joy_prev[NUMPLAYERS] = {}; diff --git a/main.cpp b/main.cpp index f9109d2..99c03eb 100644 --- a/main.cpp +++ b/main.cpp @@ -32,6 +32,7 @@ along with this program. If not, see . #include "fpga_io.h" #include "scheduler.h" #include "osd.h" +#include "offload.h" const char *version = "$VER:" VDATE; @@ -45,6 +46,8 @@ int main(int argc, char *argv[]) CPU_SET(1, &set); sched_setaffinity(0, sizeof(set), &set); + offload_start(); + fpga_io_init(); DISKLED_OFF; diff --git a/menu.cpp b/menu.cpp index 589ae20..3ad06e3 100644 --- a/menu.cpp +++ b/menu.cpp @@ -63,6 +63,7 @@ along with this program. If not, see . #include "support.h" #include "bootcore.h" #include "ide.h" +#include "profiling.h" /*menu states*/ enum MENU @@ -899,6 +900,8 @@ static int page = 0; void HandleUI(void) { + PROFILE_FUNCTION(); + if (bt_timer >= 0) { if (!bt_timer) bt_timer = (int32_t)GetTimer(6000); diff --git a/offload.cpp b/offload.cpp new file mode 100644 index 0000000..e25397d --- /dev/null +++ b/offload.cpp @@ -0,0 +1,122 @@ +#include "offload.h" +#include "profiling.h" +#include +#include +#include +#include + +static constexpr uint32_t QUEUE_SIZE = 8; + +static pthread_t s_thread_handle; +static pthread_cond_t s_cond_work, s_cond_available; +static pthread_mutex_t s_queue_lock; + +struct Work +{ + std::function handler; +}; + +static Work s_queue[QUEUE_SIZE]; +static uint32_t s_queue_head, s_queue_tail; +static bool s_quit; + +static void *worker_thread(void *) +{ + while (true) + { + Work *current_work = nullptr; + // Wait for work + pthread_mutex_lock(&s_queue_lock); + if (s_queue_head == s_queue_tail) + { + // queue empty and quit flag set, exit + if (s_quit) + { + pthread_mutex_unlock(&s_queue_lock); + break; + } + + // wait for work signal + pthread_cond_wait(&s_cond_work, &s_queue_lock); + + // quit flag was set and queue still empty, quit + if (s_quit && (s_queue_head == s_queue_tail)) + { + pthread_mutex_unlock(&s_queue_lock); + break; + } + } + + // get work + current_work = &s_queue[s_queue_tail % QUEUE_SIZE]; + pthread_mutex_unlock(&s_queue_lock); + + // execute + current_work->handler(); + current_work->handler = nullptr; + + // lock and move tail forward + pthread_mutex_lock(&s_queue_lock); + s_queue_tail++; + pthread_cond_signal(&s_cond_available); + pthread_mutex_unlock(&s_queue_lock); + } + return (void *)0; +} + +void offload_start() +{ + pthread_cond_init(&s_cond_available, nullptr); + pthread_cond_init(&s_cond_work, nullptr); + pthread_mutex_init(&s_queue_lock, nullptr); + + s_queue_head = s_queue_tail = 0; + s_quit = false; + + pthread_attr_t attr; + + pthread_attr_init(&attr); + + // Set affinity to core #0 since main runs on core #1 + cpu_set_t set; + CPU_ZERO(&set); + CPU_SET(0, &set); + pthread_attr_setaffinity_np(&attr, sizeof(set), &set); + + pthread_create(&s_thread_handle, &attr, worker_thread, nullptr); +} + +void offload_stop() +{ + pthread_mutex_lock(&s_queue_lock); + + s_quit = true; + pthread_cond_signal(&s_cond_work); + + pthread_mutex_unlock(&s_queue_lock); + + printf("Waiting for offloaded work to finish..."); + pthread_join(s_thread_handle, nullptr); + printf("Done\n"); +} + +void offload_add_work(std::function handler) +{ + PROFILE_FUNCTION(); + + pthread_mutex_lock(&s_queue_lock); + + if ((s_queue_head - s_queue_tail) == QUEUE_SIZE) + { + pthread_cond_wait(&s_cond_available, &s_queue_lock); + } + + Work *work = &s_queue[s_queue_head % QUEUE_SIZE]; + work->handler = handler; + + s_queue_head++; + + pthread_cond_signal(&s_cond_work); + + pthread_mutex_unlock(&s_queue_lock); +} \ No newline at end of file diff --git a/offload.h b/offload.h new file mode 100644 index 0000000..a31ec04 --- /dev/null +++ b/offload.h @@ -0,0 +1,12 @@ +#ifndef OFFLOAD_H +#define OFFLOAD_H + +#include +#include + +void offload_start(); +void offload_stop(); + +void offload_add_work(std::function work); + +#endif \ No newline at end of file diff --git a/osd.cpp b/osd.cpp index 6cc7a38..cabd1ac 100644 --- a/osd.cpp +++ b/osd.cpp @@ -45,6 +45,7 @@ as rotated copies of the first 128 entries. -- AMR #include "logo.h" #include "user_io.h" #include "hardware.h" +#include "profiling.h" #include "support.h" @@ -661,6 +662,7 @@ char* OsdCoreNameGet() void OsdUpdate() { + PROFILE_FUNCTION(); int n = is_menu() ? 19 : osd_size; for (int i = 0; i < n; i++) { diff --git a/profiling.cpp b/profiling.cpp new file mode 100644 index 0000000..1943cb8 --- /dev/null +++ b/profiling.cpp @@ -0,0 +1,130 @@ +#ifdef PROFILING + +#include "profiling.h" + +#include "str_util.h" + +#include +#include +#include + +struct Event +{ + const char *name; + uint32_t begin_idx; + struct timespec ts; +}; + +static constexpr int MAX_EVENTS = 512; // must be pow2 +static Event s_events[MAX_EVENTS]; // circular buffer +static uint32_t s_event_tail = 0; + +static constexpr Event *get_event(uint32_t idx) +{ + return &s_events[idx % MAX_EVENTS]; +} + +uint32_t profiling_event_begin(const char *name) +{ + Event *newEvent = get_event(s_event_tail); + newEvent->begin_idx = s_event_tail; + newEvent->name = name; + clock_gettime(CLOCK_MONOTONIC, &newEvent->ts); + + uint32_t r = s_event_tail; + s_event_tail++; + return r; +} + +void profiling_event_end(uint32_t begin_idx, const char *name) +{ + Event *newEvent = get_event(s_event_tail); + newEvent->begin_idx = begin_idx; + newEvent->name = name; + clock_gettime(CLOCK_MONOTONIC, &newEvent->ts); + s_event_tail++; +} + +// result_ns = a - b +static uint64_t delta_ns(const struct timespec *a, const struct timespec *b) +{ + struct timespec ts; + + ts.tv_sec = a->tv_sec - b->tv_sec; + ts.tv_nsec = a->tv_nsec - b->tv_nsec; + if (ts.tv_nsec < 0) + { + ts.tv_nsec += 1000000000; + ts.tv_sec -= 1; + } + + uint64_t delta = ts.tv_sec * 1000000000ULL; + delta += ts.tv_nsec; + return delta; +} + + +// Bookkeeping data for spike report +static uint64_t inclusive_times[MAX_EVENTS]; +static uint64_t other_times[MAX_EVENTS]; +static uint32_t pair_stack[MAX_EVENTS / 2]; + +void profiling_spike_report(uint32_t begin_idx, uint32_t spike_us) +{ + int stack_pos = 0; + + if ((s_event_tail - begin_idx) < 2) return; // not enough events + if ((s_event_tail - begin_idx) > MAX_EVENTS) return; // too many events + + const uint64_t total_ns = delta_ns(&get_event(s_event_tail - 1)->ts, &get_event(begin_idx)->ts); + + if (total_ns < (spike_us * 1000ULL)) return; // below threshold + + for (uint32_t idx = begin_idx; idx != s_event_tail; idx++) + { + const uint32_t cyc_idx = idx % MAX_EVENTS; + Event *event = get_event(idx); + + if (event->begin_idx == idx) + { + pair_stack[stack_pos] = cyc_idx; + inclusive_times[cyc_idx] = 0; + other_times[cyc_idx] = 0; + stack_pos++; + } + else + { + stack_pos--; + uint32_t span_idx = pair_stack[stack_pos]; + const uint64_t inclusive_ns = delta_ns(&event->ts, &get_event(span_idx)->ts); + inclusive_times[span_idx] = inclusive_ns; + if (stack_pos > 0) other_times[pair_stack[stack_pos-1]] += inclusive_ns; + } + } + + char label[256]; + int indent = 0; + printf("\n%lluus spike over %uus limit.\n", total_ns / 1000ULL, spike_us); + printf("+----- Name -----------------------------------------+ Inc(us) + Exc(us) +\n"); + for (uint32_t idx = begin_idx; idx != s_event_tail; idx++) + { + const uint32_t cyc_idx = idx % MAX_EVENTS; + Event *event = get_event(idx); + + if (event->begin_idx == idx) + { + memset(label, ' ', indent); + strcpyz(label + indent, sizeof(label) - indent, event->name); + printf("| %-50s | %7llu | %7llu |\n", label, inclusive_times[cyc_idx] / 1000ULL, (inclusive_times[cyc_idx] - other_times[cyc_idx]) / 1000ULL); + indent += 2; + } + else + { + indent -= 2; + } + } + printf("+----------------------------------------------------+---------+---------+\n\n"); + fflush(stdout); +} + +#endif // PROFILING \ No newline at end of file diff --git a/profiling.h b/profiling.h new file mode 100644 index 0000000..4374580 --- /dev/null +++ b/profiling.h @@ -0,0 +1,53 @@ +#ifndef PROFILING_H +#define PROFILING_H 1 + +#include + +#ifdef PROFILING + +uint32_t profiling_event_begin(const char *name); +void profiling_event_end(uint32_t begin_idx, const char *name); +void profiling_spike_report(uint32_t begin_idx, uint32_t spike_us); + +struct ProfilingScopedEvent +{ + const char *name; + uint32_t spike_us; + uint32_t begin_idx; + + ProfilingScopedEvent(const char *name) + : name(name) + , spike_us(0) + { + begin_idx = profiling_event_begin(name); + } + + ProfilingScopedEvent(const char *name, uint32_t spike_us) + : name(name) + , spike_us(spike_us) + { + begin_idx = profiling_event_begin(name); + } + + ~ProfilingScopedEvent() + { + profiling_event_end(begin_idx, name); + if (spike_us > 0) profiling_spike_report(begin_idx, spike_us); + } +}; + +#define PROFILE_SCOPE(name) ProfilingScopedEvent __scope_timer(name) +#define PROFILE_FUNCTION() ProfilingScopedEvent __scope_timer(__FUNCTION__) +#define SPIKE_SCOPE(name, us) ProfilingScopedEvent __scope_timer(name, us) +#define SPIKE_FUNCTION(us) ProfilingScopedEvent __scope_timer(__FUNCTION__, us) + +#else // PROFILING + +#define PROFILE_SCOPE(name) +#define PROFILE_FUNCTION() +#define SPIKE_SCOPE(name, us) +#define SPIKE_FUNCTION(us) + +#endif // PROFILING + +#endif // PROFILING_H diff --git a/scheduler.cpp b/scheduler.cpp index 26979d6..919d31a 100644 --- a/scheduler.cpp +++ b/scheduler.cpp @@ -6,6 +6,7 @@ #include "input.h" #include "fpga_io.h" #include "osd.h" +#include "profiling.h" static cothread_t co_scheduler = nullptr; static cothread_t co_poll = nullptr; @@ -26,8 +27,11 @@ static void scheduler_co_poll(void) { scheduler_wait_fpga_ready(); - user_io_poll(); - input_poll(0); + { + SPIKE_SCOPE("co_poll", 1000); + user_io_poll(); + input_poll(0); + } scheduler_yield(); } @@ -37,8 +41,11 @@ static void scheduler_co_ui(void) { for (;;) { - HandleUI(); - OsdUpdate(); + { + SPIKE_SCOPE("co_ui", 1000); + HandleUI(); + OsdUpdate(); + } scheduler_yield(); } diff --git a/user_io.cpp b/user_io.cpp index ba5dcd7..d6e075a 100644 --- a/user_io.cpp +++ b/user_io.cpp @@ -34,6 +34,7 @@ #include "audio.h" #include "shmem.h" #include "ide.h" +#include "profiling.h" #include "support.h" @@ -1321,7 +1322,7 @@ void user_io_init(const char *path, const char *xml) bootcore_init(xml ? xml : path); } - video_mode_load(); + video_init(); if (strlen(cfg.font)) LoadFont(cfg.font); load_volume(); @@ -2783,6 +2784,8 @@ static uint32_t res_timer = 0; void user_io_poll() { + PROFILE_FUNCTION(); + if ((core_type != CORE_TYPE_SHARPMZ) && (core_type != CORE_TYPE_8BIT)) { diff --git a/video.cpp b/video.cpp index 59444d1..f15ed15 100644 --- a/video.cpp +++ b/video.cpp @@ -23,9 +23,12 @@ #include "shmem.h" #include "smbus.h" #include "str_util.h" +#include "profiling.h" +#include "offload.h" #include "support.h" #include "lib/imlib2/Imlib2.h" +#include "lib/md5/md5.h" #define FB_SIZE (1920*1080) #define FB_ADDR (0x20000000 + (32*1024*1024)) // 512mb + 32mb(Core's fb) @@ -90,6 +93,7 @@ static vrr_cap_t vrr_modes[3] = { static uint8_t last_vrr_mode = 0xFF; static float last_vrr_rate = 0.0f; +static uint32_t last_vrr_vfp = 0; static uint8_t edid[256] = {}; struct vmode_t @@ -172,6 +176,10 @@ struct vmode_custom_t static_assert(sizeof(vmode_custom_param_t) == sizeof(vmode_custom_t::item)); +// Static fwd decl +static void video_fb_config(); +static void video_calculate_cvt(int horiz_pixels, int vert_pixels, float refresh_rate, int reduced_blanking, vmode_custom_t *vmode); + static vmode_custom_t v_cur = {}, v_def = {}, v_pal = {}, v_ntsc = {}; static int vmode_def = 0, vmode_pal = 0, vmode_ntsc = 0; @@ -189,8 +197,6 @@ static bool supports_vrr() return video_version != 0; } -static void video_calculate_cvt(int horiz_pixels, int vert_pixels, float refresh_rate, int reduced_blanking, vmode_custom_t *vmode); - static uint32_t getPLLdiv(uint32_t div) { if (div & 1) return 0x20000 | (((div / 2) + 1) << 8) | (div / 2); @@ -237,6 +243,8 @@ static int findPLLpar(double Fout, uint32_t *pc, uint32_t *pm, double *pko) static void setPLL(double Fout, vmode_custom_t *v) { + PROFILE_FUNCTION(); + double Fpix; double fvco, ko; uint32_t m, c; @@ -303,13 +311,25 @@ struct FilterPhase static constexpr int N_PHASES = 256; +struct VideoFilterDigest +{ + VideoFilterDigest() { memset(md5, 0, sizeof(md5)); } + bool operator!=(const VideoFilterDigest& other) { return memcmp(md5, other.md5, sizeof(md5)) != 0; } + bool operator==(const VideoFilterDigest& other) { return memcmp(md5, other.md5, sizeof(md5)) == 0; } + + unsigned char md5[16]; +}; + struct VideoFilter { bool is_adaptive; FilterPhase phases[N_PHASES]; FilterPhase adaptive_phases[N_PHASES]; + VideoFilterDigest digest; }; +static VideoFilter scaler_flt_data[3]; + static bool scale_phases(FilterPhase out_phases[N_PHASES], FilterPhase *in_phases, int in_count) { if (!in_count) @@ -337,12 +357,16 @@ static bool scale_phases(FilterPhase out_phases[N_PHASES], FilterPhase *in_phase static bool read_video_filter(int type, VideoFilter *out) { + PROFILE_FUNCTION(); + fileTextReader reader = {}; FilterPhase phases[512]; int count = 0; bool is_adaptive = false; int scale = 2; + memset(out, 0, sizeof(VideoFilter)); + static char filename[1024]; snprintf(filename, sizeof(filename), COEFF_DIR"/%s", scaler_flt[type].filename); @@ -382,29 +406,50 @@ static bool read_video_filter(int type, VideoFilter *out) is_adaptive ? count / 2 : count, is_adaptive ? "true" : "false" ); + bool valid = false; if (is_adaptive) { out->is_adaptive = true; - bool valid = scale_phases(out->phases, phases, count / 2); + valid = scale_phases(out->phases, phases, count / 2); valid = valid && scale_phases(out->adaptive_phases, phases + (count / 2), count / 2); - return valid; } else if (count == 32 && !is_adaptive) // legacy { out->is_adaptive = false; - return scale_phases(out->phases, phases, 16); + valid = scale_phases(out->phases, phases, 16); } else if (!is_adaptive) { out->is_adaptive = false; - return scale_phases(out->phases, phases, count); + valid = scale_phases(out->phases, phases, count); + } + else + { + // Make a default NN filter in case of error + out->is_adaptive = false; + FilterPhase nn_phases[2] = + { + { .t = { 0, 256, 0, 0 } }, + { .t = { 0, 0, 256, 0 } } + }; + scale_phases(out->phases, nn_phases, 2); + valid = false; } - return false; + MD5Context ctx; + MD5Init(&ctx); + MD5Update(&ctx, (unsigned char *)&out->is_adaptive, sizeof(VideoFilter::is_adaptive)); + MD5Update(&ctx, (unsigned char *)out->phases, sizeof(VideoFilter::phases)); + MD5Update(&ctx, (unsigned char *)out->adaptive_phases, sizeof(VideoFilter::adaptive_phases)); + MD5Final(out->digest.md5, &ctx); + + return valid; } static void send_phases_legacy(int addr, const FilterPhase phases[N_PHASES]) { + PROFILE_FUNCTION(); + for (int idx = 0; idx < N_PHASES; idx += 16) { const FilterPhase *p = &phases[idx]; @@ -418,6 +463,8 @@ static void send_phases_legacy(int addr, const FilterPhase phases[N_PHASES]) static void send_phases(int addr, const FilterPhase phases[N_PHASES], bool full_precision) { + PROFILE_FUNCTION(); + const int skip = full_precision ? 1 : 4; const int shift = full_precision ? 0 : 1; @@ -434,31 +481,38 @@ static void send_phases(int addr, const FilterPhase phases[N_PHASES], bool full_ } } +static VideoFilterDigest horiz_filter_digest, vert_filter_digest; + static void send_video_filters(const VideoFilter *horiz, const VideoFilter *vert, int ver) { + PROFILE_FUNCTION(); + spi_uio_cmd_cont(UIO_SET_FLTCOEF); const bool full_precision = (ver & 0x4) != 0; + const bool send_horiz = horiz_filter_digest != horiz->digest; + const bool send_vert = vert_filter_digest != vert->digest; + switch( ver & 0x3 ) { case 1: - send_phases_legacy(0, horiz->phases); - send_phases_legacy(64, vert->phases); + if (send_horiz) send_phases_legacy(0, horiz->phases); + if (send_vert) send_phases_legacy(64, vert->phases); break; case 2: - send_phases(0, horiz->phases, full_precision); - send_phases(1, vert->phases, full_precision); + if (send_horiz) send_phases(0, horiz->phases, full_precision); + if (send_vert) send_phases(1, vert->phases, full_precision); break; case 3: - send_phases(0, horiz->phases, full_precision); - send_phases(1, vert->phases, full_precision); + if (send_horiz) send_phases(0, horiz->phases, full_precision); + if (send_vert) send_phases(1, vert->phases, full_precision); - if (horiz->is_adaptive) + if (horiz->is_adaptive && send_horiz) { send_phases(2, horiz->adaptive_phases, full_precision); } - else if (vert->is_adaptive) + else if (vert->is_adaptive && send_vert) { send_phases(3, vert->adaptive_phases, full_precision); } @@ -467,11 +521,16 @@ static void send_video_filters(const VideoFilter *horiz, const VideoFilter *vert break; } + horiz_filter_digest = horiz->digest; + vert_filter_digest = vert->digest; + DisableIO(); } static void set_vfilter(int force) { + PROFILE_FUNCTION(); + static int last_flags = 0; int flt_flags = spi_uio_cmd_cont(UIO_SET_FLTNUM); @@ -487,33 +546,19 @@ static void set_vfilter(int force) spi8(scaler_flt[0].mode); DisableIO(); - VideoFilter horiz, vert; + int vert_flt; + if (current_video_info.interlaced) vert_flt = VFILTER_HORZ; + else if ((flt_flags & 0x30) && scaler_flt[VFILTER_SCAN].mode) vert_flt = VFILTER_SCAN; + else if (scaler_flt[VFILTER_VERT].mode) vert_flt = VFILTER_VERT; + else vert_flt = VFILTER_HORZ; - //horizontal filter - bool valid = read_video_filter(VFILTER_HORZ, &horiz); - if (valid) - { - //vertical/scanlines filter - int vert_flt; - if (current_video_info.interlaced) vert_flt = VFILTER_HORZ; - else if ((flt_flags & 0x30) && scaler_flt[VFILTER_SCAN].mode) vert_flt = VFILTER_SCAN; - else if (scaler_flt[VFILTER_VERT].mode) vert_flt = VFILTER_VERT; - else vert_flt = VFILTER_HORZ; - - if (!read_video_filter(vert_flt, &vert)) - { - vert = horiz; - valid = true; - } - - send_video_filters(&horiz, &vert, flt_flags & 0xF); - } - - if (!valid) spi_uio_cmd8(UIO_SET_FLTNUM, 0); + send_video_filters(&scaler_flt_data[VFILTER_HORZ], &scaler_flt_data[vert_flt], flt_flags & 0xF); } static void setScaler() { + PROFILE_FUNCTION(); + uint32_t arc[4] = {}; for (int i = 0; i < 2; i++) { @@ -563,12 +608,15 @@ void video_set_scaler_coeff(int type, const char *name) { strcpy(scaler_flt[type].filename, name); FileSaveConfig(scaler_cfg, &scaler_flt, sizeof(scaler_flt)); + read_video_filter(type, &scaler_flt_data[type]); setScaler(); user_io_send_buttons(1); } static void loadScalerCfg() { + PROFILE_FUNCTION(); + sprintf(scaler_cfg, "%s_scaler.cfg", user_io_get_core_name()); memset(scaler_flt, 0, sizeof(scaler_cfg)); if (!FileLoadConfig(scaler_cfg, &scaler_flt, sizeof(scaler_flt)) || scaler_flt[0].mode > 1) @@ -594,17 +642,21 @@ static void loadScalerCfg() scaler_flt[VFILTER_SCAN].mode = 1; } - VideoFilter null; - if (!read_video_filter(VFILTER_HORZ, &null)) memset(&scaler_flt[VFILTER_HORZ], 0, sizeof(scaler_flt[VFILTER_HORZ])); - if (!read_video_filter(VFILTER_VERT, &null)) memset(&scaler_flt[VFILTER_VERT], 0, sizeof(scaler_flt[VFILTER_VERT])); - if (!read_video_filter(VFILTER_SCAN, &null)) memset(&scaler_flt[VFILTER_SCAN], 0, sizeof(scaler_flt[VFILTER_SCAN])); + if (!read_video_filter(VFILTER_HORZ, &scaler_flt_data[VFILTER_HORZ])) memset(&scaler_flt[VFILTER_HORZ], 0, sizeof(scaler_flt[VFILTER_HORZ])); + if (!read_video_filter(VFILTER_VERT, &scaler_flt_data[VFILTER_VERT])) memset(&scaler_flt[VFILTER_VERT], 0, sizeof(scaler_flt[VFILTER_VERT])); + if (!read_video_filter(VFILTER_SCAN, &scaler_flt_data[VFILTER_SCAN])) memset(&scaler_flt[VFILTER_SCAN], 0, sizeof(scaler_flt[VFILTER_SCAN])); } +static char active_gamma_cfg[1024] = { 0 }; static char gamma_cfg[1024] = { 0 }; static char has_gamma = 0; static void setGamma() { + PROFILE_FUNCTION(); + + if (!memcmp(active_gamma_cfg, gamma_cfg, sizeof(gamma_cfg))) return; + fileTextReader reader = {}; static char filename[1024]; @@ -617,6 +669,7 @@ static void setGamma() has_gamma = 1; spi8(0); DisableIO(); + snprintf(filename, sizeof(filename), GAMMA_DIR"/%s", gamma_cfg + 1); if (FileOpenTextReader(&reader, filename)) @@ -649,6 +702,7 @@ static void setGamma() DisableIO(); spi_uio_cmd8(UIO_SET_GAMMA, gamma_cfg[0]); } + memcpy(active_gamma_cfg, gamma_cfg, sizeof(gamma_cfg)); } int video_get_gamma_en() @@ -686,6 +740,7 @@ void video_set_gamma_curve(const char *name) static void loadGammaCfg() { + PROFILE_FUNCTION(); sprintf(gamma_cfg_path, "%s_gamma.cfg", user_io_get_core_name()); if (!FileLoadConfig(gamma_cfg_path, &gamma_cfg, sizeof(gamma_cfg) - 1) || gamma_cfg[0]>1) { @@ -717,6 +772,8 @@ enum static void setShadowMask() { + PROFILE_FUNCTION(); + static char filename[1024]; has_shadow_mask = 0; @@ -860,6 +917,8 @@ void video_set_shadow_mask(const char *name) static void loadShadowMaskCfg() { + PROFILE_FUNCTION(); + sprintf(shadow_mask_cfg_path, "%s_shmask.cfg", user_io_get_core_name()); if (!FileLoadConfig(shadow_mask_cfg_path, &shadow_mask_cfg, sizeof(shadow_mask_cfg) - 1)) { @@ -1008,20 +1067,9 @@ static void hdmi_config_set_spare(bool val) } } -static void hdmi_config() +static void hdmi_config_init() { int ypbpr = cfg.ypbpr && cfg.direct_video; - const uint8_t vic_mode = (uint8_t)v_cur.param.vic; - uint8_t pr_flags; - - if (cfg.direct_video && is_menu()) pr_flags = 0; // automatic pixel repetition - else if (v_cur.param.pr != 0) pr_flags = 0b01001000; // manual pixel repetition with 2x clock - else pr_flags = 0b01000000; // manual pixel repetition - - uint8_t sync_invert = 0; - if (v_cur.param.hpol == 0) sync_invert |= 1 << 5; - if (v_cur.param.vpol == 0) sync_invert |= 1 << 6; - // address, value uint8_t init_data[] = { @@ -1058,7 +1106,7 @@ static void hdmi_config() // DDR Input Edge falling [1]=0 (not using DDR atm). // Output Colour Space RGB [0]=0. - 0x17, (uint8_t)(0b00000010 | sync_invert), // Aspect ratio 16:9 [1]=1, 4:3 [1]=0 + 0x17, 0b01100010, // Aspect ratio 16:9 [1]=1, 4:3 [1]=0, invert sync polarity 0x18, (uint8_t)(ypbpr ? 0x86 : (cfg.hdmi_limited & 1) ? 0x8D : (cfg.hdmi_limited & 2) ? 0x8E : 0x00), // CSC Scaling Factors and Coefficients for RGB Full->Limited. 0x19, (uint8_t)(ypbpr ? 0xDF : (cfg.hdmi_limited & 1) ? 0xBC : 0xFE), // Taken from table in ADV7513 Programming Guide. @@ -1087,7 +1135,7 @@ static void hdmi_config() 0x2E, (uint8_t)(ypbpr ? 0x07 : 0x01), 0x2F, (uint8_t)(ypbpr ? 0xE7 : 0x00), - 0x3B, pr_flags, + 0x3B, 0x0, // Automatic pixel repetition and VIC detection 0x48, 0b00001000, // [6]=0 Normal bus order! @@ -1112,8 +1160,6 @@ static void hdmi_config() | ((ypbpr || cfg.hdmi_limited) ? 0b0100 : 0b1000)), // [3:2] RGB Quantization range // [1:0] Non-Uniform Scaled: 00 - None. 01 - Horiz. 10 - Vert. 11 - Both. - 0x3C, vic_mode, // VIC - 0x59, (uint8_t)(((ypbpr || cfg.hdmi_limited) ? 0x00 : 0x40) // [7:6] [YQ1 YQ0] YCC Quantization Range: b00 = Limited Range, b01 = Full Range | (cfg.hdmi_game_mode ? 0x30 : 0x00)), // [5:4] IT Content Type b11 = Game, b00 = Graphics/None // [3:0] Pixel Repetition Fields b0000 = No Repetition @@ -1205,6 +1251,55 @@ static void hdmi_config() } } +static uint8_t last_sync_invert = 0xff; +static uint8_t last_pr_flags = 0xff; +static uint8_t last_vic_mode = 0xff; + +static void hdmi_config_set_mode(vmode_custom_t *vm) +{ + PROFILE_FUNCTION(); + + const uint8_t vic_mode = (uint8_t)vm->param.vic; + uint8_t pr_flags; + + if (cfg.direct_video && is_menu()) pr_flags = 0; // automatic pixel repetition + else if (vm->param.pr != 0) pr_flags = 0b01001000; // manual pixel repetition with 2x clock + else pr_flags = 0b01000000; // manual pixel repetition + + uint8_t sync_invert = 0; + if (vm->param.hpol == 0) sync_invert |= 1 << 5; + if (vm->param.vpol == 0) sync_invert |= 1 << 6; + + if (last_sync_invert == sync_invert && last_pr_flags == pr_flags && last_vic_mode == vic_mode) return; + + // address, value + uint8_t init_data[] = { + 0x17, (uint8_t)(0b00000010 | sync_invert), // Aspect ratio 16:9 [1]=1, 4:3 [1]=0 + 0x3B, pr_flags, + 0x3C, vic_mode, // VIC + }; + + int fd = i2c_open(0x39, 0); + if (fd >= 0) + { + for (uint i = 0; i < sizeof(init_data); i += 2) + { + int res = i2c_smbus_write_byte_data(fd, init_data[i], init_data[i + 1]); + if (res < 0) printf("i2c: write error (%02X %02X): %d\n", init_data[i], init_data[i + 1], res); + } + + i2c_close(fd); + } + else + { + printf("*** ADV7513 not found on i2c bus! HDMI won't be available!\n"); + } + + last_pr_flags = pr_flags; + last_sync_invert = sync_invert; + last_vic_mode = vic_mode; +} + static void edid_parse_cea_ext(uint8_t *cea) { uint8_t *data_block_end = cea + cea[2]; @@ -1299,8 +1394,6 @@ static int is_edid_valid() static int get_active_edid() { - hdmi_config(); // required to get EDID - int fd = i2c_open(0x39, 0); if (fd < 0) { @@ -1467,20 +1560,28 @@ static int get_edid_vmode(vmode_custom_t *v) static void set_vrr_mode() { + PROFILE_FUNCTION(); + use_vrr = 0; float vrateh = 100000000; if (cfg.vrr_mode == 0) { - hdmi_config_set_spd(0); - hdmi_config_set_spare(0); + if (last_vrr_mode != 0) + { + hdmi_config_set_spd(0); + hdmi_config_set_spare(0); + } + last_vrr_mode = 0; return; } if (current_video_info.vtimeh) vrateh /= current_video_info.vtimeh; else vrateh = 0; if (cfg.vrr_vesa_framerate) vrateh = cfg.vrr_vesa_framerate; - if (last_vrr_mode == cfg.vrr_mode && last_vrr_rate == vrateh) return; + if ((last_vrr_mode == cfg.vrr_mode) && + (last_vrr_rate == vrateh) && + (last_vrr_vfp == v_cur.param.vfp || cfg.vrr_mode != VRR_VESA)) return; if (!is_edid_valid()) { @@ -1629,17 +1730,16 @@ static void set_vrr_mode() } last_vrr_mode = cfg.vrr_mode; last_vrr_rate = vrateh; + last_vrr_vfp = v_cur.param.vfp; if (!supports_vrr() || cfg.vsync_adjust) use_vrr = 0; } -static char fb_reset_cmd[128] = {}; -static void set_video(vmode_custom_t *v, double Fpix) +static void video_set_mode(vmode_custom_t *v, double Fpix) { - loadGammaCfg(); - setGamma(); + PROFILE_FUNCTION(); - loadScalerCfg(); + setGamma(); setScaler(); v_cur = *v; @@ -1734,35 +1834,10 @@ static void set_video(vmode_custom_t *v, double Fpix) printf("Fpix=%f\n", v_cur.Fpix); DisableIO(); - hdmi_config(); + hdmi_config_set_mode(&v_cur); - int fb_scale = cfg.fb_size; + video_fb_config(); - if (fb_scale <= 1) - { - if (((v_cur.item[1] * v_cur.item[5]) > FB_SIZE)) - fb_scale = 2; - else - fb_scale = 1; - } - else if (fb_scale == 3) fb_scale = 2; - else if (fb_scale > 4) fb_scale = 4; - - const int fb_scale_x = fb_scale; - const int fb_scale_y = v_cur.param.pr == 0 ? fb_scale : fb_scale * 2; - - fb_width = v_cur.item[1] / fb_scale_x; - fb_height = v_cur.item[5] / fb_scale_y; - - brd_x = cfg.vscale_border / fb_scale_x; - brd_y = cfg.vscale_border / fb_scale_y; - - if (fb_enabled) video_fb_enable(1, fb_num); - - sprintf(fb_reset_cmd, "echo %d %d %d %d %d >/sys/module/MiSTer_fb/parameters/mode", 8888, 1, fb_width, fb_height, fb_width * 4); - system(fb_reset_cmd); - - loadShadowMaskCfg(); setShadowMask(); } @@ -1882,9 +1957,8 @@ static void fb_init() spi_uio_cmd16(UIO_SET_FBUF, 0); } -void video_mode_load() +static void video_mode_load() { - fb_init(); if (cfg.direct_video && cfg.vsync_adjust) { printf("Disabling vsync_adjust because of enabled direct video.\n"); @@ -1921,9 +1995,22 @@ void video_mode_load() vmode_ntsc = store_custom_video_mode(cfg.video_conf_ntsc, &v_ntsc); } } - set_video(&v_def, 0); } +void video_init() +{ + fb_init(); + hdmi_config_init(); + video_mode_load(); + + loadGammaCfg(); + loadScalerCfg(); + loadShadowMaskCfg(); + + video_set_mode(&v_def, 0); +} + + static int api1_5 = 0; int hasAPI1_5() { @@ -2246,6 +2333,12 @@ void video_mode_adjust() { current_video_info = video_info; + show_video_info(&video_info, &v_cur); + } + force = false; + + if (vid_changed && !is_menu()) + { if (cfg_has_video_sections()) { cfg_parse(); @@ -2253,60 +2346,85 @@ void video_mode_adjust() user_io_send_buttons(1); } - show_video_info(&video_info, &v_cur); - video_scaling_adjust(&video_info, &v_cur); - } - force = false; - - if (vid_changed && !is_menu() && (cfg.vsync_adjust || cfg.vscale_mode >= 4)) - { - const uint32_t vtime = video_info.vtime; - - printf("\033[1;33madjust_video_mode(%u): vsync_adjust=%d vscale_mode=%d.\033[0m\n", vtime, cfg.vsync_adjust, cfg.vscale_mode); - - vmode_custom_t new_mode; - bool adjust = video_mode_select(vtime, &new_mode); - - video_resolution_adjust(&video_info, &new_mode); - - vmode_custom_t *v = &new_mode; - double Fpix = 0; - if (adjust) + if ((cfg.vsync_adjust || cfg.vscale_mode >= 4)) { - Fpix = 100 * (v->item[1] + v->item[2] + v->item[3] + v->item[4]) * (v->item[5] + v->item[6] + v->item[7] + v->item[8]); - Fpix /= vtime; - if (Fpix < 2.f || Fpix > 300.f) + const uint32_t vtime = video_info.vtime; + + printf("\033[1;33madjust_video_mode(%u): vsync_adjust=%d vscale_mode=%d.\033[0m\n", vtime, cfg.vsync_adjust, cfg.vscale_mode); + + vmode_custom_t new_mode; + bool adjust = video_mode_select(vtime, &new_mode); + + video_resolution_adjust(&video_info, &new_mode); + + vmode_custom_t *v = &new_mode; + double Fpix = 0; + if (adjust) { - printf("Estimated Fpix(%.4f MHz) is outside supported range. Canceling auto-adjust.\n", Fpix); - Fpix = 0; + Fpix = 100 * (v->item[1] + v->item[2] + v->item[3] + v->item[4]) * (v->item[5] + v->item[6] + v->item[7] + v->item[8]); + Fpix /= vtime; + if (Fpix < 2.f || Fpix > 300.f) + { + printf("Estimated Fpix(%.4f MHz) is outside supported range. Canceling auto-adjust.\n", Fpix); + Fpix = 0; + } + + float hz = 100000000.0f / vtime; + if (cfg.refresh_min && hz < cfg.refresh_min) + { + printf("Estimated frame rate (%f Hz) is less than REFRESH_MIN(%f Hz). Canceling auto-adjust.\n", hz, cfg.refresh_min); + Fpix = 0; + } + + if (cfg.refresh_max && hz > cfg.refresh_max) + { + printf("Estimated frame rate (%f Hz) is more than REFRESH_MAX(%f Hz). Canceling auto-adjust.\n", hz, cfg.refresh_max); + Fpix = 0; + } } - float hz = 100000000.0f / vtime; - if (cfg.refresh_min && hz < cfg.refresh_min) - { - printf("Estimated frame rate (%f Hz) is less than REFRESH_MIN(%f Hz). Canceling auto-adjust.\n", hz, cfg.refresh_min); - Fpix = 0; - } - - if (cfg.refresh_max && hz > cfg.refresh_max) - { - printf("Estimated frame rate (%f Hz) is more than REFRESH_MAX(%f Hz). Canceling auto-adjust.\n", hz, cfg.refresh_max); - Fpix = 0; - } + video_set_mode(v, Fpix); + user_io_send_buttons(1); + force = true; + } + else if (cfg_has_video_sections()) // if we have video sections but aren't updating the resolution for other reasons, then do it here + { + video_set_mode(&v_def, 0); + user_io_send_buttons(1); + force = true; + } + else + { + set_vfilter(1); // force update filters in case interlacing changed } - set_video(v, Fpix); - user_io_send_buttons(1); - force = true; + video_scaling_adjust(&video_info, &v_cur); } else { - set_vfilter(0); + set_vfilter(0); // update filters if flags have changed } } +static void fb_write_module_params() +{ + int width = fb_width; + int height = fb_height; + offload_add_work([=] + { + FILE *fp = fopen("/sys/module/MiSTer_fb/parameters/mode", "wt"); + if (fp) + { + fprintf(fp, "%d %d %d %d %d\n", 8888, 1, width, height, width * 4); + fclose(fp); + } + }); +} + void video_fb_enable(int enable, int n) { + PROFILE_FUNCTION(); + if (fb_base) { int res = spi_uio_cmd_cont(UIO_SET_FBUF); @@ -2345,7 +2463,7 @@ void video_fb_enable(int enable, int n) //printf("Linux frame buffer: %dx%d, stride = %d bytes\n", fb_width, fb_height, fb_width * 4); if (!fb_num) { - system(fb_reset_cmd); + fb_write_module_params(); input_switch(0); } else @@ -2384,6 +2502,37 @@ int video_fb_state() return fb_enabled; } + +static void video_fb_config() +{ + PROFILE_FUNCTION(); + + int fb_scale = cfg.fb_size; + + if (fb_scale <= 1) + { + if (((v_cur.item[1] * v_cur.item[5]) > FB_SIZE)) + fb_scale = 2; + else + fb_scale = 1; + } + else if (fb_scale == 3) fb_scale = 2; + else if (fb_scale > 4) fb_scale = 4; + + const int fb_scale_x = fb_scale; + const int fb_scale_y = v_cur.param.pr == 0 ? fb_scale : fb_scale * 2; + + fb_width = v_cur.item[1] / fb_scale_x; + fb_height = v_cur.item[5] / fb_scale_y; + + brd_x = cfg.vscale_border / fb_scale_x; + brd_y = cfg.vscale_border / fb_scale_y; + + if (fb_enabled) video_fb_enable(1, fb_num); + + fb_write_module_params(); +} + static void draw_checkers() { volatile uint32_t* buf = fb_base + (FB_SIZE*menu_bgn); diff --git a/video.h b/video.h index 5172b2f..1aaf883 100644 --- a/video.h +++ b/video.h @@ -25,6 +25,8 @@ struct VideoInfo bool rotated; }; +void video_init(); + int video_get_scaler_flt(int type); void video_set_scaler_flt(int type, int n); char* video_get_scaler_coeff(int type, int only_name = 1); @@ -41,7 +43,6 @@ char* video_get_shadow_mask(int only_name = 1); void video_set_shadow_mask(const char *name); void video_loadPreset(char *name); -void video_mode_load(); void video_mode_adjust(); int hasAPI1_5();