diff --git a/Makefile b/Makefile
index b8d3ef6..de8048b 100644
--- a/Makefile
+++ b/Makefile
@@ -46,7 +46,11 @@ DEP = $(C_SRC:.c=.c.d) $(CPP_SRC:.cpp=.cpp.d)
DFLAGS = $(INCLUDE) -D_7ZIP_ST -DPACKAGE_VERSION=\"1.3.3\" -DFLAC_API_EXPORTS -DFLAC__HAS_OGG=0 -DHAVE_LROUND -DHAVE_STDINT_H -DHAVE_STDLIB_H -DHAVE_SYS_PARAM_H -DENABLE_64_BIT_WORDS=0 -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE -DVDATE=\"`date +"%y%m%d"`\"
CFLAGS = $(DFLAGS) -Wall -Wextra -Wno-strict-aliasing -Wno-stringop-overflow -Wno-stringop-truncation -Wno-format-truncation -Wno-psabi -Wno-restrict -c -O3
-LFLAGS = -lc -lstdc++ -lm -lrt $(IMLIB2_LIB) -Llib/bluetooth -lbluetooth
+LFLAGS = -lc -lstdc++ -lm -lrt $(IMLIB2_LIB) -Llib/bluetooth -lbluetooth -lpthread
+
+# Optional instrumentation: building with PROFILING=1 appends -DPROFILING to DFLAGS.
+ifeq ($(PROFILING),1)
+ DFLAGS += -DPROFILING
+endif
$(PRJ): $(OBJ)
$(Q)$(info $@)
diff --git a/MiSTer.vcxproj b/MiSTer.vcxproj
index 74b14cc..b157e17 100644
--- a/MiSTer.vcxproj
+++ b/MiSTer.vcxproj
@@ -73,13 +73,16 @@
+
+
+
@@ -142,13 +145,16 @@
+
+
+
diff --git a/MiSTer.vcxproj.filters b/MiSTer.vcxproj.filters
index c3e2751..7046df0 100644
--- a/MiSTer.vcxproj.filters
+++ b/MiSTer.vcxproj.filters
@@ -223,6 +223,15 @@
Source Files
+
+ Source Files
+
+
+ Source Files
+
+
+ Source Files
+
@@ -432,5 +441,14 @@
Header Files
+
+ Header Files
+
+
+ Header Files
+
+
+ Header Files
+
\ No newline at end of file
diff --git a/file_io.cpp b/file_io.cpp
index 464b4e1..7351734 100644
--- a/file_io.cpp
+++ b/file_io.cpp
@@ -1105,7 +1105,7 @@ void FindStorage(void)
device = 0;
cfg_parse();
device = saveddev;
- video_mode_load();
+ video_init();
user_io_send_buttons(1);
printf("Waiting for USB...\n");
diff --git a/fpga_io.cpp b/fpga_io.cpp
index 7045679..51c4811 100644
--- a/fpga_io.cpp
+++ b/fpga_io.cpp
@@ -15,6 +15,7 @@
#include "osd.h"
#include "menu.h"
#include "shmem.h"
+#include "offload.h"
#include "fpga_base_addr_ac5.h"
#include "fpga_manager.h"
@@ -617,6 +618,8 @@ void app_restart(const char *path, const char *xml)
input_switch(0);
input_uinp_destroy();
+ offload_stop();
+
char *appname = getappname();
printf("restarting the %s\n", appname);
execl(appname, appname, path, xml, NULL);
diff --git a/input.cpp b/input.cpp
index 62cbd03..b679132 100644
--- a/input.cpp
+++ b/input.cpp
@@ -27,6 +27,7 @@
#include "video.h"
#include "joymapping.h"
#include "support.h"
+#include "profiling.h"
#define NUMDEV 30
#define NUMPLAYERS 6
@@ -5178,6 +5179,8 @@ int input_test(int getchar)
int input_poll(int getchar)
{
+ PROFILE_FUNCTION();
+
static int af[NUMPLAYERS] = {};
static uint32_t time[NUMPLAYERS] = {};
static uint32_t joy_prev[NUMPLAYERS] = {};
diff --git a/main.cpp b/main.cpp
index f9109d2..99c03eb 100644
--- a/main.cpp
+++ b/main.cpp
@@ -32,6 +32,7 @@ along with this program. If not, see .
#include "fpga_io.h"
#include "scheduler.h"
#include "osd.h"
+#include "offload.h"
const char *version = "$VER:" VDATE;
@@ -45,6 +46,8 @@ int main(int argc, char *argv[])
CPU_SET(1, &set);
sched_setaffinity(0, sizeof(set), &set);
+ offload_start();
+
fpga_io_init();
DISKLED_OFF;
diff --git a/menu.cpp b/menu.cpp
index 589ae20..3ad06e3 100644
--- a/menu.cpp
+++ b/menu.cpp
@@ -63,6 +63,7 @@ along with this program. If not, see .
#include "support.h"
#include "bootcore.h"
#include "ide.h"
+#include "profiling.h"
/*menu states*/
enum MENU
@@ -899,6 +900,8 @@ static int page = 0;
void HandleUI(void)
{
+ PROFILE_FUNCTION();
+
if (bt_timer >= 0)
{
if (!bt_timer) bt_timer = (int32_t)GetTimer(6000);
diff --git a/offload.cpp b/offload.cpp
new file mode 100644
index 0000000..e25397d
--- /dev/null
+++ b/offload.cpp
@@ -0,0 +1,122 @@
+#include "offload.h"
+#include "profiling.h"
+#include
+#include
+#include
+#include
+
+static constexpr uint32_t QUEUE_SIZE = 8;
+
+static pthread_t s_thread_handle;
+static pthread_cond_t s_cond_work, s_cond_available;
+static pthread_mutex_t s_queue_lock;
+
+// One queued unit of work: a callable taking no arguments that the worker
+// thread invokes and then resets to empty.
+struct Work
+{
+	std::function<void()> handler;
+};
+
+static Work s_queue[QUEUE_SIZE];
+static uint32_t s_queue_head, s_queue_tail;
+static bool s_quit;
+
+// Worker thread entry point: pops queued handlers in FIFO order and runs them
+// until the quit flag is set and the queue has been drained.
+static void *worker_thread(void *)
+{
+	while (true)
+	{
+		pthread_mutex_lock(&s_queue_lock);
+
+		// pthread_cond_wait() may return spuriously, so the predicate is re-checked
+		// in a loop; proceeding with an empty queue would invoke an empty handler
+		// and move the tail past the head.
+		while ((s_queue_head == s_queue_tail) && !s_quit)
+		{
+			pthread_cond_wait(&s_cond_work, &s_queue_lock);
+		}
+
+		// queue still empty here means the quit flag was set: exit
+		if (s_queue_head == s_queue_tail)
+		{
+			pthread_mutex_unlock(&s_queue_lock);
+			break;
+		}
+
+		// get work; producers never write this slot until the tail moves past it
+		Work *current_work = &s_queue[s_queue_tail % QUEUE_SIZE];
+		pthread_mutex_unlock(&s_queue_lock);
+
+		// execute outside the lock so producers can keep queueing
+		current_work->handler();
+		current_work->handler = nullptr;
+
+		// lock and move tail forward, releasing the slot to producers
+		pthread_mutex_lock(&s_queue_lock);
+		s_queue_tail++;
+		pthread_cond_signal(&s_cond_available);
+		pthread_mutex_unlock(&s_queue_lock);
+	}
+	return (void *)0;
+}
+
+// Initializes the queue state and synchronization objects, then starts the
+// worker thread pinned to CPU core #0.
+void offload_start()
+{
+	pthread_cond_init(&s_cond_available, nullptr);
+	pthread_cond_init(&s_cond_work, nullptr);
+	pthread_mutex_init(&s_queue_lock, nullptr);
+
+	s_queue_head = s_queue_tail = 0;
+	s_quit = false;
+
+	pthread_attr_t attr;
+
+	pthread_attr_init(&attr);
+
+	// Set affinity to core #0 since main runs on core #1
+	cpu_set_t set;
+	CPU_ZERO(&set);
+	CPU_SET(0, &set);
+	pthread_attr_setaffinity_np(&attr, sizeof(set), &set);
+
+	const int res = pthread_create(&s_thread_handle, &attr, worker_thread, nullptr);
+
+	// The attribute object is only needed for creation; release it either way.
+	pthread_attr_destroy(&attr);
+
+	if (res != 0) printf("offload: failed to create worker thread (error %d)\n", res);
+}
+
+// Asks the worker thread to quit and blocks until it has exited. The worker
+// only exits once the queue is empty, so already queued work still runs.
+void offload_stop()
+{
+	pthread_mutex_lock(&s_queue_lock);
+
+	s_quit = true;
+	pthread_cond_signal(&s_cond_work);
+
+	pthread_mutex_unlock(&s_queue_lock);
+
+	printf("Waiting for offloaded work to finish...");
+	// No newline above: flush so the message is visible while the join blocks.
+	fflush(stdout);
+	pthread_join(s_thread_handle, nullptr);
+	printf("Done\n");
+}
+
+// Queues `handler` to run on the worker thread. Blocks the caller while the
+// queue already holds QUEUE_SIZE pending entries.
+void offload_add_work(std::function<void()> handler)
+{
+	PROFILE_FUNCTION();
+
+	pthread_mutex_lock(&s_queue_lock);
+
+	// Re-check in a loop: pthread_cond_wait() may wake spuriously, and writing
+	// into a full queue would overwrite a slot the worker has not finished with.
+	while ((s_queue_head - s_queue_tail) == QUEUE_SIZE)
+	{
+		pthread_cond_wait(&s_cond_available, &s_queue_lock);
+	}
+
+	Work *work = &s_queue[s_queue_head % QUEUE_SIZE];
+	work->handler = handler;
+
+	s_queue_head++;
+
+	pthread_cond_signal(&s_cond_work);
+
+	pthread_mutex_unlock(&s_queue_lock);
+}
\ No newline at end of file
diff --git a/offload.h b/offload.h
new file mode 100644
index 0000000..a31ec04
--- /dev/null
+++ b/offload.h
@@ -0,0 +1,12 @@
+#ifndef OFFLOAD_H
+#define OFFLOAD_H
+
+#include
+#include
+
+// Starts the worker thread; call before offload_add_work().
+void offload_start();
+// Signals the worker thread to quit and waits for it to exit.
+void offload_stop();
+
+// Queues a callable taking no arguments for execution on the worker thread.
+void offload_add_work(std::function<void()> work);
+
+#endif
\ No newline at end of file
diff --git a/osd.cpp b/osd.cpp
index 6cc7a38..cabd1ac 100644
--- a/osd.cpp
+++ b/osd.cpp
@@ -45,6 +45,7 @@ as rotated copies of the first 128 entries. -- AMR
#include "logo.h"
#include "user_io.h"
#include "hardware.h"
+#include "profiling.h"
#include "support.h"
@@ -661,6 +662,7 @@ char* OsdCoreNameGet()
void OsdUpdate()
{
+ PROFILE_FUNCTION();
int n = is_menu() ? 19 : osd_size;
for (int i = 0; i < n; i++)
{
diff --git a/profiling.cpp b/profiling.cpp
new file mode 100644
index 0000000..1943cb8
--- /dev/null
+++ b/profiling.cpp
@@ -0,0 +1,130 @@
+#ifdef PROFILING
+
+#include "profiling.h"
+
+#include "str_util.h"
+
+#include
+#include
+#include
+
+// One timestamped profiling record stored in the circular event buffer.
+struct Event
+{
+ // Label passed to profiling_event_begin()/profiling_event_end(); the pointer is stored, not copied.
+ const char *name;
+ // For a begin record: its own index. For an end record: the index of the begin record it closes.
+ uint32_t begin_idx;
+ // CLOCK_MONOTONIC time at which the record was written.
+ struct timespec ts;
+};
+
+static constexpr int MAX_EVENTS = 512; // must be pow2
+static Event s_events[MAX_EVENTS]; // circular buffer
+static uint32_t s_event_tail = 0;
+
+// Maps an ever-increasing event index onto its slot in the circular buffer.
+static constexpr Event *get_event(uint32_t idx)
+{
+	return s_events + (idx % MAX_EVENTS);
+}
+
+// Appends a begin record for `name` and returns its index, which the caller
+// later passes to profiling_event_end().
+uint32_t profiling_event_begin(const char *name)
+{
+	const uint32_t idx = s_event_tail;
+
+	// A begin record is recognizable by begin_idx pointing at itself.
+	Event *ev = get_event(idx);
+	ev->begin_idx = idx;
+	ev->name = name;
+	clock_gettime(CLOCK_MONOTONIC, &ev->ts);
+
+	s_event_tail = idx + 1;
+	return idx;
+}
+
+// Appends an end record that refers back to the begin record at `begin_idx`.
+void profiling_event_end(uint32_t begin_idx, const char *name)
+{
+	const uint32_t idx = s_event_tail;
+
+	Event *ev = get_event(idx);
+	ev->begin_idx = begin_idx;
+	ev->name = name;
+	clock_gettime(CLOCK_MONOTONIC, &ev->ts);
+
+	s_event_tail = idx + 1;
+}
+
+// Returns (a - b) in nanoseconds.
+static uint64_t delta_ns(const struct timespec *a, const struct timespec *b)
+{
+	time_t whole_sec = a->tv_sec - b->tv_sec;
+	long frac_ns = a->tv_nsec - b->tv_nsec;
+
+	// Borrow one second when the nanosecond part went negative.
+	if (frac_ns < 0)
+	{
+		frac_ns += 1000000000;
+		whole_sec -= 1;
+	}
+
+	uint64_t total = whole_sec * 1000000000ULL;
+	total += frac_ns;
+	return total;
+}
+
+
+// Bookkeeping data for spike report
+static uint64_t inclusive_times[MAX_EVENTS];
+static uint64_t other_times[MAX_EVENTS];
+static uint32_t pair_stack[MAX_EVENTS / 2];
+
+// Prints a nested timing table for the events recorded from `begin_idx` up to
+// the current tail, but only when the time between the first and last of those
+// events is at least `spike_us` microseconds. Returns without printing when
+// fewer than 2 or more than MAX_EVENTS events are in the window, or when the
+// begin/end records in the window do not pair up.
+void profiling_spike_report(uint32_t begin_idx, uint32_t spike_us)
+{
+	constexpr int MAX_DEPTH = MAX_EVENTS / 2; // capacity of pair_stack
+	int stack_pos = 0;
+
+	if ((s_event_tail - begin_idx) < 2) return; // not enough events
+	if ((s_event_tail - begin_idx) > MAX_EVENTS) return; // too many events
+
+	const uint64_t total_ns = delta_ns(&get_event(s_event_tail - 1)->ts, &get_event(begin_idx)->ts);
+
+	if (total_ns < (spike_us * 1000ULL)) return; // below threshold
+
+	// Pass 1: pair begin/end records and accumulate, per begin record, its
+	// inclusive time and the time spent in its direct children.
+	for (uint32_t idx = begin_idx; idx != s_event_tail; idx++)
+	{
+		const uint32_t cyc_idx = idx % MAX_EVENTS;
+		Event *event = get_event(idx);
+
+		if (event->begin_idx == idx)
+		{
+			if (stack_pos >= MAX_DEPTH) return; // would overflow pair_stack
+
+			pair_stack[stack_pos] = cyc_idx;
+			inclusive_times[cyc_idx] = 0;
+			other_times[cyc_idx] = 0;
+			stack_pos++;
+		}
+		else
+		{
+			if (stack_pos <= 0) return; // end record without a begin in the window
+
+			stack_pos--;
+			uint32_t span_idx = pair_stack[stack_pos];
+			const uint64_t inclusive_ns = delta_ns(&event->ts, &get_event(span_idx)->ts);
+			inclusive_times[span_idx] = inclusive_ns;
+			if (stack_pos > 0) other_times[pair_stack[stack_pos-1]] += inclusive_ns;
+		}
+	}
+
+	// Pass 2: print one row per begin record, indented by nesting depth.
+	char label[256];
+	const int max_indent = (int)sizeof(label) - 1;
+	int indent = 0;
+	printf("\n%lluus spike over %uus limit.\n", total_ns / 1000ULL, spike_us);
+	printf("+----- Name -----------------------------------------+ Inc(us) + Exc(us) +\n");
+	for (uint32_t idx = begin_idx; idx != s_event_tail; idx++)
+	{
+		const uint32_t cyc_idx = idx % MAX_EVENTS;
+		Event *event = get_event(idx);
+
+		if (event->begin_idx == idx)
+		{
+			// Clamp the padding so deep nesting cannot write past label[] or make
+			// the remaining-size argument wrap around.
+			const int pad = (indent < max_indent) ? indent : max_indent;
+			memset(label, ' ', pad);
+			strcpyz(label + pad, sizeof(label) - pad, event->name);
+			printf("| %-50s | %7llu | %7llu |\n", label, inclusive_times[cyc_idx] / 1000ULL, (inclusive_times[cyc_idx] - other_times[cyc_idx]) / 1000ULL);
+			indent += 2;
+		}
+		else
+		{
+			indent -= 2;
+		}
+	}
+	printf("+----------------------------------------------------+---------+---------+\n\n");
+	fflush(stdout);
+}
+
+#endif // PROFILING
\ No newline at end of file
diff --git a/profiling.h b/profiling.h
new file mode 100644
index 0000000..4374580
--- /dev/null
+++ b/profiling.h
@@ -0,0 +1,53 @@
+#ifndef PROFILING_H
+#define PROFILING_H 1
+
+#include
+
+#ifdef PROFILING
+
+// Records a begin event labelled `name` and returns its index for the matching profiling_event_end() call.
+uint32_t profiling_event_begin(const char *name);
+// Records the end event that closes the begin event at `begin_idx`.
+void profiling_event_end(uint32_t begin_idx, const char *name);
+// Prints a timing table for the events since `begin_idx` if they span at least `spike_us` microseconds.
+void profiling_spike_report(uint32_t begin_idx, uint32_t spike_us);
+
+// Scope guard: records a begin event on construction and the matching end
+// event on destruction. With a non-zero spike threshold it also requests a
+// spike report for the enclosed events when the scope ends.
+struct ProfilingScopedEvent
+{
+	const char *name;
+	uint32_t spike_us;
+	uint32_t begin_idx;
+
+	// Plain timing scope: never triggers a spike report.
+	ProfilingScopedEvent(const char *name)
+		: ProfilingScopedEvent(name, 0)
+	{
+	}
+
+	// Timing scope with a spike threshold in microseconds.
+	ProfilingScopedEvent(const char *name, uint32_t spike_us)
+		: name(name)
+		, spike_us(spike_us)
+		, begin_idx(profiling_event_begin(name))
+	{
+	}
+
+	~ProfilingScopedEvent()
+	{
+		profiling_event_end(begin_idx, name);
+		if (spike_us == 0) return;
+		profiling_spike_report(begin_idx, spike_us);
+	}
+};
+
+// Builds a per-line variable name for the scope guard. This avoids the reserved
+// double-underscore identifier and lets several scopes be opened in one block.
+#define PROFILING_CONCAT_(a, b) a##b
+#define PROFILING_CONCAT(a, b) PROFILING_CONCAT_(a, b)
+#define PROFILING_SCOPE_VAR PROFILING_CONCAT(profiling_scope_timer_, __LINE__)
+
+// PROFILE_*: time the enclosing scope. SPIKE_*: additionally request a spike
+// report when the scope ends, using `us` microseconds as the threshold.
+#define PROFILE_SCOPE(name) ProfilingScopedEvent PROFILING_SCOPE_VAR(name)
+#define PROFILE_FUNCTION() ProfilingScopedEvent PROFILING_SCOPE_VAR(__FUNCTION__)
+#define SPIKE_SCOPE(name, us) ProfilingScopedEvent PROFILING_SCOPE_VAR(name, us)
+#define SPIKE_FUNCTION(us) ProfilingScopedEvent PROFILING_SCOPE_VAR(__FUNCTION__, us)
+
+#else // PROFILING
+
+// Profiling disabled: every macro expands to nothing.
+#define PROFILE_SCOPE(name)
+#define PROFILE_FUNCTION()
+#define SPIKE_SCOPE(name, us)
+#define SPIKE_FUNCTION(us)
+
+#endif // PROFILING
+
+#endif // PROFILING_H
diff --git a/scheduler.cpp b/scheduler.cpp
index 26979d6..919d31a 100644
--- a/scheduler.cpp
+++ b/scheduler.cpp
@@ -6,6 +6,7 @@
#include "input.h"
#include "fpga_io.h"
#include "osd.h"
+#include "profiling.h"
static cothread_t co_scheduler = nullptr;
static cothread_t co_poll = nullptr;
@@ -26,8 +27,11 @@ static void scheduler_co_poll(void)
{
scheduler_wait_fpga_ready();
- user_io_poll();
- input_poll(0);
+ {
+ SPIKE_SCOPE("co_poll", 1000);
+ user_io_poll();
+ input_poll(0);
+ }
scheduler_yield();
}
@@ -37,8 +41,11 @@ static void scheduler_co_ui(void)
{
for (;;)
{
- HandleUI();
- OsdUpdate();
+ {
+ SPIKE_SCOPE("co_ui", 1000);
+ HandleUI();
+ OsdUpdate();
+ }
scheduler_yield();
}
diff --git a/user_io.cpp b/user_io.cpp
index ba5dcd7..d6e075a 100644
--- a/user_io.cpp
+++ b/user_io.cpp
@@ -34,6 +34,7 @@
#include "audio.h"
#include "shmem.h"
#include "ide.h"
+#include "profiling.h"
#include "support.h"
@@ -1321,7 +1322,7 @@ void user_io_init(const char *path, const char *xml)
bootcore_init(xml ? xml : path);
}
- video_mode_load();
+ video_init();
if (strlen(cfg.font)) LoadFont(cfg.font);
load_volume();
@@ -2783,6 +2784,8 @@ static uint32_t res_timer = 0;
void user_io_poll()
{
+ PROFILE_FUNCTION();
+
if ((core_type != CORE_TYPE_SHARPMZ) &&
(core_type != CORE_TYPE_8BIT))
{
diff --git a/video.cpp b/video.cpp
index 59444d1..f15ed15 100644
--- a/video.cpp
+++ b/video.cpp
@@ -23,9 +23,12 @@
#include "shmem.h"
#include "smbus.h"
#include "str_util.h"
+#include "profiling.h"
+#include "offload.h"
#include "support.h"
#include "lib/imlib2/Imlib2.h"
+#include "lib/md5/md5.h"
#define FB_SIZE (1920*1080)
#define FB_ADDR (0x20000000 + (32*1024*1024)) // 512mb + 32mb(Core's fb)
@@ -90,6 +93,7 @@ static vrr_cap_t vrr_modes[3] = {
static uint8_t last_vrr_mode = 0xFF;
static float last_vrr_rate = 0.0f;
+static uint32_t last_vrr_vfp = 0;
static uint8_t edid[256] = {};
struct vmode_t
@@ -172,6 +176,10 @@ struct vmode_custom_t
static_assert(sizeof(vmode_custom_param_t) == sizeof(vmode_custom_t::item));
+// Static fwd decl
+static void video_fb_config();
+static void video_calculate_cvt(int horiz_pixels, int vert_pixels, float refresh_rate, int reduced_blanking, vmode_custom_t *vmode);
+
static vmode_custom_t v_cur = {}, v_def = {}, v_pal = {}, v_ntsc = {};
static int vmode_def = 0, vmode_pal = 0, vmode_ntsc = 0;
@@ -189,8 +197,6 @@ static bool supports_vrr()
return video_version != 0;
}
-static void video_calculate_cvt(int horiz_pixels, int vert_pixels, float refresh_rate, int reduced_blanking, vmode_custom_t *vmode);
-
static uint32_t getPLLdiv(uint32_t div)
{
if (div & 1) return 0x20000 | (((div / 2) + 1) << 8) | (div / 2);
@@ -237,6 +243,8 @@ static int findPLLpar(double Fout, uint32_t *pc, uint32_t *pm, double *pko)
static void setPLL(double Fout, vmode_custom_t *v)
{
+ PROFILE_FUNCTION();
+
double Fpix;
double fvco, ko;
uint32_t m, c;
@@ -303,13 +311,25 @@ struct FilterPhase
static constexpr int N_PHASES = 256;
+// 16-byte MD5 digest of a filter's contents, used to tell whether two filters
+// hold the same data. A default-constructed digest is all zeroes.
+struct VideoFilterDigest
+{
+	VideoFilterDigest() { memset(md5, 0, sizeof(md5)); }
+
+	// Comparisons are const-qualified so they also work on const digests.
+	bool operator!=(const VideoFilterDigest& other) const { return memcmp(md5, other.md5, sizeof(md5)) != 0; }
+	bool operator==(const VideoFilterDigest& other) const { return memcmp(md5, other.md5, sizeof(md5)) == 0; }
+
+	unsigned char md5[16];
+};
+
struct VideoFilter
{
bool is_adaptive;
FilterPhase phases[N_PHASES];
FilterPhase adaptive_phases[N_PHASES];
+ VideoFilterDigest digest;
};
+static VideoFilter scaler_flt_data[3];
+
static bool scale_phases(FilterPhase out_phases[N_PHASES], FilterPhase *in_phases, int in_count)
{
if (!in_count)
@@ -337,12 +357,16 @@ static bool scale_phases(FilterPhase out_phases[N_PHASES], FilterPhase *in_phase
static bool read_video_filter(int type, VideoFilter *out)
{
+ PROFILE_FUNCTION();
+
fileTextReader reader = {};
FilterPhase phases[512];
int count = 0;
bool is_adaptive = false;
int scale = 2;
+ memset(out, 0, sizeof(VideoFilter));
+
static char filename[1024];
snprintf(filename, sizeof(filename), COEFF_DIR"/%s", scaler_flt[type].filename);
@@ -382,29 +406,50 @@ static bool read_video_filter(int type, VideoFilter *out)
is_adaptive ? count / 2 : count,
is_adaptive ? "true" : "false" );
+ bool valid = false;
if (is_adaptive)
{
out->is_adaptive = true;
- bool valid = scale_phases(out->phases, phases, count / 2);
+ valid = scale_phases(out->phases, phases, count / 2);
valid = valid && scale_phases(out->adaptive_phases, phases + (count / 2), count / 2);
- return valid;
}
else if (count == 32 && !is_adaptive) // legacy
{
out->is_adaptive = false;
- return scale_phases(out->phases, phases, 16);
+ valid = scale_phases(out->phases, phases, 16);
}
else if (!is_adaptive)
{
out->is_adaptive = false;
- return scale_phases(out->phases, phases, count);
+ valid = scale_phases(out->phases, phases, count);
+ }
+ else
+ {
+ // Make a default NN filter in case of error
+ out->is_adaptive = false;
+ FilterPhase nn_phases[2] =
+ {
+ { .t = { 0, 256, 0, 0 } },
+ { .t = { 0, 0, 256, 0 } }
+ };
+ scale_phases(out->phases, nn_phases, 2);
+ valid = false;
}
- return false;
+ MD5Context ctx;
+ MD5Init(&ctx);
+ MD5Update(&ctx, (unsigned char *)&out->is_adaptive, sizeof(VideoFilter::is_adaptive));
+ MD5Update(&ctx, (unsigned char *)out->phases, sizeof(VideoFilter::phases));
+ MD5Update(&ctx, (unsigned char *)out->adaptive_phases, sizeof(VideoFilter::adaptive_phases));
+ MD5Final(out->digest.md5, &ctx);
+
+ return valid;
}
static void send_phases_legacy(int addr, const FilterPhase phases[N_PHASES])
{
+ PROFILE_FUNCTION();
+
for (int idx = 0; idx < N_PHASES; idx += 16)
{
const FilterPhase *p = &phases[idx];
@@ -418,6 +463,8 @@ static void send_phases_legacy(int addr, const FilterPhase phases[N_PHASES])
static void send_phases(int addr, const FilterPhase phases[N_PHASES], bool full_precision)
{
+ PROFILE_FUNCTION();
+
const int skip = full_precision ? 1 : 4;
const int shift = full_precision ? 0 : 1;
@@ -434,31 +481,38 @@ static void send_phases(int addr, const FilterPhase phases[N_PHASES], bool full_
}
}
+static VideoFilterDigest horiz_filter_digest, vert_filter_digest;
+
static void send_video_filters(const VideoFilter *horiz, const VideoFilter *vert, int ver)
{
+ PROFILE_FUNCTION();
+
spi_uio_cmd_cont(UIO_SET_FLTCOEF);
const bool full_precision = (ver & 0x4) != 0;
+ const bool send_horiz = horiz_filter_digest != horiz->digest;
+ const bool send_vert = vert_filter_digest != vert->digest;
+
switch( ver & 0x3 )
{
case 1:
- send_phases_legacy(0, horiz->phases);
- send_phases_legacy(64, vert->phases);
+ if (send_horiz) send_phases_legacy(0, horiz->phases);
+ if (send_vert) send_phases_legacy(64, vert->phases);
break;
case 2:
- send_phases(0, horiz->phases, full_precision);
- send_phases(1, vert->phases, full_precision);
+ if (send_horiz) send_phases(0, horiz->phases, full_precision);
+ if (send_vert) send_phases(1, vert->phases, full_precision);
break;
case 3:
- send_phases(0, horiz->phases, full_precision);
- send_phases(1, vert->phases, full_precision);
+ if (send_horiz) send_phases(0, horiz->phases, full_precision);
+ if (send_vert) send_phases(1, vert->phases, full_precision);
- if (horiz->is_adaptive)
+ if (horiz->is_adaptive && send_horiz)
{
send_phases(2, horiz->adaptive_phases, full_precision);
}
- else if (vert->is_adaptive)
+ else if (vert->is_adaptive && send_vert)
{
send_phases(3, vert->adaptive_phases, full_precision);
}
@@ -467,11 +521,16 @@ static void send_video_filters(const VideoFilter *horiz, const VideoFilter *vert
break;
}
+ horiz_filter_digest = horiz->digest;
+ vert_filter_digest = vert->digest;
+
DisableIO();
}
static void set_vfilter(int force)
{
+ PROFILE_FUNCTION();
+
static int last_flags = 0;
int flt_flags = spi_uio_cmd_cont(UIO_SET_FLTNUM);
@@ -487,33 +546,19 @@ static void set_vfilter(int force)
spi8(scaler_flt[0].mode);
DisableIO();
- VideoFilter horiz, vert;
+ int vert_flt;
+ if (current_video_info.interlaced) vert_flt = VFILTER_HORZ;
+ else if ((flt_flags & 0x30) && scaler_flt[VFILTER_SCAN].mode) vert_flt = VFILTER_SCAN;
+ else if (scaler_flt[VFILTER_VERT].mode) vert_flt = VFILTER_VERT;
+ else vert_flt = VFILTER_HORZ;
- //horizontal filter
- bool valid = read_video_filter(VFILTER_HORZ, &horiz);
- if (valid)
- {
- //vertical/scanlines filter
- int vert_flt;
- if (current_video_info.interlaced) vert_flt = VFILTER_HORZ;
- else if ((flt_flags & 0x30) && scaler_flt[VFILTER_SCAN].mode) vert_flt = VFILTER_SCAN;
- else if (scaler_flt[VFILTER_VERT].mode) vert_flt = VFILTER_VERT;
- else vert_flt = VFILTER_HORZ;
-
- if (!read_video_filter(vert_flt, &vert))
- {
- vert = horiz;
- valid = true;
- }
-
- send_video_filters(&horiz, &vert, flt_flags & 0xF);
- }
-
- if (!valid) spi_uio_cmd8(UIO_SET_FLTNUM, 0);
+ send_video_filters(&scaler_flt_data[VFILTER_HORZ], &scaler_flt_data[vert_flt], flt_flags & 0xF);
}
static void setScaler()
{
+ PROFILE_FUNCTION();
+
uint32_t arc[4] = {};
for (int i = 0; i < 2; i++)
{
@@ -563,12 +608,15 @@ void video_set_scaler_coeff(int type, const char *name)
{
strcpy(scaler_flt[type].filename, name);
FileSaveConfig(scaler_cfg, &scaler_flt, sizeof(scaler_flt));
+ read_video_filter(type, &scaler_flt_data[type]);
setScaler();
user_io_send_buttons(1);
}
static void loadScalerCfg()
{
+ PROFILE_FUNCTION();
+
sprintf(scaler_cfg, "%s_scaler.cfg", user_io_get_core_name());
memset(scaler_flt, 0, sizeof(scaler_cfg));
if (!FileLoadConfig(scaler_cfg, &scaler_flt, sizeof(scaler_flt)) || scaler_flt[0].mode > 1)
@@ -594,17 +642,21 @@ static void loadScalerCfg()
scaler_flt[VFILTER_SCAN].mode = 1;
}
- VideoFilter null;
- if (!read_video_filter(VFILTER_HORZ, &null)) memset(&scaler_flt[VFILTER_HORZ], 0, sizeof(scaler_flt[VFILTER_HORZ]));
- if (!read_video_filter(VFILTER_VERT, &null)) memset(&scaler_flt[VFILTER_VERT], 0, sizeof(scaler_flt[VFILTER_VERT]));
- if (!read_video_filter(VFILTER_SCAN, &null)) memset(&scaler_flt[VFILTER_SCAN], 0, sizeof(scaler_flt[VFILTER_SCAN]));
+ if (!read_video_filter(VFILTER_HORZ, &scaler_flt_data[VFILTER_HORZ])) memset(&scaler_flt[VFILTER_HORZ], 0, sizeof(scaler_flt[VFILTER_HORZ]));
+ if (!read_video_filter(VFILTER_VERT, &scaler_flt_data[VFILTER_VERT])) memset(&scaler_flt[VFILTER_VERT], 0, sizeof(scaler_flt[VFILTER_VERT]));
+ if (!read_video_filter(VFILTER_SCAN, &scaler_flt_data[VFILTER_SCAN])) memset(&scaler_flt[VFILTER_SCAN], 0, sizeof(scaler_flt[VFILTER_SCAN]));
}
+static char active_gamma_cfg[1024] = { 0 };
static char gamma_cfg[1024] = { 0 };
static char has_gamma = 0;
static void setGamma()
{
+ PROFILE_FUNCTION();
+
+ if (!memcmp(active_gamma_cfg, gamma_cfg, sizeof(gamma_cfg))) return;
+
fileTextReader reader = {};
static char filename[1024];
@@ -617,6 +669,7 @@ static void setGamma()
has_gamma = 1;
spi8(0);
DisableIO();
+
snprintf(filename, sizeof(filename), GAMMA_DIR"/%s", gamma_cfg + 1);
if (FileOpenTextReader(&reader, filename))
@@ -649,6 +702,7 @@ static void setGamma()
DisableIO();
spi_uio_cmd8(UIO_SET_GAMMA, gamma_cfg[0]);
}
+ memcpy(active_gamma_cfg, gamma_cfg, sizeof(gamma_cfg));
}
int video_get_gamma_en()
@@ -686,6 +740,7 @@ void video_set_gamma_curve(const char *name)
static void loadGammaCfg()
{
+ PROFILE_FUNCTION();
sprintf(gamma_cfg_path, "%s_gamma.cfg", user_io_get_core_name());
if (!FileLoadConfig(gamma_cfg_path, &gamma_cfg, sizeof(gamma_cfg) - 1) || gamma_cfg[0]>1)
{
@@ -717,6 +772,8 @@ enum
static void setShadowMask()
{
+ PROFILE_FUNCTION();
+
static char filename[1024];
has_shadow_mask = 0;
@@ -860,6 +917,8 @@ void video_set_shadow_mask(const char *name)
static void loadShadowMaskCfg()
{
+ PROFILE_FUNCTION();
+
sprintf(shadow_mask_cfg_path, "%s_shmask.cfg", user_io_get_core_name());
if (!FileLoadConfig(shadow_mask_cfg_path, &shadow_mask_cfg, sizeof(shadow_mask_cfg) - 1))
{
@@ -1008,20 +1067,9 @@ static void hdmi_config_set_spare(bool val)
}
}
-static void hdmi_config()
+static void hdmi_config_init()
{
int ypbpr = cfg.ypbpr && cfg.direct_video;
- const uint8_t vic_mode = (uint8_t)v_cur.param.vic;
- uint8_t pr_flags;
-
- if (cfg.direct_video && is_menu()) pr_flags = 0; // automatic pixel repetition
- else if (v_cur.param.pr != 0) pr_flags = 0b01001000; // manual pixel repetition with 2x clock
- else pr_flags = 0b01000000; // manual pixel repetition
-
- uint8_t sync_invert = 0;
- if (v_cur.param.hpol == 0) sync_invert |= 1 << 5;
- if (v_cur.param.vpol == 0) sync_invert |= 1 << 6;
-
// address, value
uint8_t init_data[] = {
@@ -1058,7 +1106,7 @@ static void hdmi_config()
// DDR Input Edge falling [1]=0 (not using DDR atm).
// Output Colour Space RGB [0]=0.
- 0x17, (uint8_t)(0b00000010 | sync_invert), // Aspect ratio 16:9 [1]=1, 4:3 [1]=0
+ 0x17, 0b01100010, // Aspect ratio 16:9 [1]=1, 4:3 [1]=0, invert sync polarity
0x18, (uint8_t)(ypbpr ? 0x86 : (cfg.hdmi_limited & 1) ? 0x8D : (cfg.hdmi_limited & 2) ? 0x8E : 0x00), // CSC Scaling Factors and Coefficients for RGB Full->Limited.
0x19, (uint8_t)(ypbpr ? 0xDF : (cfg.hdmi_limited & 1) ? 0xBC : 0xFE), // Taken from table in ADV7513 Programming Guide.
@@ -1087,7 +1135,7 @@ static void hdmi_config()
0x2E, (uint8_t)(ypbpr ? 0x07 : 0x01),
0x2F, (uint8_t)(ypbpr ? 0xE7 : 0x00),
- 0x3B, pr_flags,
+ 0x3B, 0x0, // Automatic pixel repetition and VIC detection
0x48, 0b00001000, // [6]=0 Normal bus order!
@@ -1112,8 +1160,6 @@ static void hdmi_config()
| ((ypbpr || cfg.hdmi_limited) ? 0b0100 : 0b1000)), // [3:2] RGB Quantization range
// [1:0] Non-Uniform Scaled: 00 - None. 01 - Horiz. 10 - Vert. 11 - Both.
- 0x3C, vic_mode, // VIC
-
0x59, (uint8_t)(((ypbpr || cfg.hdmi_limited) ? 0x00 : 0x40) // [7:6] [YQ1 YQ0] YCC Quantization Range: b00 = Limited Range, b01 = Full Range
| (cfg.hdmi_game_mode ? 0x30 : 0x00)), // [5:4] IT Content Type b11 = Game, b00 = Graphics/None
// [3:0] Pixel Repetition Fields b0000 = No Repetition
@@ -1205,6 +1251,55 @@ static void hdmi_config()
}
}
+// Values most recently written successfully by hdmi_config_set_mode(); 0xff
+// means nothing has been written yet.
+static uint8_t last_sync_invert = 0xff;
+static uint8_t last_pr_flags = 0xff;
+static uint8_t last_vic_mode = 0xff;
+
+// Writes the mode-dependent registers (0x17 sync polarity, 0x3B pixel
+// repetition, 0x3C VIC) to the I2C device at address 0x39 for video mode `vm`.
+// Does nothing when all three values match the last successful write.
+static void hdmi_config_set_mode(vmode_custom_t *vm)
+{
+	PROFILE_FUNCTION();
+
+	const uint8_t vic_mode = (uint8_t)vm->param.vic;
+	uint8_t pr_flags;
+
+	if (cfg.direct_video && is_menu()) pr_flags = 0; // automatic pixel repetition
+	else if (vm->param.pr != 0) pr_flags = 0b01001000; // manual pixel repetition with 2x clock
+	else pr_flags = 0b01000000; // manual pixel repetition
+
+	uint8_t sync_invert = 0;
+	if (vm->param.hpol == 0) sync_invert |= 1 << 5;
+	if (vm->param.vpol == 0) sync_invert |= 1 << 6;
+
+	if (last_sync_invert == sync_invert && last_pr_flags == pr_flags && last_vic_mode == vic_mode) return;
+
+	// address, value
+	uint8_t init_data[] = {
+		0x17, (uint8_t)(0b00000010 | sync_invert), // Aspect ratio 16:9 [1]=1, 4:3 [1]=0
+		0x3B, pr_flags,
+		0x3C, vic_mode, // VIC
+	};
+
+	int fd = i2c_open(0x39, 0);
+	if (fd < 0)
+	{
+		printf("*** ADV7513 not found on i2c bus! HDMI won't be available!\n");
+		return; // cache left untouched so the next call tries again
+	}
+
+	bool all_written = true;
+	for (size_t i = 0; i < sizeof(init_data); i += 2)
+	{
+		int res = i2c_smbus_write_byte_data(fd, init_data[i], init_data[i + 1]);
+		if (res < 0)
+		{
+			printf("i2c: write error (%02X %02X): %d\n", init_data[i], init_data[i + 1], res);
+			all_written = false;
+		}
+	}
+
+	i2c_close(fd);
+
+	// Only remember the values once the device really holds them; otherwise a
+	// failed write would be skipped forever by the early return above.
+	if (!all_written) return;
+
+	last_pr_flags = pr_flags;
+	last_sync_invert = sync_invert;
+	last_vic_mode = vic_mode;
+}
+
static void edid_parse_cea_ext(uint8_t *cea)
{
uint8_t *data_block_end = cea + cea[2];
@@ -1299,8 +1394,6 @@ static int is_edid_valid()
static int get_active_edid()
{
- hdmi_config(); // required to get EDID
-
int fd = i2c_open(0x39, 0);
if (fd < 0)
{
@@ -1467,20 +1560,28 @@ static int get_edid_vmode(vmode_custom_t *v)
static void set_vrr_mode()
{
+ PROFILE_FUNCTION();
+
use_vrr = 0;
float vrateh = 100000000;
if (cfg.vrr_mode == 0)
{
- hdmi_config_set_spd(0);
- hdmi_config_set_spare(0);
+ if (last_vrr_mode != 0)
+ {
+ hdmi_config_set_spd(0);
+ hdmi_config_set_spare(0);
+ }
+ last_vrr_mode = 0;
return;
}
if (current_video_info.vtimeh) vrateh /= current_video_info.vtimeh; else vrateh = 0;
if (cfg.vrr_vesa_framerate) vrateh = cfg.vrr_vesa_framerate;
- if (last_vrr_mode == cfg.vrr_mode && last_vrr_rate == vrateh) return;
+ if ((last_vrr_mode == cfg.vrr_mode) &&
+ (last_vrr_rate == vrateh) &&
+ (last_vrr_vfp == v_cur.param.vfp || cfg.vrr_mode != VRR_VESA)) return;
if (!is_edid_valid())
{
@@ -1629,17 +1730,16 @@ static void set_vrr_mode()
}
last_vrr_mode = cfg.vrr_mode;
last_vrr_rate = vrateh;
+ last_vrr_vfp = v_cur.param.vfp;
if (!supports_vrr() || cfg.vsync_adjust) use_vrr = 0;
}
-static char fb_reset_cmd[128] = {};
-static void set_video(vmode_custom_t *v, double Fpix)
+static void video_set_mode(vmode_custom_t *v, double Fpix)
{
- loadGammaCfg();
- setGamma();
+ PROFILE_FUNCTION();
- loadScalerCfg();
+ setGamma();
setScaler();
v_cur = *v;
@@ -1734,35 +1834,10 @@ static void set_video(vmode_custom_t *v, double Fpix)
printf("Fpix=%f\n", v_cur.Fpix);
DisableIO();
- hdmi_config();
+ hdmi_config_set_mode(&v_cur);
- int fb_scale = cfg.fb_size;
+ video_fb_config();
- if (fb_scale <= 1)
- {
- if (((v_cur.item[1] * v_cur.item[5]) > FB_SIZE))
- fb_scale = 2;
- else
- fb_scale = 1;
- }
- else if (fb_scale == 3) fb_scale = 2;
- else if (fb_scale > 4) fb_scale = 4;
-
- const int fb_scale_x = fb_scale;
- const int fb_scale_y = v_cur.param.pr == 0 ? fb_scale : fb_scale * 2;
-
- fb_width = v_cur.item[1] / fb_scale_x;
- fb_height = v_cur.item[5] / fb_scale_y;
-
- brd_x = cfg.vscale_border / fb_scale_x;
- brd_y = cfg.vscale_border / fb_scale_y;
-
- if (fb_enabled) video_fb_enable(1, fb_num);
-
- sprintf(fb_reset_cmd, "echo %d %d %d %d %d >/sys/module/MiSTer_fb/parameters/mode", 8888, 1, fb_width, fb_height, fb_width * 4);
- system(fb_reset_cmd);
-
- loadShadowMaskCfg();
setShadowMask();
}
@@ -1882,9 +1957,8 @@ static void fb_init()
spi_uio_cmd16(UIO_SET_FBUF, 0);
}
-void video_mode_load()
+static void video_mode_load()
{
- fb_init();
if (cfg.direct_video && cfg.vsync_adjust)
{
printf("Disabling vsync_adjust because of enabled direct video.\n");
@@ -1921,9 +1995,22 @@ void video_mode_load()
vmode_ntsc = store_custom_video_mode(cfg.video_conf_ntsc, &v_ntsc);
}
}
- set_video(&v_def, 0);
}
+// One-time video bring-up: initializes the framebuffer and HDMI transmitter,
+// loads the configured video modes and the gamma/scaler/shadow-mask settings,
+// then applies the default mode.
+void video_init()
+{
+ fb_init();
+ // NOTE(review): presumably must precede any EDID read, since get_active_edid() no longer configures HDMI itself - confirm.
+ hdmi_config_init();
+ video_mode_load();
+
+ // Settings are loaded once here; video_set_mode() only applies them.
+ loadGammaCfg();
+ loadScalerCfg();
+ loadShadowMaskCfg();
+
+ video_set_mode(&v_def, 0);
+}
+
+
static int api1_5 = 0;
int hasAPI1_5()
{
@@ -2246,6 +2333,12 @@ void video_mode_adjust()
{
current_video_info = video_info;
+ show_video_info(&video_info, &v_cur);
+ }
+ force = false;
+
+ if (vid_changed && !is_menu())
+ {
if (cfg_has_video_sections())
{
cfg_parse();
@@ -2253,60 +2346,85 @@ void video_mode_adjust()
user_io_send_buttons(1);
}
- show_video_info(&video_info, &v_cur);
- video_scaling_adjust(&video_info, &v_cur);
- }
- force = false;
-
- if (vid_changed && !is_menu() && (cfg.vsync_adjust || cfg.vscale_mode >= 4))
- {
- const uint32_t vtime = video_info.vtime;
-
- printf("\033[1;33madjust_video_mode(%u): vsync_adjust=%d vscale_mode=%d.\033[0m\n", vtime, cfg.vsync_adjust, cfg.vscale_mode);
-
- vmode_custom_t new_mode;
- bool adjust = video_mode_select(vtime, &new_mode);
-
- video_resolution_adjust(&video_info, &new_mode);
-
- vmode_custom_t *v = &new_mode;
- double Fpix = 0;
- if (adjust)
+ if ((cfg.vsync_adjust || cfg.vscale_mode >= 4))
{
- Fpix = 100 * (v->item[1] + v->item[2] + v->item[3] + v->item[4]) * (v->item[5] + v->item[6] + v->item[7] + v->item[8]);
- Fpix /= vtime;
- if (Fpix < 2.f || Fpix > 300.f)
+ const uint32_t vtime = video_info.vtime;
+
+ printf("\033[1;33madjust_video_mode(%u): vsync_adjust=%d vscale_mode=%d.\033[0m\n", vtime, cfg.vsync_adjust, cfg.vscale_mode);
+
+ vmode_custom_t new_mode;
+ bool adjust = video_mode_select(vtime, &new_mode);
+
+ video_resolution_adjust(&video_info, &new_mode);
+
+ vmode_custom_t *v = &new_mode;
+ double Fpix = 0;
+ if (adjust)
{
- printf("Estimated Fpix(%.4f MHz) is outside supported range. Canceling auto-adjust.\n", Fpix);
- Fpix = 0;
+ Fpix = 100 * (v->item[1] + v->item[2] + v->item[3] + v->item[4]) * (v->item[5] + v->item[6] + v->item[7] + v->item[8]);
+ Fpix /= vtime;
+ if (Fpix < 2.f || Fpix > 300.f)
+ {
+ printf("Estimated Fpix(%.4f MHz) is outside supported range. Canceling auto-adjust.\n", Fpix);
+ Fpix = 0;
+ }
+
+ float hz = 100000000.0f / vtime;
+ if (cfg.refresh_min && hz < cfg.refresh_min)
+ {
+ printf("Estimated frame rate (%f Hz) is less than REFRESH_MIN(%f Hz). Canceling auto-adjust.\n", hz, cfg.refresh_min);
+ Fpix = 0;
+ }
+
+ if (cfg.refresh_max && hz > cfg.refresh_max)
+ {
+ printf("Estimated frame rate (%f Hz) is more than REFRESH_MAX(%f Hz). Canceling auto-adjust.\n", hz, cfg.refresh_max);
+ Fpix = 0;
+ }
}
- float hz = 100000000.0f / vtime;
- if (cfg.refresh_min && hz < cfg.refresh_min)
- {
- printf("Estimated frame rate (%f Hz) is less than REFRESH_MIN(%f Hz). Canceling auto-adjust.\n", hz, cfg.refresh_min);
- Fpix = 0;
- }
-
- if (cfg.refresh_max && hz > cfg.refresh_max)
- {
- printf("Estimated frame rate (%f Hz) is more than REFRESH_MAX(%f Hz). Canceling auto-adjust.\n", hz, cfg.refresh_max);
- Fpix = 0;
- }
+ video_set_mode(v, Fpix);
+ user_io_send_buttons(1);
+ force = true;
+ }
+ else if (cfg_has_video_sections()) // if we have video sections but aren't updating the resolution for other reasons, then do it here
+ {
+ video_set_mode(&v_def, 0);
+ user_io_send_buttons(1);
+ force = true;
+ }
+ else
+ {
+ set_vfilter(1); // force update filters in case interlacing changed
}
- set_video(v, Fpix);
- user_io_send_buttons(1);
- force = true;
+ video_scaling_adjust(&video_info, &v_cur);
}
else
{
- set_vfilter(0);
+ set_vfilter(0); // update filters if flags have changed
}
}
+// Queues a write of the current framebuffer geometry to the MiSTer_fb module
+// "mode" parameter. The file access itself runs on the offload thread.
+static void fb_write_module_params()
+{
+	// Snapshot the size now: the lambda runs later and captures by value.
+	const int width = fb_width;
+	const int height = fb_height;
+	offload_add_work([=]
+	{
+		static const char *mode_path = "/sys/module/MiSTer_fb/parameters/mode";
+
+		FILE *fp = fopen(mode_path, "w");
+		if (!fp)
+		{
+			printf("Failed to open %s\n", mode_path);
+			return;
+		}
+
+		const int written = fprintf(fp, "%d %d %d %d %d\n", 8888, 1, width, height, width * 4);
+
+		// Buffered output is flushed by fclose(), so a rejected write may only
+		// show up there; always close, then report either failure.
+		const int closed = fclose(fp);
+		if ((written < 0) || (closed != 0)) printf("Failed to write %s\n", mode_path);
+	});
+}
+
void video_fb_enable(int enable, int n)
{
+ PROFILE_FUNCTION();
+
if (fb_base)
{
int res = spi_uio_cmd_cont(UIO_SET_FBUF);
@@ -2345,7 +2463,7 @@ void video_fb_enable(int enable, int n)
//printf("Linux frame buffer: %dx%d, stride = %d bytes\n", fb_width, fb_height, fb_width * 4);
if (!fb_num)
{
- system(fb_reset_cmd);
+ fb_write_module_params();
input_switch(0);
}
else
@@ -2384,6 +2502,37 @@ int video_fb_state()
return fb_enabled;
}
+
+// Derives the Linux framebuffer size and border from the current video mode
+// and cfg.fb_size, re-enables the framebuffer if it is active, and queues the
+// module parameter update.
+static void video_fb_config()
+{
+	PROFILE_FUNCTION();
+
+	int scale = cfg.fb_size;
+	if (scale <= 1)
+	{
+		// automatic: halve the framebuffer when the mode has more pixels than FB_SIZE
+		scale = ((v_cur.item[1] * v_cur.item[5]) > FB_SIZE) ? 2 : 1;
+	}
+	else if (scale == 3)
+	{
+		scale = 2;
+	}
+	else if (scale > 4)
+	{
+		scale = 4;
+	}
+
+	// vertical divisor is doubled when the mode's pr flag is set
+	const int scale_x = scale;
+	const int scale_y = (v_cur.param.pr != 0) ? scale * 2 : scale;
+
+	fb_width = v_cur.item[1] / scale_x;
+	brd_x = cfg.vscale_border / scale_x;
+
+	fb_height = v_cur.item[5] / scale_y;
+	brd_y = cfg.vscale_border / scale_y;
+
+	if (fb_enabled)
+	{
+		video_fb_enable(1, fb_num);
+	}
+
+	fb_write_module_params();
+}
+
static void draw_checkers()
{
volatile uint32_t* buf = fb_base + (FB_SIZE*menu_bgn);
diff --git a/video.h b/video.h
index 5172b2f..1aaf883 100644
--- a/video.h
+++ b/video.h
@@ -25,6 +25,8 @@ struct VideoInfo
bool rotated;
};
+// Initializes the video subsystem and applies the default video mode.
+void video_init();
+
int video_get_scaler_flt(int type);
void video_set_scaler_flt(int type, int n);
char* video_get_scaler_coeff(int type, int only_name = 1);
@@ -41,7 +43,6 @@ char* video_get_shadow_mask(int only_name = 1);
void video_set_shadow_mask(const char *name);
void video_loadPreset(char *name);
-void video_mode_load();
void video_mode_adjust();
int hasAPI1_5();