Merge pull request #666 from wickerwaka/upstream-video-optimization

video optimization
This commit is contained in:
Alexey Melnikov
2022-07-19 12:35:27 +08:00
committed by GitHub
17 changed files with 665 additions and 146 deletions

View File

@@ -46,7 +46,11 @@ DEP = $(C_SRC:.c=.c.d) $(CPP_SRC:.cpp=.cpp.d)
DFLAGS = $(INCLUDE) -D_7ZIP_ST -DPACKAGE_VERSION=\"1.3.3\" -DFLAC_API_EXPORTS -DFLAC__HAS_OGG=0 -DHAVE_LROUND -DHAVE_STDINT_H -DHAVE_STDLIB_H -DHAVE_SYS_PARAM_H -DENABLE_64_BIT_WORDS=0 -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE -DVDATE=\"`date +"%y%m%d"`\"
CFLAGS = $(DFLAGS) -Wall -Wextra -Wno-strict-aliasing -Wno-stringop-overflow -Wno-stringop-truncation -Wno-format-truncation -Wno-psabi -Wno-restrict -c -O3
LFLAGS = -lc -lstdc++ -lm -lrt $(IMLIB2_LIB) -Llib/bluetooth -lbluetooth
LFLAGS = -lc -lstdc++ -lm -lrt $(IMLIB2_LIB) -Llib/bluetooth -lbluetooth -lpthread
ifeq ($(PROFILING),1)
DFLAGS += -DPROFILING
endif
$(PRJ): $(OBJ)
$(Q)$(info $@)

View File

@@ -73,13 +73,16 @@
<ClCompile Include="lib\miniz\miniz_zip.c" />
<ClCompile Include="main.cpp" />
<ClCompile Include="menu.cpp" />
<ClCompile Include="offload.cpp" />
<ClCompile Include="osd.cpp" />
<ClCompile Include="profiling.cpp" />
<ClCompile Include="recent.cpp" />
<ClCompile Include="scaler.cpp" />
<ClCompile Include="scheduler.cpp" />
<ClCompile Include="shmem.cpp" />
<ClCompile Include="smbus.cpp" />
<ClCompile Include="spi.cpp" />
<ClCompile Include="str_util.cpp" />
<ClCompile Include="support\arcade\buffer.cpp" />
<ClCompile Include="support\arcade\mra_loader.cpp" />
<ClCompile Include="support\archie\archie.cpp" />
@@ -142,13 +145,16 @@
<ClInclude Include="lib\miniz\miniz_zip.h" />
<ClInclude Include="logo.h" />
<ClInclude Include="menu.h" />
<ClInclude Include="offload.h" />
<ClInclude Include="osd.h" />
<ClInclude Include="profiling.h" />
<ClInclude Include="recent.h" />
<ClInclude Include="scaler.h" />
<ClInclude Include="scheduler.h" />
<ClInclude Include="shmem.h" />
<ClInclude Include="smbus.h" />
<ClInclude Include="spi.h" />
<ClInclude Include="str_util.h" />
<ClInclude Include="support.h" />
<ClInclude Include="support\arcade\buffer.h" />
<ClInclude Include="support\arcade\mra_loader.h" />

View File

@@ -223,6 +223,15 @@
<ClCompile Include="smbus.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="offload.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="profiling.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="str_util.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="battery.h">
@@ -432,5 +441,14 @@
<ClInclude Include="smbus.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="offload.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="profiling.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="str_util.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
</Project>

View File

@@ -1105,7 +1105,7 @@ void FindStorage(void)
device = 0;
cfg_parse();
device = saveddev;
video_mode_load();
video_init();
user_io_send_buttons(1);
printf("Waiting for USB...\n");

View File

@@ -15,6 +15,7 @@
#include "osd.h"
#include "menu.h"
#include "shmem.h"
#include "offload.h"
#include "fpga_base_addr_ac5.h"
#include "fpga_manager.h"
@@ -617,6 +618,8 @@ void app_restart(const char *path, const char *xml)
input_switch(0);
input_uinp_destroy();
offload_stop();
char *appname = getappname();
printf("restarting the %s\n", appname);
execl(appname, appname, path, xml, NULL);

View File

@@ -27,6 +27,7 @@
#include "video.h"
#include "joymapping.h"
#include "support.h"
#include "profiling.h"
#define NUMDEV 30
#define NUMPLAYERS 6
@@ -5178,6 +5179,8 @@ int input_test(int getchar)
int input_poll(int getchar)
{
PROFILE_FUNCTION();
static int af[NUMPLAYERS] = {};
static uint32_t time[NUMPLAYERS] = {};
static uint32_t joy_prev[NUMPLAYERS] = {};

View File

@@ -32,6 +32,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "fpga_io.h"
#include "scheduler.h"
#include "osd.h"
#include "offload.h"
const char *version = "$VER:" VDATE;
@@ -45,6 +46,8 @@ int main(int argc, char *argv[])
CPU_SET(1, &set);
sched_setaffinity(0, sizeof(set), &set);
offload_start();
fpga_io_init();
DISKLED_OFF;

View File

@@ -63,6 +63,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "support.h"
#include "bootcore.h"
#include "ide.h"
#include "profiling.h"
/*menu states*/
enum MENU
@@ -899,6 +900,8 @@ static int page = 0;
void HandleUI(void)
{
PROFILE_FUNCTION();
if (bt_timer >= 0)
{
if (!bt_timer) bt_timer = (int32_t)GetTimer(6000);

122
offload.cpp Normal file
View File

@@ -0,0 +1,122 @@
#include "offload.h"
#include "profiling.h"
#include <pthread.h>
#include <inttypes.h>
#include <string.h>
#include <stdio.h>
// Number of slots in the work ring buffer.
static constexpr uint32_t QUEUE_SIZE = 8;
// Handle of the single worker thread created by offload_start().
static pthread_t s_thread_handle;
// s_cond_work: signalled when work is queued or a quit is requested.
// s_cond_available: signalled by the worker each time it finishes an item.
static pthread_cond_t s_cond_work, s_cond_available;
// Guards s_queue_head, s_queue_tail and s_quit.
static pthread_mutex_t s_queue_lock;
// One queued unit of work: the callable the worker thread will invoke.
struct Work
{
std::function<void()> handler;
};
// Ring buffer of pending work, indexed by (counter % QUEUE_SIZE).
static Work s_queue[QUEUE_SIZE];
// Free-running counters: head is advanced by the producer after it fills a slot,
// tail is advanced by the worker after it has executed a slot.
// head == tail means empty, head - tail == QUEUE_SIZE means full.
static uint32_t s_queue_head, s_queue_tail;
// Set by offload_stop() to make the worker exit once the queue is empty.
static bool s_quit;
// Worker thread entry point.
// Repeatedly takes the oldest queued Work item, runs its handler outside the
// queue lock, then releases the slot. Exits only when the quit flag is set AND
// the queue is empty, so work queued before offload_stop() still runs.
// Always returns a null pointer.
static void *worker_thread(void *)
{
	while (true)
	{
		pthread_mutex_lock(&s_queue_lock);

		// Re-test the predicate in a loop: pthread_cond_wait() may return
		// spuriously, and running an empty slot would invoke an empty
		// std::function (std::bad_function_call).
		while ((s_queue_head == s_queue_tail) && !s_quit)
		{
			pthread_cond_wait(&s_cond_work, &s_queue_lock);
		}

		// Queue is empty, so the loop above can only have ended because of s_quit.
		if (s_queue_head == s_queue_tail)
		{
			pthread_mutex_unlock(&s_queue_lock);
			break;
		}

		// The slot at tail stays reserved until tail is advanced below, so the
		// producer cannot overwrite it while the handler runs unlocked.
		Work *current_work = &s_queue[s_queue_tail % QUEUE_SIZE];
		pthread_mutex_unlock(&s_queue_lock);

		// execute
		current_work->handler();
		// Drop the callable (and anything it captured) before the slot is reused.
		current_work->handler = nullptr;

		// lock, move tail forward and tell a blocked producer a slot is free
		pthread_mutex_lock(&s_queue_lock);
		s_queue_tail++;
		pthread_cond_signal(&s_cond_available);
		pthread_mutex_unlock(&s_queue_lock);
	}

	return nullptr;
}
// Initialise the work queue and start the worker thread.
// The worker is pinned to CPU core #0 because main runs on core #1.
// If the thread cannot be created, an error is printed.
void offload_start()
{
	pthread_cond_init(&s_cond_available, nullptr);
	pthread_cond_init(&s_cond_work, nullptr);
	pthread_mutex_init(&s_queue_lock, nullptr);

	s_queue_head = s_queue_tail = 0;
	s_quit = false;

	pthread_attr_t attr;
	pthread_attr_init(&attr);

	// Set affinity to core #0 since main runs on core #1
	cpu_set_t set;
	CPU_ZERO(&set);
	CPU_SET(0, &set);
	pthread_attr_setaffinity_np(&attr, sizeof(set), &set);

	const int err = pthread_create(&s_thread_handle, &attr, worker_thread, nullptr);

	// The attribute object is only needed during creation; release it either way.
	pthread_attr_destroy(&attr);

	if (err != 0)
	{
		printf("offload: failed to create worker thread: %s\n", strerror(err));
	}
}
// Ask the worker thread to exit and block until it has done so.
// Progress is reported on stdout before and after the wait.
void offload_stop()
{
// Set the quit flag under the queue lock and wake the worker in case it is
// currently waiting for work.
pthread_mutex_lock(&s_queue_lock);
s_quit = true;
pthread_cond_signal(&s_cond_work);
pthread_mutex_unlock(&s_queue_lock);
printf("Waiting for offloaded work to finish...");
// The worker only exits once the queue is empty, so the join also waits for
// any work that is still pending.
pthread_join(s_thread_handle, nullptr);
printf("Done\n");
}
// Queue a callable for execution on the worker thread.
// handler: the work to run; an empty std::function is ignored.
// Blocks the caller while all QUEUE_SIZE slots are in use.
void offload_add_work(std::function<void()> handler)
{
	PROFILE_FUNCTION();

	// An empty callable would throw std::bad_function_call on the worker thread.
	if (!handler) return;

	pthread_mutex_lock(&s_queue_lock);

	// Re-test in a loop: pthread_cond_wait() may wake spuriously, and writing
	// into a full queue would overwrite the slot the worker is still executing.
	while ((s_queue_head - s_queue_tail) == QUEUE_SIZE)
	{
		pthread_cond_wait(&s_cond_available, &s_queue_lock);
	}

	Work *work = &s_queue[s_queue_head % QUEUE_SIZE];
	// The worker resets a slot's handler to empty before releasing it, so the
	// swap hands over our callable without copying its captured state a second time.
	work->handler.swap(handler);
	s_queue_head++;

	pthread_cond_signal(&s_cond_work);
	pthread_mutex_unlock(&s_queue_lock);
}

12
offload.h Normal file
View File

@@ -0,0 +1,12 @@
#ifndef OFFLOAD_H
#define OFFLOAD_H
#include <stddef.h>
#include <functional>
// Initialise the work queue and start the background worker thread.
void offload_start();
// Request the worker thread to quit and wait until it has exited.
void offload_stop();
// Queue `work` to be executed on the worker thread.
void offload_add_work(std::function<void()> work);
#endif

View File

@@ -45,6 +45,7 @@ as rotated copies of the first 128 entries. -- AMR
#include "logo.h"
#include "user_io.h"
#include "hardware.h"
#include "profiling.h"
#include "support.h"
@@ -661,6 +662,7 @@ char* OsdCoreNameGet()
void OsdUpdate()
{
PROFILE_FUNCTION();
int n = is_menu() ? 19 : osd_size;
for (int i = 0; i < n; i++)
{

130
profiling.cpp Normal file
View File

@@ -0,0 +1,130 @@
#ifdef PROFILING
#include "profiling.h"
#include "str_util.h"
#include <stdio.h>
#include <string.h>
#include <time.h>
// One entry in the profiling event log. Both begin and end markers use this layout.
struct Event
{
// Label passed by the caller; the pointer is stored, the string is not copied.
const char *name;
// For a begin marker this is the event's own index; for an end marker it is
// the index of the matching begin marker.
uint32_t begin_idx;
// CLOCK_MONOTONIC timestamp taken when the marker was recorded.
struct timespec ts;
};
static constexpr int MAX_EVENTS = 512; // must be pow2
static Event s_events[MAX_EVENTS]; // circular buffer
// Free-running count of recorded events; the next event is written at
// (s_event_tail % MAX_EVENTS).
static uint32_t s_event_tail = 0;
// Map a free-running event index onto its slot in the circular buffer.
static constexpr Event *get_event(uint32_t idx)
{
	return s_events + (idx % MAX_EVENTS);
}
uint32_t profiling_event_begin(const char *name)
{
Event *newEvent = get_event(s_event_tail);
newEvent->begin_idx = s_event_tail;
newEvent->name = name;
clock_gettime(CLOCK_MONOTONIC, &newEvent->ts);
uint32_t r = s_event_tail;
s_event_tail++;
return r;
}
void profiling_event_end(uint32_t begin_idx, const char *name)
{
Event *newEvent = get_event(s_event_tail);
newEvent->begin_idx = begin_idx;
newEvent->name = name;
clock_gettime(CLOCK_MONOTONIC, &newEvent->ts);
s_event_tail++;
}
// Returns the difference (a - b) between two timestamps, in nanoseconds.
static uint64_t delta_ns(const struct timespec *a, const struct timespec *b)
{
	// A smaller nanosecond field in `a` means one second must be borrowed.
	const int borrow = (a->tv_nsec < b->tv_nsec) ? 1 : 0;

	struct timespec diff;
	diff.tv_sec = a->tv_sec - b->tv_sec - borrow;
	diff.tv_nsec = a->tv_nsec - b->tv_nsec;
	if (borrow)
	{
		diff.tv_nsec += 1000000000;
	}

	return (uint64_t)diff.tv_sec * 1000000000ULL + (uint64_t)diff.tv_nsec;
}
// Bookkeeping data for spike report
// All three arrays are indexed by circular-buffer slot (idx % MAX_EVENTS).
// inclusive_times: begin-to-end duration of each span, in ns.
// other_times: summed inclusive time of a span's direct children, in ns.
// pair_stack: slots of the spans that are currently open during the walk.
static uint64_t inclusive_times[MAX_EVENTS];
static uint64_t other_times[MAX_EVENTS];
static uint32_t pair_stack[MAX_EVENTS / 2];
// Print a timing table for every event recorded since `begin_idx`, but only if
// the time from that event to the most recent one is at least `spike_us`
// microseconds. Does nothing when fewer than two events were recorded or when
// more than MAX_EVENTS were recorded (the circular buffer has wrapped).
// NOTE(review): stack_pos and indent are not bounds-checked against pair_stack /
// label; this appears to rely on begin/end markers being properly nested and
// nesting staying shallow - confirm.
void profiling_spike_report(uint32_t begin_idx, uint32_t spike_us)
{
int stack_pos = 0;
if ((s_event_tail - begin_idx) < 2) return; // not enough events
if ((s_event_tail - begin_idx) > MAX_EVENTS) return; // too many events
const uint64_t total_ns = delta_ns(&get_event(s_event_tail - 1)->ts, &get_event(begin_idx)->ts);
if (total_ns < (spike_us * 1000ULL)) return; // below threshold
// Pass 1: walk the events in order, pairing each end marker with the most
// recently opened span to compute inclusive and child times.
for (uint32_t idx = begin_idx; idx != s_event_tail; idx++)
{
const uint32_t cyc_idx = idx % MAX_EVENTS;
Event *event = get_event(idx);
if (event->begin_idx == idx)
{
// Begin marker: open a new span and reset its accumulators.
pair_stack[stack_pos] = cyc_idx;
inclusive_times[cyc_idx] = 0;
other_times[cyc_idx] = 0;
stack_pos++;
}
else
{
// End marker: close the innermost open span.
stack_pos--;
uint32_t span_idx = pair_stack[stack_pos];
const uint64_t inclusive_ns = delta_ns(&event->ts, &get_event(span_idx)->ts);
inclusive_times[span_idx] = inclusive_ns;
// Charge this span's time to its parent so the parent's exclusive time can be derived.
if (stack_pos > 0) other_times[pair_stack[stack_pos-1]] += inclusive_ns;
}
}
char label[256];
int indent = 0;
printf("\n%lluus spike over %uus limit.\n", total_ns / 1000ULL, spike_us);
printf("+----- Name -----------------------------------------+ Inc(us) + Exc(us) +\n");
// Pass 2: print one row per span, indented two spaces per nesting level.
for (uint32_t idx = begin_idx; idx != s_event_tail; idx++)
{
const uint32_t cyc_idx = idx % MAX_EVENTS;
Event *event = get_event(idx);
if (event->begin_idx == idx)
{
memset(label, ' ', indent);
strcpyz(label + indent, sizeof(label) - indent, event->name);
// Exclusive time = inclusive time minus the time spent in direct children.
printf("| %-50s | %7llu | %7llu |\n", label, inclusive_times[cyc_idx] / 1000ULL, (inclusive_times[cyc_idx] - other_times[cyc_idx]) / 1000ULL);
indent += 2;
}
else
{
indent -= 2;
}
}
printf("+----------------------------------------------------+---------+---------+\n\n");
fflush(stdout);
}
#endif // PROFILING

53
profiling.h Normal file
View File

@@ -0,0 +1,53 @@
#ifndef PROFILING_H
#define PROFILING_H 1
#include <inttypes.h>
#ifdef PROFILING
// Record the start of a named span; returns the index to pass to profiling_event_end().
uint32_t profiling_event_begin(const char *name);
// Record the end of the span that was opened at begin_idx.
void profiling_event_end(uint32_t begin_idx, const char *name);
// Print a timing table for the events since begin_idx if they took at least spike_us microseconds.
void profiling_spike_report(uint32_t begin_idx, uint32_t spike_us);
// Scope guard that records a profiling span: a begin marker on construction
// and the matching end marker on destruction. When a non-zero spike threshold
// is given, the destructor also requests a spike report for the span.
struct ProfilingScopedEvent
{
	const char *name;    // label recorded with both markers
	uint32_t spike_us;   // spike threshold in microseconds, 0 = no report
	uint32_t begin_idx;  // index returned by profiling_event_begin()

	// Plain span without a spike report.
	ProfilingScopedEvent(const char *name)
		: ProfilingScopedEvent(name, 0)
	{
	}

	// Span that is reported if it lasts at least spike_us microseconds.
	ProfilingScopedEvent(const char *name, uint32_t spike_us)
		: name(name)
		, spike_us(spike_us)
		, begin_idx(profiling_event_begin(name))
	{
	}

	~ProfilingScopedEvent()
	{
		profiling_event_end(begin_idx, name);
		if (spike_us == 0) return;
		profiling_spike_report(begin_idx, spike_us);
	}
};
// Build a guard-variable name that is unique per source line. This lets several
// profiling macros share one block scope and avoids identifiers containing a
// double underscore, which are reserved for the implementation.
#define PROFILING_CONCAT_IMPL(a, b) a##b
#define PROFILING_CONCAT(a, b) PROFILING_CONCAT_IMPL(a, b)
#define PROFILING_SCOPE_VAR PROFILING_CONCAT(profiling_scope_timer_, __LINE__)
// Profile the enclosing scope under an explicit name / the current function name.
#define PROFILE_SCOPE(name) ProfilingScopedEvent PROFILING_SCOPE_VAR(name)
#define PROFILE_FUNCTION() ProfilingScopedEvent PROFILING_SCOPE_VAR(__FUNCTION__)
// As above, and additionally report the scope if it takes at least `us` microseconds.
#define SPIKE_SCOPE(name, us) ProfilingScopedEvent PROFILING_SCOPE_VAR(name, us)
#define SPIKE_FUNCTION(us) ProfilingScopedEvent PROFILING_SCOPE_VAR(__FUNCTION__, us)
#else // PROFILING
// Profiling disabled: the macros expand to nothing.
#define PROFILE_SCOPE(name)
#define PROFILE_FUNCTION()
#define SPIKE_SCOPE(name, us)
#define SPIKE_FUNCTION(us)
#endif // PROFILING
#endif // PROFILING_H

View File

@@ -6,6 +6,7 @@
#include "input.h"
#include "fpga_io.h"
#include "osd.h"
#include "profiling.h"
static cothread_t co_scheduler = nullptr;
static cothread_t co_poll = nullptr;
@@ -26,8 +27,11 @@ static void scheduler_co_poll(void)
{
scheduler_wait_fpga_ready();
user_io_poll();
input_poll(0);
{
SPIKE_SCOPE("co_poll", 1000);
user_io_poll();
input_poll(0);
}
scheduler_yield();
}
@@ -37,8 +41,11 @@ static void scheduler_co_ui(void)
{
for (;;)
{
HandleUI();
OsdUpdate();
{
SPIKE_SCOPE("co_ui", 1000);
HandleUI();
OsdUpdate();
}
scheduler_yield();
}

View File

@@ -34,6 +34,7 @@
#include "audio.h"
#include "shmem.h"
#include "ide.h"
#include "profiling.h"
#include "support.h"
@@ -1321,7 +1322,7 @@ void user_io_init(const char *path, const char *xml)
bootcore_init(xml ? xml : path);
}
video_mode_load();
video_init();
if (strlen(cfg.font)) LoadFont(cfg.font);
load_volume();
@@ -2783,6 +2784,8 @@ static uint32_t res_timer = 0;
void user_io_poll()
{
PROFILE_FUNCTION();
if ((core_type != CORE_TYPE_SHARPMZ) &&
(core_type != CORE_TYPE_8BIT))
{

425
video.cpp
View File

@@ -23,9 +23,12 @@
#include "shmem.h"
#include "smbus.h"
#include "str_util.h"
#include "profiling.h"
#include "offload.h"
#include "support.h"
#include "lib/imlib2/Imlib2.h"
#include "lib/md5/md5.h"
#define FB_SIZE (1920*1080)
#define FB_ADDR (0x20000000 + (32*1024*1024)) // 512mb + 32mb(Core's fb)
@@ -90,6 +93,7 @@ static vrr_cap_t vrr_modes[3] = {
static uint8_t last_vrr_mode = 0xFF;
static float last_vrr_rate = 0.0f;
static uint32_t last_vrr_vfp = 0;
static uint8_t edid[256] = {};
struct vmode_t
@@ -172,6 +176,10 @@ struct vmode_custom_t
static_assert(sizeof(vmode_custom_param_t) == sizeof(vmode_custom_t::item));
// Static fwd decl
static void video_fb_config();
static void video_calculate_cvt(int horiz_pixels, int vert_pixels, float refresh_rate, int reduced_blanking, vmode_custom_t *vmode);
static vmode_custom_t v_cur = {}, v_def = {}, v_pal = {}, v_ntsc = {};
static int vmode_def = 0, vmode_pal = 0, vmode_ntsc = 0;
@@ -189,8 +197,6 @@ static bool supports_vrr()
return video_version != 0;
}
static void video_calculate_cvt(int horiz_pixels, int vert_pixels, float refresh_rate, int reduced_blanking, vmode_custom_t *vmode);
static uint32_t getPLLdiv(uint32_t div)
{
if (div & 1) return 0x20000 | (((div / 2) + 1) << 8) | (div / 2);
@@ -237,6 +243,8 @@ static int findPLLpar(double Fout, uint32_t *pc, uint32_t *pm, double *pko)
static void setPLL(double Fout, vmode_custom_t *v)
{
PROFILE_FUNCTION();
double Fpix;
double fvco, ko;
uint32_t m, c;
@@ -303,13 +311,25 @@ struct FilterPhase
static constexpr int N_PHASES = 256;
// 16-byte MD5 digest identifying the contents of a video filter.
// A default-constructed digest is all zeroes.
struct VideoFilterDigest
{
	VideoFilterDigest() { memset(md5, 0, sizeof(md5)); }

	// Byte-wise comparisons; const-qualified so they also work on const digests.
	bool operator!=(const VideoFilterDigest& other) const { return memcmp(md5, other.md5, sizeof(md5)) != 0; }
	bool operator==(const VideoFilterDigest& other) const { return memcmp(md5, other.md5, sizeof(md5)) == 0; }

	unsigned char md5[16];
};
// Scaler filter coefficients, expanded to N_PHASES phases.
struct VideoFilter
{
// True when adaptive_phases holds a second coefficient set.
bool is_adaptive;
// Primary coefficient set.
FilterPhase phases[N_PHASES];
// Second coefficient set, filled only for adaptive filters.
FilterPhase adaptive_phases[N_PHASES];
// MD5 over is_adaptive, phases and adaptive_phases; used to detect whether
// a filter differs from the one last sent.
VideoFilterDigest digest;
};
static VideoFilter scaler_flt_data[3];
static bool scale_phases(FilterPhase out_phases[N_PHASES], FilterPhase *in_phases, int in_count)
{
if (!in_count)
@@ -337,12 +357,16 @@ static bool scale_phases(FilterPhase out_phases[N_PHASES], FilterPhase *in_phase
static bool read_video_filter(int type, VideoFilter *out)
{
PROFILE_FUNCTION();
fileTextReader reader = {};
FilterPhase phases[512];
int count = 0;
bool is_adaptive = false;
int scale = 2;
memset(out, 0, sizeof(VideoFilter));
static char filename[1024];
snprintf(filename, sizeof(filename), COEFF_DIR"/%s", scaler_flt[type].filename);
@@ -382,29 +406,50 @@ static bool read_video_filter(int type, VideoFilter *out)
is_adaptive ? count / 2 : count,
is_adaptive ? "true" : "false" );
bool valid = false;
if (is_adaptive)
{
out->is_adaptive = true;
bool valid = scale_phases(out->phases, phases, count / 2);
valid = scale_phases(out->phases, phases, count / 2);
valid = valid && scale_phases(out->adaptive_phases, phases + (count / 2), count / 2);
return valid;
}
else if (count == 32 && !is_adaptive) // legacy
{
out->is_adaptive = false;
return scale_phases(out->phases, phases, 16);
valid = scale_phases(out->phases, phases, 16);
}
else if (!is_adaptive)
{
out->is_adaptive = false;
return scale_phases(out->phases, phases, count);
valid = scale_phases(out->phases, phases, count);
}
else
{
// Make a default NN filter in case of error
out->is_adaptive = false;
FilterPhase nn_phases[2] =
{
{ .t = { 0, 256, 0, 0 } },
{ .t = { 0, 0, 256, 0 } }
};
scale_phases(out->phases, nn_phases, 2);
valid = false;
}
return false;
MD5Context ctx;
MD5Init(&ctx);
MD5Update(&ctx, (unsigned char *)&out->is_adaptive, sizeof(VideoFilter::is_adaptive));
MD5Update(&ctx, (unsigned char *)out->phases, sizeof(VideoFilter::phases));
MD5Update(&ctx, (unsigned char *)out->adaptive_phases, sizeof(VideoFilter::adaptive_phases));
MD5Final(out->digest.md5, &ctx);
return valid;
}
static void send_phases_legacy(int addr, const FilterPhase phases[N_PHASES])
{
PROFILE_FUNCTION();
for (int idx = 0; idx < N_PHASES; idx += 16)
{
const FilterPhase *p = &phases[idx];
@@ -418,6 +463,8 @@ static void send_phases_legacy(int addr, const FilterPhase phases[N_PHASES])
static void send_phases(int addr, const FilterPhase phases[N_PHASES], bool full_precision)
{
PROFILE_FUNCTION();
const int skip = full_precision ? 1 : 4;
const int shift = full_precision ? 0 : 1;
@@ -434,31 +481,38 @@ static void send_phases(int addr, const FilterPhase phases[N_PHASES], bool full_
}
}
static VideoFilterDigest horiz_filter_digest, vert_filter_digest;
static void send_video_filters(const VideoFilter *horiz, const VideoFilter *vert, int ver)
{
PROFILE_FUNCTION();
spi_uio_cmd_cont(UIO_SET_FLTCOEF);
const bool full_precision = (ver & 0x4) != 0;
const bool send_horiz = horiz_filter_digest != horiz->digest;
const bool send_vert = vert_filter_digest != vert->digest;
switch( ver & 0x3 )
{
case 1:
send_phases_legacy(0, horiz->phases);
send_phases_legacy(64, vert->phases);
if (send_horiz) send_phases_legacy(0, horiz->phases);
if (send_vert) send_phases_legacy(64, vert->phases);
break;
case 2:
send_phases(0, horiz->phases, full_precision);
send_phases(1, vert->phases, full_precision);
if (send_horiz) send_phases(0, horiz->phases, full_precision);
if (send_vert) send_phases(1, vert->phases, full_precision);
break;
case 3:
send_phases(0, horiz->phases, full_precision);
send_phases(1, vert->phases, full_precision);
if (send_horiz) send_phases(0, horiz->phases, full_precision);
if (send_vert) send_phases(1, vert->phases, full_precision);
if (horiz->is_adaptive)
if (horiz->is_adaptive && send_horiz)
{
send_phases(2, horiz->adaptive_phases, full_precision);
}
else if (vert->is_adaptive)
else if (vert->is_adaptive && send_vert)
{
send_phases(3, vert->adaptive_phases, full_precision);
}
@@ -467,11 +521,16 @@ static void send_video_filters(const VideoFilter *horiz, const VideoFilter *vert
break;
}
horiz_filter_digest = horiz->digest;
vert_filter_digest = vert->digest;
DisableIO();
}
static void set_vfilter(int force)
{
PROFILE_FUNCTION();
static int last_flags = 0;
int flt_flags = spi_uio_cmd_cont(UIO_SET_FLTNUM);
@@ -487,33 +546,19 @@ static void set_vfilter(int force)
spi8(scaler_flt[0].mode);
DisableIO();
VideoFilter horiz, vert;
int vert_flt;
if (current_video_info.interlaced) vert_flt = VFILTER_HORZ;
else if ((flt_flags & 0x30) && scaler_flt[VFILTER_SCAN].mode) vert_flt = VFILTER_SCAN;
else if (scaler_flt[VFILTER_VERT].mode) vert_flt = VFILTER_VERT;
else vert_flt = VFILTER_HORZ;
//horizontal filter
bool valid = read_video_filter(VFILTER_HORZ, &horiz);
if (valid)
{
//vertical/scanlines filter
int vert_flt;
if (current_video_info.interlaced) vert_flt = VFILTER_HORZ;
else if ((flt_flags & 0x30) && scaler_flt[VFILTER_SCAN].mode) vert_flt = VFILTER_SCAN;
else if (scaler_flt[VFILTER_VERT].mode) vert_flt = VFILTER_VERT;
else vert_flt = VFILTER_HORZ;
if (!read_video_filter(vert_flt, &vert))
{
vert = horiz;
valid = true;
}
send_video_filters(&horiz, &vert, flt_flags & 0xF);
}
if (!valid) spi_uio_cmd8(UIO_SET_FLTNUM, 0);
send_video_filters(&scaler_flt_data[VFILTER_HORZ], &scaler_flt_data[vert_flt], flt_flags & 0xF);
}
static void setScaler()
{
PROFILE_FUNCTION();
uint32_t arc[4] = {};
for (int i = 0; i < 2; i++)
{
@@ -563,12 +608,15 @@ void video_set_scaler_coeff(int type, const char *name)
{
strcpy(scaler_flt[type].filename, name);
FileSaveConfig(scaler_cfg, &scaler_flt, sizeof(scaler_flt));
read_video_filter(type, &scaler_flt_data[type]);
setScaler();
user_io_send_buttons(1);
}
static void loadScalerCfg()
{
PROFILE_FUNCTION();
sprintf(scaler_cfg, "%s_scaler.cfg", user_io_get_core_name());
memset(scaler_flt, 0, sizeof(scaler_cfg));
if (!FileLoadConfig(scaler_cfg, &scaler_flt, sizeof(scaler_flt)) || scaler_flt[0].mode > 1)
@@ -594,17 +642,21 @@ static void loadScalerCfg()
scaler_flt[VFILTER_SCAN].mode = 1;
}
VideoFilter null;
if (!read_video_filter(VFILTER_HORZ, &null)) memset(&scaler_flt[VFILTER_HORZ], 0, sizeof(scaler_flt[VFILTER_HORZ]));
if (!read_video_filter(VFILTER_VERT, &null)) memset(&scaler_flt[VFILTER_VERT], 0, sizeof(scaler_flt[VFILTER_VERT]));
if (!read_video_filter(VFILTER_SCAN, &null)) memset(&scaler_flt[VFILTER_SCAN], 0, sizeof(scaler_flt[VFILTER_SCAN]));
if (!read_video_filter(VFILTER_HORZ, &scaler_flt_data[VFILTER_HORZ])) memset(&scaler_flt[VFILTER_HORZ], 0, sizeof(scaler_flt[VFILTER_HORZ]));
if (!read_video_filter(VFILTER_VERT, &scaler_flt_data[VFILTER_VERT])) memset(&scaler_flt[VFILTER_VERT], 0, sizeof(scaler_flt[VFILTER_VERT]));
if (!read_video_filter(VFILTER_SCAN, &scaler_flt_data[VFILTER_SCAN])) memset(&scaler_flt[VFILTER_SCAN], 0, sizeof(scaler_flt[VFILTER_SCAN]));
}
static char active_gamma_cfg[1024] = { 0 };
static char gamma_cfg[1024] = { 0 };
static char has_gamma = 0;
static void setGamma()
{
PROFILE_FUNCTION();
if (!memcmp(active_gamma_cfg, gamma_cfg, sizeof(gamma_cfg))) return;
fileTextReader reader = {};
static char filename[1024];
@@ -617,6 +669,7 @@ static void setGamma()
has_gamma = 1;
spi8(0);
DisableIO();
snprintf(filename, sizeof(filename), GAMMA_DIR"/%s", gamma_cfg + 1);
if (FileOpenTextReader(&reader, filename))
@@ -649,6 +702,7 @@ static void setGamma()
DisableIO();
spi_uio_cmd8(UIO_SET_GAMMA, gamma_cfg[0]);
}
memcpy(active_gamma_cfg, gamma_cfg, sizeof(gamma_cfg));
}
int video_get_gamma_en()
@@ -686,6 +740,7 @@ void video_set_gamma_curve(const char *name)
static void loadGammaCfg()
{
PROFILE_FUNCTION();
sprintf(gamma_cfg_path, "%s_gamma.cfg", user_io_get_core_name());
if (!FileLoadConfig(gamma_cfg_path, &gamma_cfg, sizeof(gamma_cfg) - 1) || gamma_cfg[0]>1)
{
@@ -717,6 +772,8 @@ enum
static void setShadowMask()
{
PROFILE_FUNCTION();
static char filename[1024];
has_shadow_mask = 0;
@@ -860,6 +917,8 @@ void video_set_shadow_mask(const char *name)
static void loadShadowMaskCfg()
{
PROFILE_FUNCTION();
sprintf(shadow_mask_cfg_path, "%s_shmask.cfg", user_io_get_core_name());
if (!FileLoadConfig(shadow_mask_cfg_path, &shadow_mask_cfg, sizeof(shadow_mask_cfg) - 1))
{
@@ -1008,20 +1067,9 @@ static void hdmi_config_set_spare(bool val)
}
}
static void hdmi_config()
static void hdmi_config_init()
{
int ypbpr = cfg.ypbpr && cfg.direct_video;
const uint8_t vic_mode = (uint8_t)v_cur.param.vic;
uint8_t pr_flags;
if (cfg.direct_video && is_menu()) pr_flags = 0; // automatic pixel repetition
else if (v_cur.param.pr != 0) pr_flags = 0b01001000; // manual pixel repetition with 2x clock
else pr_flags = 0b01000000; // manual pixel repetition
uint8_t sync_invert = 0;
if (v_cur.param.hpol == 0) sync_invert |= 1 << 5;
if (v_cur.param.vpol == 0) sync_invert |= 1 << 6;
// address, value
uint8_t init_data[] = {
@@ -1058,7 +1106,7 @@ static void hdmi_config()
// DDR Input Edge falling [1]=0 (not using DDR atm).
// Output Colour Space RGB [0]=0.
0x17, (uint8_t)(0b00000010 | sync_invert), // Aspect ratio 16:9 [1]=1, 4:3 [1]=0
0x17, 0b01100010, // Aspect ratio 16:9 [1]=1, 4:3 [1]=0, invert sync polarity
0x18, (uint8_t)(ypbpr ? 0x86 : (cfg.hdmi_limited & 1) ? 0x8D : (cfg.hdmi_limited & 2) ? 0x8E : 0x00), // CSC Scaling Factors and Coefficients for RGB Full->Limited.
0x19, (uint8_t)(ypbpr ? 0xDF : (cfg.hdmi_limited & 1) ? 0xBC : 0xFE), // Taken from table in ADV7513 Programming Guide.
@@ -1087,7 +1135,7 @@ static void hdmi_config()
0x2E, (uint8_t)(ypbpr ? 0x07 : 0x01),
0x2F, (uint8_t)(ypbpr ? 0xE7 : 0x00),
0x3B, pr_flags,
0x3B, 0x0, // Automatic pixel repetition and VIC detection
0x48, 0b00001000, // [6]=0 Normal bus order!
@@ -1112,8 +1160,6 @@ static void hdmi_config()
| ((ypbpr || cfg.hdmi_limited) ? 0b0100 : 0b1000)), // [3:2] RGB Quantization range
// [1:0] Non-Uniform Scaled: 00 - None. 01 - Horiz. 10 - Vert. 11 - Both.
0x3C, vic_mode, // VIC
0x59, (uint8_t)(((ypbpr || cfg.hdmi_limited) ? 0x00 : 0x40) // [7:6] [YQ1 YQ0] YCC Quantization Range: b00 = Limited Range, b01 = Full Range
| (cfg.hdmi_game_mode ? 0x30 : 0x00)), // [5:4] IT Content Type b11 = Game, b00 = Graphics/None
// [3:0] Pixel Repetition Fields b0000 = No Repetition
@@ -1205,6 +1251,55 @@ static void hdmi_config()
}
}
static uint8_t last_sync_invert = 0xff;
static uint8_t last_pr_flags = 0xff;
static uint8_t last_vic_mode = 0xff;
// Write the mode-dependent HDMI transmitter registers (sync polarity, pixel
// repetition, VIC) for video mode `vm` to the i2c device at address 0x39.
// The last values are cached, so nothing is written when they are unchanged.
static void hdmi_config_set_mode(vmode_custom_t *vm)
{
	PROFILE_FUNCTION();

	const uint8_t vic_mode = (uint8_t)vm->param.vic;

	uint8_t pr_flags = 0b01000000;                                  // manual pixel repetition
	if (cfg.direct_video && is_menu()) pr_flags = 0;                // automatic pixel repetition
	else if (vm->param.pr != 0)        pr_flags = 0b01001000;       // manual pixel repetition with 2x clock

	const uint8_t hsync_bit = (vm->param.hpol == 0) ? (1 << 5) : 0;
	const uint8_t vsync_bit = (vm->param.vpol == 0) ? (1 << 6) : 0;
	const uint8_t sync_invert = hsync_bit | vsync_bit;

	const bool unchanged = (last_sync_invert == sync_invert)
		&& (last_pr_flags == pr_flags)
		&& (last_vic_mode == vic_mode);
	if (unchanged) return;

	// { register address, value }
	const uint8_t regs[][2] = {
		{ 0x17, (uint8_t)(0b00000010 | sync_invert) }, // Aspect ratio 16:9 [1]=1, 4:3 [1]=0
		{ 0x3B, pr_flags },
		{ 0x3C, vic_mode },                            // VIC
	};

	const int fd = i2c_open(0x39, 0);
	if (fd < 0)
	{
		printf("*** ADV7513 not found on i2c bus! HDMI won't be available!\n");
	}
	else
	{
		for (const auto &reg : regs)
		{
			const int res = i2c_smbus_write_byte_data(fd, reg[0], reg[1]);
			if (res < 0) printf("i2c: write error (%02X %02X): %d\n", reg[0], reg[1], res);
		}
		i2c_close(fd);
	}

	// The cache is refreshed whether or not the device could be reached.
	last_pr_flags = pr_flags;
	last_sync_invert = sync_invert;
	last_vic_mode = vic_mode;
}
static void edid_parse_cea_ext(uint8_t *cea)
{
uint8_t *data_block_end = cea + cea[2];
@@ -1299,8 +1394,6 @@ static int is_edid_valid()
static int get_active_edid()
{
hdmi_config(); // required to get EDID
int fd = i2c_open(0x39, 0);
if (fd < 0)
{
@@ -1467,20 +1560,28 @@ static int get_edid_vmode(vmode_custom_t *v)
static void set_vrr_mode()
{
PROFILE_FUNCTION();
use_vrr = 0;
float vrateh = 100000000;
if (cfg.vrr_mode == 0)
{
hdmi_config_set_spd(0);
hdmi_config_set_spare(0);
if (last_vrr_mode != 0)
{
hdmi_config_set_spd(0);
hdmi_config_set_spare(0);
}
last_vrr_mode = 0;
return;
}
if (current_video_info.vtimeh) vrateh /= current_video_info.vtimeh; else vrateh = 0;
if (cfg.vrr_vesa_framerate) vrateh = cfg.vrr_vesa_framerate;
if (last_vrr_mode == cfg.vrr_mode && last_vrr_rate == vrateh) return;
if ((last_vrr_mode == cfg.vrr_mode) &&
(last_vrr_rate == vrateh) &&
(last_vrr_vfp == v_cur.param.vfp || cfg.vrr_mode != VRR_VESA)) return;
if (!is_edid_valid())
{
@@ -1629,17 +1730,16 @@ static void set_vrr_mode()
}
last_vrr_mode = cfg.vrr_mode;
last_vrr_rate = vrateh;
last_vrr_vfp = v_cur.param.vfp;
if (!supports_vrr() || cfg.vsync_adjust) use_vrr = 0;
}
static char fb_reset_cmd[128] = {};
static void set_video(vmode_custom_t *v, double Fpix)
static void video_set_mode(vmode_custom_t *v, double Fpix)
{
loadGammaCfg();
setGamma();
PROFILE_FUNCTION();
loadScalerCfg();
setGamma();
setScaler();
v_cur = *v;
@@ -1734,35 +1834,10 @@ static void set_video(vmode_custom_t *v, double Fpix)
printf("Fpix=%f\n", v_cur.Fpix);
DisableIO();
hdmi_config();
hdmi_config_set_mode(&v_cur);
int fb_scale = cfg.fb_size;
video_fb_config();
if (fb_scale <= 1)
{
if (((v_cur.item[1] * v_cur.item[5]) > FB_SIZE))
fb_scale = 2;
else
fb_scale = 1;
}
else if (fb_scale == 3) fb_scale = 2;
else if (fb_scale > 4) fb_scale = 4;
const int fb_scale_x = fb_scale;
const int fb_scale_y = v_cur.param.pr == 0 ? fb_scale : fb_scale * 2;
fb_width = v_cur.item[1] / fb_scale_x;
fb_height = v_cur.item[5] / fb_scale_y;
brd_x = cfg.vscale_border / fb_scale_x;
brd_y = cfg.vscale_border / fb_scale_y;
if (fb_enabled) video_fb_enable(1, fb_num);
sprintf(fb_reset_cmd, "echo %d %d %d %d %d >/sys/module/MiSTer_fb/parameters/mode", 8888, 1, fb_width, fb_height, fb_width * 4);
system(fb_reset_cmd);
loadShadowMaskCfg();
setShadowMask();
}
@@ -1882,9 +1957,8 @@ static void fb_init()
spi_uio_cmd16(UIO_SET_FBUF, 0);
}
void video_mode_load()
static void video_mode_load()
{
fb_init();
if (cfg.direct_video && cfg.vsync_adjust)
{
printf("Disabling vsync_adjust because of enabled direct video.\n");
@@ -1921,9 +1995,22 @@ void video_mode_load()
vmode_ntsc = store_custom_video_mode(cfg.video_conf_ntsc, &v_ntsc);
}
}
set_video(&v_def, 0);
}
// Public entry point for video start-up. Runs the following steps in order and
// finishes by applying the default mode v_def with Fpix = 0.
// NOTE(review): the configs appear to be loaded here so video_set_mode() only
// has to apply them - confirm against video_set_mode().
void video_init()
{
fb_init();
hdmi_config_init();
video_mode_load();
loadGammaCfg();
loadScalerCfg();
loadShadowMaskCfg();
video_set_mode(&v_def, 0);
}
static int api1_5 = 0;
int hasAPI1_5()
{
@@ -2246,6 +2333,12 @@ void video_mode_adjust()
{
current_video_info = video_info;
show_video_info(&video_info, &v_cur);
}
force = false;
if (vid_changed && !is_menu())
{
if (cfg_has_video_sections())
{
cfg_parse();
@@ -2253,60 +2346,85 @@ void video_mode_adjust()
user_io_send_buttons(1);
}
show_video_info(&video_info, &v_cur);
video_scaling_adjust(&video_info, &v_cur);
}
force = false;
if (vid_changed && !is_menu() && (cfg.vsync_adjust || cfg.vscale_mode >= 4))
{
const uint32_t vtime = video_info.vtime;
printf("\033[1;33madjust_video_mode(%u): vsync_adjust=%d vscale_mode=%d.\033[0m\n", vtime, cfg.vsync_adjust, cfg.vscale_mode);
vmode_custom_t new_mode;
bool adjust = video_mode_select(vtime, &new_mode);
video_resolution_adjust(&video_info, &new_mode);
vmode_custom_t *v = &new_mode;
double Fpix = 0;
if (adjust)
if ((cfg.vsync_adjust || cfg.vscale_mode >= 4))
{
Fpix = 100 * (v->item[1] + v->item[2] + v->item[3] + v->item[4]) * (v->item[5] + v->item[6] + v->item[7] + v->item[8]);
Fpix /= vtime;
if (Fpix < 2.f || Fpix > 300.f)
const uint32_t vtime = video_info.vtime;
printf("\033[1;33madjust_video_mode(%u): vsync_adjust=%d vscale_mode=%d.\033[0m\n", vtime, cfg.vsync_adjust, cfg.vscale_mode);
vmode_custom_t new_mode;
bool adjust = video_mode_select(vtime, &new_mode);
video_resolution_adjust(&video_info, &new_mode);
vmode_custom_t *v = &new_mode;
double Fpix = 0;
if (adjust)
{
printf("Estimated Fpix(%.4f MHz) is outside supported range. Canceling auto-adjust.\n", Fpix);
Fpix = 0;
Fpix = 100 * (v->item[1] + v->item[2] + v->item[3] + v->item[4]) * (v->item[5] + v->item[6] + v->item[7] + v->item[8]);
Fpix /= vtime;
if (Fpix < 2.f || Fpix > 300.f)
{
printf("Estimated Fpix(%.4f MHz) is outside supported range. Canceling auto-adjust.\n", Fpix);
Fpix = 0;
}
float hz = 100000000.0f / vtime;
if (cfg.refresh_min && hz < cfg.refresh_min)
{
printf("Estimated frame rate (%f Hz) is less than REFRESH_MIN(%f Hz). Canceling auto-adjust.\n", hz, cfg.refresh_min);
Fpix = 0;
}
if (cfg.refresh_max && hz > cfg.refresh_max)
{
printf("Estimated frame rate (%f Hz) is more than REFRESH_MAX(%f Hz). Canceling auto-adjust.\n", hz, cfg.refresh_max);
Fpix = 0;
}
}
float hz = 100000000.0f / vtime;
if (cfg.refresh_min && hz < cfg.refresh_min)
{
printf("Estimated frame rate (%f Hz) is less than REFRESH_MIN(%f Hz). Canceling auto-adjust.\n", hz, cfg.refresh_min);
Fpix = 0;
}
if (cfg.refresh_max && hz > cfg.refresh_max)
{
printf("Estimated frame rate (%f Hz) is more than REFRESH_MAX(%f Hz). Canceling auto-adjust.\n", hz, cfg.refresh_max);
Fpix = 0;
}
video_set_mode(v, Fpix);
user_io_send_buttons(1);
force = true;
}
else if (cfg_has_video_sections()) // if we have video sections but aren't updating the resolution for other reasons, then do it here
{
video_set_mode(&v_def, 0);
user_io_send_buttons(1);
force = true;
}
else
{
set_vfilter(1); // force update filters in case interlacing changed
}
set_video(v, Fpix);
user_io_send_buttons(1);
force = true;
video_scaling_adjust(&video_info, &v_cur);
}
else
{
set_vfilter(0);
set_vfilter(0); // update filters if flags have changed
}
}
// Queue a write of the current framebuffer geometry to the MiSTer_fb module's
// "mode" parameter. The file access is handed to the offload worker; width and
// height are captured by value at the time of the call.
static void fb_write_module_params()
{
	const int width = fb_width;
	const int height = fb_height;

	offload_add_work([width, height]
	{
		FILE *fp = fopen("/sys/module/MiSTer_fb/parameters/mode", "wt");
		if (!fp) return;

		fprintf(fp, "%d %d %d %d %d\n", 8888, 1, width, height, width * 4);
		fclose(fp);
	});
}
void video_fb_enable(int enable, int n)
{
PROFILE_FUNCTION();
if (fb_base)
{
int res = spi_uio_cmd_cont(UIO_SET_FBUF);
@@ -2345,7 +2463,7 @@ void video_fb_enable(int enable, int n)
//printf("Linux frame buffer: %dx%d, stride = %d bytes\n", fb_width, fb_height, fb_width * 4);
if (!fb_num)
{
system(fb_reset_cmd);
fb_write_module_params();
input_switch(0);
}
else
@@ -2384,6 +2502,37 @@ int video_fb_state()
return fb_enabled;
}
// Derive the Linux framebuffer size and border from the current video mode
// (v_cur) and cfg.fb_size, re-enable the framebuffer if it is active, and push
// the new geometry to the framebuffer module.
static void video_fb_config()
{
	PROFILE_FUNCTION();

	// Normalise the configured scale: <=1 selects 1 or 2 depending on whether
	// the mode exceeds FB_SIZE pixels, 3 becomes 2, anything above 4 becomes 4.
	int fb_scale = cfg.fb_size;
	if (fb_scale <= 1)
	{
		fb_scale = ((v_cur.item[1] * v_cur.item[5]) > FB_SIZE) ? 2 : 1;
	}
	else if (fb_scale == 3)
	{
		fb_scale = 2;
	}
	else if (fb_scale > 4)
	{
		fb_scale = 4;
	}

	// The vertical scale is doubled when the mode's pr parameter is non-zero.
	const int fb_scale_x = fb_scale;
	const int fb_scale_y = (v_cur.param.pr != 0) ? fb_scale * 2 : fb_scale;

	fb_width = v_cur.item[1] / fb_scale_x;
	fb_height = v_cur.item[5] / fb_scale_y;

	brd_x = cfg.vscale_border / fb_scale_x;
	brd_y = cfg.vscale_border / fb_scale_y;

	if (fb_enabled)
	{
		video_fb_enable(1, fb_num);
	}

	fb_write_module_params();
}
static void draw_checkers()
{
volatile uint32_t* buf = fb_base + (FB_SIZE*menu_bgn);

View File

@@ -25,6 +25,8 @@ struct VideoInfo
bool rotated;
};
void video_init();
int video_get_scaler_flt(int type);
void video_set_scaler_flt(int type, int n);
char* video_get_scaler_coeff(int type, int only_name = 1);
@@ -41,7 +43,6 @@ char* video_get_shadow_mask(int only_name = 1);
void video_set_shadow_mask(const char *name);
void video_loadPreset(char *name);
void video_mode_load();
void video_mode_adjust();
int hasAPI1_5();