Faster HPS-FPGA block transfers.

This commit is contained in:
sorgelig
2020-08-03 10:43:16 +08:00
parent 1595d71f78
commit f320a94ee3
6 changed files with 320 additions and 76 deletions

View File

@@ -519,6 +519,7 @@ void inline fpga_gpo_write(uint32_t value)
writel(value, (void*)(SOCFPGA_MGR_ADDRESS + 0x10));
}
#define fpga_gpo_writeN(value) writel((value), (void*)(SOCFPGA_MGR_ADDRESS + 0x10))
#define fpga_gpo_read() gpo_copy //readl((void*)(SOCFPGA_MGR_ADDRESS + 0x10))
#define fpga_gpi_read() (int)readl((void*)(SOCFPGA_MGR_ADDRESS + 0x14))
@@ -694,25 +695,304 @@ uint16_t fpga_spi_fast(uint16_t word)
uint32_t gpo = (fpga_gpo_read() & ~(0xFFFF | SSPI_STROBE)) | word;
fpga_gpo_write(gpo);
fpga_gpo_write(gpo | SSPI_STROBE);
fpga_gpo_write(gpo | SSPI_STROBE);
fpga_gpo_write(gpo);
return (uint16_t)fpga_gpi_read();
}
uint32_t fpga_spi_fast_32(uint32_t dword)
void fpga_spi_fast_block_write(const uint16_t *buf, uint32_t length)
{
uint32_t gpo = (fpga_gpo_read() & ~(0xFFFF | SSPI_STROBE)) | (uint16_t)dword;
uint32_t gpoH = (fpga_gpo_read() & ~(0xFFFF | SSPI_STROBE));
uint32_t gpo = gpoH;
// should be optimized for speed by compiler automatically
while (length--)
{
gpo = gpoH | *buf++;
fpga_gpo_writeN(gpo);
fpga_gpo_writeN(gpo | SSPI_STROBE);
}
fpga_gpo_write(gpo);
fpga_gpo_write(gpo | SSPI_STROBE);
gpo = (gpo & ~0xFFFF) | (uint16_t)(dword>>16);
fpga_gpo_write(gpo);
uint16_t ret_low = (uint16_t)fpga_gpi_read();
fpga_gpo_write(gpo | SSPI_STROBE);
fpga_gpo_write(gpo | SSPI_STROBE);
fpga_gpo_write(gpo);
return (((uint16_t)fpga_gpi_read()) << 16) | ret_low;
}
/*
 * Read `length` 16-bit words into `buf`.
 *
 * Each word is fetched by writing the GPO value with SSPI_STROBE set,
 * writing it again with SSPI_STROBE clear, and then storing the low
 * 16 bits returned by fpga_gpi_read(). The GPO value used has its low
 * 16 bits and the strobe bit cleared.
 *
 * The bulk of the transfer runs in groups of 16 steps written out by
 * hand: the compiler does not unroll this loop on its own, so the
 * unrolling is kept explicit for speed. Leftover words (length % 16)
 * are handled one at a time afterwards.
 */
void fpga_spi_fast_block_read(uint16_t *buf, uint32_t length)
{
	const uint32_t idle = (fpga_gpo_read() & ~(0xFFFF | SSPI_STROBE));
	uint32_t groups = length / 16;
	uint32_t tail = length % 16;

/* One transfer step: strobe high, strobe low, capture one word. */
#define FPGA_RD16_STEP() \
	do { \
		fpga_gpo_writeN(idle | SSPI_STROBE); \
		fpga_gpo_writeN(idle); \
		*buf++ = (uint16_t)fpga_gpi_read(); \
	} while (0)

	for (; groups; groups--)
	{
		FPGA_RD16_STEP(); FPGA_RD16_STEP(); FPGA_RD16_STEP(); FPGA_RD16_STEP();
		FPGA_RD16_STEP(); FPGA_RD16_STEP(); FPGA_RD16_STEP(); FPGA_RD16_STEP();
		FPGA_RD16_STEP(); FPGA_RD16_STEP(); FPGA_RD16_STEP(); FPGA_RD16_STEP();
		FPGA_RD16_STEP(); FPGA_RD16_STEP(); FPGA_RD16_STEP(); FPGA_RD16_STEP();
	}

	for (; tail; tail--)
	{
		FPGA_RD16_STEP();
	}

#undef FPGA_RD16_STEP
}
/*
 * Send `length` bytes from `buf`, one byte per transfer step.
 *
 * For every byte the GPO value is formed from the current GPO state
 * (low 16 bits and SSPI_STROBE cleared) OR-ed with the byte, written
 * once with the strobe clear and once with the strobe set.
 *
 * The bulk of the transfer runs in groups of 16 steps written out by
 * hand: the compiler does not unroll this loop on its own, so the
 * unrolling is kept explicit for speed. Leftover bytes (length % 16)
 * are handled one at a time afterwards.
 *
 * On exit the last value is written again through fpga_gpo_write()
 * without SSPI_STROBE; with length == 0 that value is just the masked
 * GPO state.
 */
void fpga_spi_fast_block_write_8(const uint8_t *buf, uint32_t length)
{
	const uint32_t base = (fpga_gpo_read() & ~(0xFFFF | SSPI_STROBE));
	uint32_t out = base;
	uint32_t groups = length / 16;
	uint32_t tail = length % 16;

/* One transfer step: present the next byte, then raise the strobe. */
#define FPGA_WR8_STEP() \
	do { \
		out = base | *buf++; \
		fpga_gpo_writeN(out); \
		fpga_gpo_writeN(out | SSPI_STROBE); \
	} while (0)

	for (; groups; groups--)
	{
		FPGA_WR8_STEP(); FPGA_WR8_STEP(); FPGA_WR8_STEP(); FPGA_WR8_STEP();
		FPGA_WR8_STEP(); FPGA_WR8_STEP(); FPGA_WR8_STEP(); FPGA_WR8_STEP();
		FPGA_WR8_STEP(); FPGA_WR8_STEP(); FPGA_WR8_STEP(); FPGA_WR8_STEP();
		FPGA_WR8_STEP(); FPGA_WR8_STEP(); FPGA_WR8_STEP(); FPGA_WR8_STEP();
	}

	for (; tail; tail--)
	{
		FPGA_WR8_STEP();
	}

#undef FPGA_WR8_STEP

	fpga_gpo_write(out);
}
/*
 * Read `length` bytes into `buf`, one byte per transfer step.
 *
 * Each byte is fetched by writing the GPO value with SSPI_STROBE set,
 * writing it again with SSPI_STROBE clear, and then storing the low
 * 8 bits returned by fpga_gpi_read(). The GPO value used has its low
 * 16 bits and the strobe bit cleared.
 *
 * The bulk of the transfer runs in groups of 16 steps written out by
 * hand: the compiler does not unroll this loop on its own, so the
 * unrolling is kept explicit for speed. Leftover bytes (length % 16)
 * are handled one at a time afterwards.
 */
void fpga_spi_fast_block_read_8(uint8_t *buf, uint32_t length)
{
	const uint32_t idle = (fpga_gpo_read() & ~(0xFFFF | SSPI_STROBE));
	uint32_t groups = length / 16;
	uint32_t tail = length % 16;

/* One transfer step: strobe high, strobe low, capture one byte. */
#define FPGA_RD8_STEP() \
	do { \
		fpga_gpo_writeN(idle | SSPI_STROBE); \
		fpga_gpo_writeN(idle); \
		*buf++ = (uint8_t)fpga_gpi_read(); \
	} while (0)

	for (; groups; groups--)
	{
		FPGA_RD8_STEP(); FPGA_RD8_STEP(); FPGA_RD8_STEP(); FPGA_RD8_STEP();
		FPGA_RD8_STEP(); FPGA_RD8_STEP(); FPGA_RD8_STEP(); FPGA_RD8_STEP();
		FPGA_RD8_STEP(); FPGA_RD8_STEP(); FPGA_RD8_STEP(); FPGA_RD8_STEP();
		FPGA_RD8_STEP(); FPGA_RD8_STEP(); FPGA_RD8_STEP(); FPGA_RD8_STEP();
	}

	for (; tail; tail--)
	{
		FPGA_RD8_STEP();
	}

#undef FPGA_RD8_STEP
}
/*
 * Send `length` 16-bit words from `buf`, swapping the two bytes of
 * every word before it is placed on the low 16 bits of the GPO value.
 *
 * For every word the GPO value is written once with SSPI_STROBE clear
 * and once with SSPI_STROBE set. On exit the last value is written
 * again through fpga_gpo_write() without the strobe; with length == 0
 * that value is just the masked GPO state.
 *
 * The loop is left plain: the compiler is expected to optimize it for
 * speed by itself.
 */
void fpga_spi_fast_block_write_be(const uint16_t *buf, uint32_t length)
{
	const uint32_t base = (fpga_gpo_read() & ~(0xFFFF | SSPI_STROBE));
	uint32_t out = base;

	for (uint32_t i = 0; i < length; i++)
	{
		const uint16_t word = buf[i];
		/* Exchange high and low byte; the cast drops the bits shifted above bit 15. */
		const uint16_t swapped = (uint16_t)((word << 8) | (word >> 8));

		out = base | swapped;
		fpga_gpo_writeN(out);
		fpga_gpo_writeN(out | SSPI_STROBE);
	}

	fpga_gpo_write(out);
}
/*
 * Read `length` 16-bit words into `buf`, swapping the two bytes of
 * every word before it is stored.
 *
 * Each word is fetched by writing the GPO value with SSPI_STROBE set,
 * writing it again with SSPI_STROBE clear, and then taking the low
 * 16 bits returned by fpga_gpi_read(). The GPO value used has its low
 * 16 bits and the strobe bit cleared.
 *
 * The loop is left plain: the compiler is expected to optimize it for
 * speed by itself.
 */
void fpga_spi_fast_block_read_be(uint16_t *buf, uint32_t length)
{
	const uint32_t idle = (fpga_gpo_read() & ~(0xFFFF | SSPI_STROBE));

	for (uint32_t i = 0; i < length; i++)
	{
		fpga_gpo_writeN(idle | SSPI_STROBE);
		fpga_gpo_writeN(idle);

		const uint16_t raw = (uint16_t)fpga_gpi_read();
		/* Exchange high and low byte; the cast drops the bits shifted above bit 15. */
		buf[i] = (uint16_t)((raw << 8) | (raw >> 8));
	}
}

View File

@@ -12,7 +12,13 @@ int fpga_io_init();
void fpga_spi_en(uint32_t mask, uint32_t en);
uint16_t fpga_spi(uint16_t word);
uint16_t fpga_spi_fast(uint16_t word);
uint32_t fpga_spi_fast_32(uint32_t dword);
void fpga_spi_fast_block_write(const uint16_t *buf, uint32_t length);
void fpga_spi_fast_block_read(uint16_t *buf, uint32_t length);
void fpga_spi_fast_block_write_8(const uint8_t *buf, uint32_t length);
void fpga_spi_fast_block_read_8(uint8_t *buf, uint32_t length);
void fpga_spi_fast_block_write_be(const uint16_t *buf, uint32_t length);
void fpga_spi_fast_block_read_be(uint16_t *buf, uint32_t length);
void fpga_set_led(uint32_t on);
int fpga_get_buttons();

48
spi.cpp
View File

@@ -192,52 +192,12 @@ void spi_write(const uint8_t *addr, uint16_t len, int wide)
void spi_block_read(uint8_t *addr, int wide)
{
if (wide)
{
uint16_t len16 = 256;
uint16_t *a16 = (uint16_t*)addr;
while (len16--) *a16++ = fpga_spi_fast(0);
}
else
{
uint16_t len = 512;
while (len--) *addr++ = fpga_spi_fast(0);
}
if (wide) fpga_spi_fast_block_read((uint16_t*)addr, 256);
else fpga_spi_fast_block_read_8(addr, 512);
}
void spi_block_write(const uint8_t *addr, int wide)
{
if (wide)
{
uint16_t len16 = 256;
uint16_t *a16 = (uint16_t*)addr;
while (len16--) fpga_spi_fast(*a16++);
}
else
{
uint16_t len = 512;
while (len--) fpga_spi_fast(*addr++);
}
}
void spi_block_write_16be(const uint16_t *addr)
{
uint16_t len = 256;
uint16_t tmp;
while (len--)
{
tmp = *addr++;
fpga_spi_fast(SWAPW(tmp));
}
}
void spi_block_read_16be(uint16_t *addr)
{
uint16_t len = 256;
uint16_t tmp;
while (len--)
{
tmp = fpga_spi_fast(0xFFFF);
*addr++ = SWAPW(tmp);
}
if (wide) fpga_spi_fast_block_write((const uint16_t*)addr, 256);
else fpga_spi_fast_block_write_8(addr, 512);
}

2
spi.h
View File

@@ -43,8 +43,6 @@ void spi_read(uint8_t *addr, uint16_t len, int wide);
void spi_write(const uint8_t *addr, uint16_t len, int wide);
void spi_block_read(uint8_t *addr, int wide);
void spi_block_write(const uint8_t *addr, int wide);
void spi_block_write_16be(const uint16_t *addr);
void spi_block_read_16be(uint16_t *addr);
/* OSD related SPI functions */
void EnableOsd_on(int target);

View File

@@ -394,10 +394,10 @@ static void ATA_IdentifyDevice(uint8_t* tfr, hdfTYPE *hdf)
WriteTaskFile(0, tfr[2], tfr[3], tfr[4], tfr[5], tfr[6]);
WriteStatus(IDE_STATUS_RDY); // pio in (class 1) command type
EnableFpga();
spi_w(CMD_IDE_DATA_WR<<8); // write data command
spi_w(0);
spi_w(0);
spi_block_write_16be((uint16_t*)sector_buffer);
fpga_spi_fast(CMD_IDE_DATA_WR<<8); // write data command
fpga_spi_fast(0);
fpga_spi_fast(0);
fpga_spi_fast_block_write_be((uint16_t*)sector_buffer, 256);
DisableFpga();
WriteStatus(IDE_STATUS_END | IDE_STATUS_IRQ);
}
@@ -506,20 +506,20 @@ static void ReadSector(hdfTYPE *hdf)
static void SendSector()
{
EnableFpga();
spi_w(CMD_IDE_DATA_WR << 8); // write data command
spi_w(0);
spi_w(0);
spi_block_write_16be((uint16_t*)sector_buffer);
fpga_spi_fast(CMD_IDE_DATA_WR << 8); // write data command
fpga_spi_fast(0);
fpga_spi_fast(0);
fpga_spi_fast_block_write_be((uint16_t*)sector_buffer, 256);
DisableFpga();
}
static void RecvSector()
{
EnableFpga();
spi_w(CMD_IDE_DATA_RD << 8); // read data command
spi_w(0);
spi_w(0);
spi_block_read_16be((uint16_t*)sector_buffer);
fpga_spi_fast(CMD_IDE_DATA_RD << 8); // read data command
fpga_spi_fast(0);
fpga_spi_fast(0);
fpga_spi_fast_block_read_be((uint16_t*)sector_buffer, 256);
DisableFpga();
}

View File

@@ -123,7 +123,7 @@ static void dma_sendbuf(uint32_t address, uint32_t length, uint32_t *data)
EnableIO();
spi8(UIO_DMA_WRITE);
spi32_w(address);
if(address == IMG_TYPE_HDD0_FAST || address == IMG_TYPE_HDD1_FAST) while (length--) fpga_spi_fast_32(*data++);
if (address == IMG_TYPE_HDD0_FAST || address == IMG_TYPE_HDD1_FAST) fpga_spi_fast_block_write((uint16_t*)data, length * 2);
else while (length--) spi32_w(*data++);
DisableIO();
}
@@ -133,7 +133,7 @@ static void dma_recvbuf(uint32_t address, uint32_t length, uint32_t *data)
EnableIO();
spi8(UIO_DMA_READ);
spi32_w(address);
if (address == IMG_TYPE_HDD0_FAST || address == IMG_TYPE_HDD1_FAST) while (length--) *data++ = fpga_spi_fast_32(0);
if (address == IMG_TYPE_HDD0_FAST || address == IMG_TYPE_HDD1_FAST) fpga_spi_fast_block_read((uint16_t*)data, length * 2);
else while (length--) *data++ = spi32_w(0);
DisableIO();
}