gdma: set transfer ability
This commit is contained in:
@@ -74,6 +74,8 @@ struct gdma_channel_t {
|
||||
intr_handle_t intr; // per-channel interrupt handle
|
||||
gdma_channel_direction_t direction; // channel direction
|
||||
int periph_id; // Peripheral instance ID, indicates which peripheral is connected to this GDMA channel
|
||||
size_t sram_alignment; // alignment for memory in SRAM
|
||||
size_t psram_alignment; // alignment for memory in PSRAM
|
||||
esp_err_t (*del)(gdma_channel_t *channel); // channel deletion function, it's polymorphic, see `gdma_del_tx_channel` or `gdma_del_rx_channel`
|
||||
};
|
||||
|
||||
@@ -271,6 +273,67 @@ err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
 * @brief Set the transfer ability (memory alignment) of a GDMA channel
 *
 * Validates the requested alignment values, configures burst mode for the channel's
 * direction (and, when SOC_GDMA_SUPPORT_PSRAM is set, the external memory block size),
 * then stores the effective alignment values in the channel object.
 *
 * @param[in] dma_chan GDMA channel handle
 * @param[in] ability  Requested transfer alignment; a value of 0 means no alignment is required
 * @return
 *      - ESP_OK: transfer ability applied
 *      - ESP_ERR_INVALID_ARG: NULL handle/pointer, SRAM alignment not a power of two,
 *        or PSRAM alignment not one of 0/16/32/64
 */
esp_err_t gdma_set_transfer_ability(gdma_channel_handle_t dma_chan, const gdma_transfer_ability_t *ability)
{
    esp_err_t ret = ESP_OK;
    gdma_pair_t *pair = NULL;
    gdma_group_t *group = NULL;
    bool en_burst = true;
    // `ability` is dereferenced right below, so it must be validated together with the handle
    ESP_GOTO_ON_FALSE(dma_chan && ability, ESP_ERR_INVALID_ARG, err, TAG, "invalid argument");
    pair = dma_chan->pair;
    group = pair->group;
    size_t sram_alignment = ability->sram_trans_align;
    size_t psram_alignment = ability->psram_trans_align;
    // alignment should be 2^n (0 also passes this test and means "no alignment required")
    ESP_GOTO_ON_FALSE((sram_alignment & (sram_alignment - 1)) == 0, ESP_ERR_INVALID_ARG, err, TAG, "invalid sram alignment: %zu", sram_alignment);

#if SOC_GDMA_SUPPORT_PSRAM
    int block_size_index = 0;
    switch (psram_alignment) {
    case 64: // 64 Bytes alignment
        block_size_index = GDMA_LL_EXT_MEM_BK_SIZE_64B;
        break;
    case 32: // 32 Bytes alignment
        block_size_index = GDMA_LL_EXT_MEM_BK_SIZE_32B;
        break;
    case 16: // 16 Bytes alignment
        block_size_index = GDMA_LL_EXT_MEM_BK_SIZE_16B;
        break;
    case 0: // no alignment is required
        block_size_index = GDMA_LL_EXT_MEM_BK_SIZE_16B;
        psram_alignment = SOC_GDMA_PSRAM_MIN_ALIGN; // fall back to minimal alignment
        break;
    default:
        ESP_GOTO_ON_FALSE(false, ESP_ERR_INVALID_ARG, err, TAG, "invalid psram alignment: %zu", psram_alignment);
        break;
    }
#endif // #if SOC_GDMA_SUPPORT_PSRAM

    if (dma_chan->direction == GDMA_CHANNEL_DIRECTION_TX) {
        // TX channel can always enable burst mode, no matter data alignment
        gdma_ll_tx_enable_data_burst(group->hal.dev, pair->pair_id, true);
        gdma_ll_tx_enable_descriptor_burst(group->hal.dev, pair->pair_id, true);
#if SOC_GDMA_SUPPORT_PSRAM
        gdma_ll_tx_set_block_size_psram(group->hal.dev, pair->pair_id, block_size_index);
#endif // #if SOC_GDMA_SUPPORT_PSRAM
    } else {
        // RX channel burst mode depends on specific data alignment
        en_burst = sram_alignment >= 4;
        gdma_ll_rx_enable_data_burst(group->hal.dev, pair->pair_id, en_burst);
        gdma_ll_rx_enable_descriptor_burst(group->hal.dev, pair->pair_id, en_burst);
#if SOC_GDMA_SUPPORT_PSRAM
        gdma_ll_rx_set_block_size_psram(group->hal.dev, pair->pair_id, block_size_index);
#endif // #if SOC_GDMA_SUPPORT_PSRAM
    }

    // remember the effective alignment in the channel object
    dma_chan->sram_alignment = sram_alignment;
    dma_chan->psram_alignment = psram_alignment;
    ESP_LOGD(TAG, "%s channel (%d,%d), (%zu:%zu) bytes aligned, burst %s", dma_chan->direction == GDMA_CHANNEL_DIRECTION_TX ? "tx" : "rx",
             group->group_id, pair->pair_id, sram_alignment, psram_alignment, en_burst ? "enabled" : "disabled");
err:
    return ret;
}
|
||||
|
||||
esp_err_t gdma_apply_strategy(gdma_channel_handle_t dma_chan, const gdma_strategy_config_t *config)
|
||||
{
|
||||
esp_err_t ret = ESP_OK;
|
||||
|
||||
@@ -59,10 +59,23 @@ typedef struct {
|
||||
gdma_channel_handle_t sibling_chan; /*!< DMA sibling channel handle (NULL means having sibling is not necessary) */
|
||||
gdma_channel_direction_t direction; /*!< DMA channel direction */
|
||||
struct {
|
||||
int reserve_sibling: 1; /*!< If set, DMA channel allocator would prefer to allocate new channel in a new pair, and reserve sibling channel for future use */
|
||||
int reserve_sibling: 1; /*!< If set, DMA channel allocator would prefer to allocate new channel in a new pair, and reserve sibling channel for future use */
|
||||
} flags;
|
||||
} gdma_channel_alloc_config_t;
|
||||
|
||||
/**
|
||||
* @brief GDMA transfer ability
|
||||
*
|
||||
* @note The alignment set in this structure is **not** a guarantee that the GDMA driver will take care of unaligned buffers.
|
||||
* Actually the GDMA driver has no knowledge about the DMA buffer (address and size) used by the upper layer.
|
||||
* So it's the responsibility of the **upper layer** to take care of the buffer address and size.
|
||||
*
|
||||
*/
|
||||
typedef struct {
|
||||
size_t sram_trans_align; /*!< DMA transfer alignment for memory in SRAM, in bytes. The driver enables/disables burst mode based on this value. 0 means no alignment is required */
|
||||
size_t psram_trans_align; /*!< DMA transfer alignment for memory in PSRAM, in bytes. The driver sets proper burst block size based on the alignment value. 0 means no alignment is required */
|
||||
} gdma_transfer_ability_t;
|
||||
|
||||
/**
|
||||
* @brief Type of GDMA event data
|
||||
*
|
||||
@@ -80,6 +93,9 @@ typedef struct {
|
||||
* @param event_data GDMA event data
|
||||
* @param user_data User registered data from `gdma_register_tx_event_callbacks` or `gdma_register_rx_event_callbacks`
|
||||
*
|
||||
* @return Whether a task switch is needed after the callback function returns,
|
||||
* this is usually because the callback wakes up a higher-priority task.
|
||||
*
|
||||
*/
|
||||
typedef bool (*gdma_event_callback_t)(gdma_channel_handle_t dma_chan, gdma_event_data_t *event_data, void *user_data);
|
||||
|
||||
@@ -172,6 +188,18 @@ esp_err_t gdma_connect(gdma_channel_handle_t dma_chan, gdma_trigger_t trig_perip
|
||||
*/
|
||||
esp_err_t gdma_disconnect(gdma_channel_handle_t dma_chan);
|
||||
|
||||
/**
|
||||
* @brief Set DMA channel transfer ability
|
||||
*
|
||||
* @param[in] dma_chan GDMA channel handle, allocated by `gdma_new_channel`
|
||||
* @param[in] ability Transfer ability, e.g. alignment
|
||||
* @return
|
||||
* - ESP_OK: Set DMA channel transfer ability successfully
|
||||
* - ESP_ERR_INVALID_ARG: Set DMA channel transfer ability failed because of invalid argument
|
||||
* - ESP_FAIL: Set DMA channel transfer ability failed because of other error
|
||||
*/
|
||||
esp_err_t gdma_set_transfer_ability(gdma_channel_handle_t dma_chan, const gdma_transfer_ability_t *ability);
|
||||
|
||||
/**
|
||||
* @brief Apply channel strategy for GDMA channel
|
||||
*
|
||||
|
||||
@@ -11,6 +11,8 @@
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <sys/param.h>
|
||||
#include "freertos/FreeRTOS.h"
|
||||
#include "freertos/semphr.h"
|
||||
#include "hal/dma_types.h"
|
||||
@@ -22,6 +24,8 @@
|
||||
|
||||
static const char *TAG = "async_memcpy";
|
||||
|
||||
#define ALIGN_DOWN(val, align) ((val) & ~((align) - 1))
|
||||
|
||||
/**
|
||||
* @brief Type of async mcp stream
|
||||
* mcp stream inherits DMA descriptor, besides that, it has a callback function member
|
||||
@@ -43,7 +47,8 @@ typedef struct async_memcpy_context_t {
|
||||
dma_descriptor_t *tx_desc; // pointer to the next free TX descriptor
|
||||
dma_descriptor_t *rx_desc; // pointer to the next free RX descriptor
|
||||
dma_descriptor_t *next_rx_desc_to_check; // pointer to the next RX descriptor to recycle
|
||||
uint32_t max_stream_num; // maximum number of streams
|
||||
uint32_t max_stream_num; // maximum number of streams
|
||||
size_t max_dma_buffer_size; // maximum DMA buffer size
|
||||
async_memcpy_stream_t *out_streams; // pointer to the first TX stream
|
||||
async_memcpy_stream_t *in_streams; // pointer to the first RX stream
|
||||
async_memcpy_stream_t streams_pool[0]; // stream pool (TX + RX), the size is configured during driver installation
|
||||
@@ -82,9 +87,14 @@ esp_err_t esp_async_memcpy_install(const async_memcpy_config_t *config, async_me
|
||||
mcp_hdl->rx_desc = &mcp_hdl->in_streams[0].desc;
|
||||
mcp_hdl->next_rx_desc_to_check = &mcp_hdl->in_streams[0].desc;
|
||||
mcp_hdl->spinlock = (portMUX_TYPE)portMUX_INITIALIZER_UNLOCKED;
|
||||
mcp_hdl->mcp_impl.sram_trans_align = config->sram_trans_align;
|
||||
mcp_hdl->mcp_impl.psram_trans_align = config->psram_trans_align;
|
||||
size_t trans_align = MAX(config->sram_trans_align, config->psram_trans_align);
|
||||
mcp_hdl->max_dma_buffer_size = trans_align ? ALIGN_DOWN(DMA_DESCRIPTOR_BUFFER_MAX_SIZE, trans_align) : DMA_DESCRIPTOR_BUFFER_MAX_SIZE;
|
||||
|
||||
// initialize implementation layer
|
||||
async_memcpy_impl_init(&mcp_hdl->mcp_impl);
|
||||
ret = async_memcpy_impl_init(&mcp_hdl->mcp_impl);
|
||||
ESP_GOTO_ON_ERROR(ret, err, TAG, "DMA M2M init failed");
|
||||
|
||||
*asmcp = mcp_hdl;
|
||||
|
||||
@@ -121,14 +131,14 @@ static int async_memcpy_prepare_receive(async_memcpy_t asmcp, void *buffer, size
|
||||
dma_descriptor_t *start = desc;
|
||||
dma_descriptor_t *end = desc;
|
||||
|
||||
while (size > DMA_DESCRIPTOR_BUFFER_MAX_SIZE) {
|
||||
while (size > asmcp->max_dma_buffer_size) {
|
||||
if (desc->dw0.owner != DMA_DESCRIPTOR_BUFFER_OWNER_DMA) {
|
||||
desc->dw0.suc_eof = 0;
|
||||
desc->dw0.size = DMA_DESCRIPTOR_BUFFER_MAX_SIZE;
|
||||
desc->dw0.size = asmcp->max_dma_buffer_size;
|
||||
desc->buffer = &buf[prepared_length];
|
||||
desc = desc->next; // move to next descriptor
|
||||
prepared_length += DMA_DESCRIPTOR_BUFFER_MAX_SIZE;
|
||||
size -= DMA_DESCRIPTOR_BUFFER_MAX_SIZE;
|
||||
prepared_length += asmcp->max_dma_buffer_size;
|
||||
size -= asmcp->max_dma_buffer_size;
|
||||
} else {
|
||||
// out of RX descriptors
|
||||
goto _exit;
|
||||
@@ -162,15 +172,15 @@ static int async_memcpy_prepare_transmit(async_memcpy_t asmcp, void *buffer, siz
|
||||
dma_descriptor_t *start = desc;
|
||||
dma_descriptor_t *end = desc;
|
||||
|
||||
while (len > DMA_DESCRIPTOR_BUFFER_MAX_SIZE) {
|
||||
while (len > asmcp->max_dma_buffer_size) {
|
||||
if (desc->dw0.owner != DMA_DESCRIPTOR_BUFFER_OWNER_DMA) {
|
||||
desc->dw0.suc_eof = 0; // not the end of the transaction
|
||||
desc->dw0.size = DMA_DESCRIPTOR_BUFFER_MAX_SIZE;
|
||||
desc->dw0.length = DMA_DESCRIPTOR_BUFFER_MAX_SIZE;
|
||||
desc->dw0.size = asmcp->max_dma_buffer_size;
|
||||
desc->dw0.length = asmcp->max_dma_buffer_size;
|
||||
desc->buffer = &buf[prepared_length];
|
||||
desc = desc->next; // move to next descriptor
|
||||
prepared_length += DMA_DESCRIPTOR_BUFFER_MAX_SIZE;
|
||||
len -= DMA_DESCRIPTOR_BUFFER_MAX_SIZE;
|
||||
prepared_length += asmcp->max_dma_buffer_size;
|
||||
len -= asmcp->max_dma_buffer_size;
|
||||
} else {
|
||||
// out of TX descriptors
|
||||
goto _exit;
|
||||
@@ -222,14 +232,20 @@ esp_err_t esp_async_memcpy(async_memcpy_t asmcp, void *dst, void *src, size_t n,
|
||||
size_t rx_prepared_size = 0;
|
||||
size_t tx_prepared_size = 0;
|
||||
ESP_GOTO_ON_FALSE(asmcp, ESP_ERR_INVALID_ARG, err, TAG, "mcp handle can't be null");
|
||||
ESP_GOTO_ON_FALSE(async_memcpy_impl_is_buffer_address_valid(&asmcp->mcp_impl, src, dst), ESP_ERR_INVALID_ARG, err, TAG, "buffer address not valid");
|
||||
ESP_GOTO_ON_FALSE(n <= DMA_DESCRIPTOR_BUFFER_MAX_SIZE * asmcp->max_stream_num, ESP_ERR_INVALID_ARG, err, TAG, "buffer size too large");
|
||||
ESP_GOTO_ON_FALSE(async_memcpy_impl_is_buffer_address_valid(&asmcp->mcp_impl, src, dst), ESP_ERR_INVALID_ARG, err, TAG, "buffer address not valid: %p -> %p", src, dst);
|
||||
ESP_GOTO_ON_FALSE(n <= asmcp->max_dma_buffer_size * asmcp->max_stream_num, ESP_ERR_INVALID_ARG, err, TAG, "buffer size too large");
|
||||
if (asmcp->mcp_impl.sram_trans_align) {
|
||||
ESP_GOTO_ON_FALSE(((n & (asmcp->mcp_impl.sram_trans_align - 1)) == 0), ESP_ERR_INVALID_ARG, err, TAG, "copy size should align to %d bytes", asmcp->mcp_impl.sram_trans_align);
|
||||
}
|
||||
if (asmcp->mcp_impl.psram_trans_align) {
|
||||
ESP_GOTO_ON_FALSE(((n & (asmcp->mcp_impl.psram_trans_align - 1)) == 0), ESP_ERR_INVALID_ARG, err, TAG, "copy size should align to %d bytes", asmcp->mcp_impl.psram_trans_align);
|
||||
}
|
||||
|
||||
// Prepare TX and RX descriptor
|
||||
portENTER_CRITICAL_SAFE(&asmcp->spinlock);
|
||||
rx_prepared_size = async_memcpy_prepare_receive(asmcp, dst, n, &rx_start_desc, &rx_end_desc);
|
||||
tx_prepared_size = async_memcpy_prepare_transmit(asmcp, src, n, &tx_start_desc, &tx_end_desc);
|
||||
if ((rx_prepared_size == n) && (tx_prepared_size == n)) {
|
||||
if (rx_start_desc && tx_start_desc && (rx_prepared_size == n) && (tx_prepared_size == n)) {
|
||||
// register user callback to the last descriptor
|
||||
async_memcpy_stream_t *mcp_stream = __containerof(rx_end_desc, async_memcpy_stream_t, desc);
|
||||
mcp_stream->cb = cb_isr;
|
||||
|
||||
@@ -54,8 +54,10 @@ typedef bool (*async_memcpy_isr_cb_t)(async_memcpy_t mcp_hdl, async_memcpy_event
|
||||
*
|
||||
*/
|
||||
typedef struct {
|
||||
uint32_t backlog; /*!< Maximum number of streams that can be handled simultaneously */
|
||||
uint32_t flags; /*!< Extra flags to control async memcpy feature */
|
||||
uint32_t backlog; /*!< Maximum number of streams that can be handled simultaneously */
|
||||
size_t sram_trans_align; /*!< DMA transfer alignment (both in size and address) for SRAM memory */
|
||||
size_t psram_trans_align; /*!< DMA transfer alignment (both in size and address) for PSRAM memory */
|
||||
uint32_t flags; /*!< Extra flags to control async memcpy feature */
|
||||
} async_memcpy_config_t;
|
||||
|
||||
/**
|
||||
@@ -63,9 +65,11 @@ typedef struct {
|
||||
*
|
||||
*/
|
||||
#define ASYNC_MEMCPY_DEFAULT_CONFIG() \
|
||||
{ \
|
||||
.backlog = 8, \
|
||||
.flags = 0, \
|
||||
{ \
|
||||
.backlog = 8, \
|
||||
.sram_trans_align = 0, \
|
||||
.psram_trans_align = 0, \
|
||||
.flags = 0, \
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -61,9 +61,21 @@ esp_err_t async_memcpy_impl_init(async_memcpy_impl_t *impl)
|
||||
|
||||
gdma_strategy_config_t strategy_config = {
|
||||
.auto_update_desc = true,
|
||||
.owner_check = true
|
||||
.owner_check = true,
|
||||
};
|
||||
|
||||
gdma_transfer_ability_t transfer_ability = {
|
||||
.sram_trans_align = impl->sram_trans_align,
|
||||
.psram_trans_align = impl->psram_trans_align,
|
||||
};
|
||||
ret = gdma_set_transfer_ability(impl->tx_channel, &transfer_ability);
|
||||
if (ret != ESP_OK) {
|
||||
goto err;
|
||||
}
|
||||
ret = gdma_set_transfer_ability(impl->rx_channel, &transfer_ability);
|
||||
if (ret != ESP_OK) {
|
||||
goto err;
|
||||
}
|
||||
gdma_apply_strategy(impl->tx_channel, &strategy_config);
|
||||
gdma_apply_strategy(impl->rx_channel, &strategy_config);
|
||||
|
||||
@@ -108,5 +120,15 @@ esp_err_t async_memcpy_impl_restart(async_memcpy_impl_t *impl)
|
||||
|
||||
/**
 * @brief Check whether the destination address meets the configured transfer alignment
 *
 * The alignment that applies depends on where `dst` lives: `psram_trans_align` when
 * `esp_ptr_external_ram(dst)` is true, `sram_trans_align` otherwise. An alignment of 0
 * disables the check for that memory type. Only `dst` is examined; `src` is not checked here.
 *
 * @param impl Implementation context holding the alignment settings
 * @param src  Source buffer address (not inspected)
 * @param dst  Destination buffer address
 * @return true if `dst` is acceptable, false if it violates the configured alignment
 */
bool async_memcpy_impl_is_buffer_address_valid(async_memcpy_impl_t *impl, void *src, void *dst)
{
    bool valid = true;
    if (esp_ptr_external_ram(dst)) {
        if (impl->psram_trans_align) {
            // alignment is a power of two, so (align - 1) masks the low address bits
            valid = valid && (((intptr_t)dst & (impl->psram_trans_align - 1)) == 0);
        }
    } else {
        if (impl->sram_trans_align) {
            valid = valid && (((intptr_t)dst & (impl->sram_trans_align - 1)) == 0);
        }
    }
    return valid;
}
|
||||
|
||||
@@ -46,6 +46,8 @@ typedef struct {
|
||||
gdma_channel_handle_t rx_channel;
|
||||
#endif
|
||||
intptr_t rx_eof_addr;
|
||||
size_t sram_trans_align;
|
||||
size_t psram_trans_align;
|
||||
bool isr_need_yield; // if current isr needs a yield for higher priority task
|
||||
} async_memcpy_impl_t;
|
||||
|
||||
|
||||
@@ -12,37 +12,75 @@
|
||||
#include "ccomp_timer.h"
|
||||
#include "esp_async_memcpy.h"
|
||||
#include "soc/soc_caps.h"
|
||||
#include "hal/dma_types.h"
|
||||
|
||||
#if SOC_CP_DMA_SUPPORTED || SOC_GDMA_SUPPORTED
|
||||
|
||||
#define ALIGN_UP(addr, align) (((addr) + (align)-1) & ~((align)-1))
|
||||
#define ALIGN_DOWN(size, align) ((size) & ~((align) - 1))
|
||||
|
||||
static void async_memcpy_setup_testbench(uint32_t seed, uint32_t *buffer_size, uint8_t **src_buf, uint8_t **dst_buf, uint8_t **from_addr, uint8_t **to_addr, uint32_t align)
|
||||
typedef struct {
|
||||
uint32_t seed;
|
||||
uint32_t buffer_size;
|
||||
uint8_t *src_buf;
|
||||
uint8_t *dst_buf;
|
||||
uint8_t *from_addr;
|
||||
uint8_t *to_addr;
|
||||
uint32_t align;
|
||||
uint32_t offset;
|
||||
bool src_in_psram;
|
||||
bool dst_in_psram;
|
||||
} memcpy_testbench_context_t;
|
||||
|
||||
static void async_memcpy_setup_testbench(memcpy_testbench_context_t *test_context)
|
||||
{
|
||||
srand(seed);
|
||||
srand(test_context->seed);
|
||||
printf("allocating memory buffer...\r\n");
|
||||
// memory copy from/to PSRAM is not allowed
|
||||
*src_buf = heap_caps_malloc(*buffer_size, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
|
||||
*dst_buf = heap_caps_calloc(1, *buffer_size, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
|
||||
|
||||
TEST_ASSERT_NOT_NULL_MESSAGE(*src_buf, "allocate source buffer failed");
|
||||
TEST_ASSERT_NOT_NULL_MESSAGE(*dst_buf, "allocate destination buffer failed");
|
||||
|
||||
*from_addr = (uint8_t *)ALIGN_UP((uint32_t)(*src_buf), 4);
|
||||
*to_addr = (uint8_t *)ALIGN_UP((uint32_t)(*dst_buf), 4);
|
||||
uint8_t gap = MAX(*from_addr - *src_buf, *to_addr - *dst_buf);
|
||||
*buffer_size -= gap;
|
||||
|
||||
*from_addr += align;
|
||||
*to_addr += align;
|
||||
*buffer_size -= align;
|
||||
|
||||
printf("...size %d Bytes, src@%p, dst@%p\r\n", *buffer_size, *from_addr, *to_addr);
|
||||
|
||||
printf("fill src buffer with random data\r\n");
|
||||
for (int i = 0; i < *buffer_size; i++) {
|
||||
(*from_addr)[i] = rand() % 256;
|
||||
uint32_t buffer_size = test_context->buffer_size;
|
||||
uint8_t *src_buf = NULL;
|
||||
uint8_t *dst_buf = NULL;
|
||||
uint8_t *from_addr = NULL;
|
||||
uint8_t *to_addr = NULL;
|
||||
#if CONFIG_SPIRAM && SOC_GDMA_SUPPORT_PSRAM
|
||||
if (test_context->src_in_psram) {
|
||||
src_buf = heap_caps_malloc(buffer_size, MALLOC_CAP_SPIRAM);
|
||||
} else {
|
||||
src_buf = heap_caps_malloc(buffer_size, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
|
||||
}
|
||||
if (test_context->dst_in_psram) {
|
||||
dst_buf = heap_caps_calloc(1, buffer_size, MALLOC_CAP_SPIRAM);
|
||||
} else {
|
||||
dst_buf = heap_caps_calloc(1, buffer_size, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
|
||||
}
|
||||
#else
|
||||
src_buf = heap_caps_malloc(buffer_size, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
|
||||
dst_buf = heap_caps_calloc(1, buffer_size, MALLOC_CAP_8BIT | MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
|
||||
#endif
|
||||
TEST_ASSERT_NOT_NULL_MESSAGE(src_buf, "allocate source buffer failed");
|
||||
TEST_ASSERT_NOT_NULL_MESSAGE(dst_buf, "allocate destination buffer failed");
|
||||
// address alignment
|
||||
from_addr = (uint8_t *)ALIGN_UP((uint32_t)(src_buf), test_context->align);
|
||||
to_addr = (uint8_t *)ALIGN_UP((uint32_t)(dst_buf), test_context->align);
|
||||
uint8_t gap = MAX(from_addr - src_buf, to_addr - dst_buf);
|
||||
buffer_size -= gap;
|
||||
// size alignment
|
||||
buffer_size = ALIGN_DOWN(buffer_size, test_context->align);
|
||||
// adding extra offset
|
||||
from_addr += test_context->offset;
|
||||
to_addr += test_context->offset;
|
||||
buffer_size -= test_context->offset;
|
||||
|
||||
printf("...size %d Bytes, src@%p, dst@%p\r\n", buffer_size, from_addr, to_addr);
|
||||
printf("fill src buffer with random data\r\n");
|
||||
for (int i = 0; i < buffer_size; i++) {
|
||||
from_addr[i] = rand() % 256;
|
||||
}
|
||||
// return value
|
||||
test_context->buffer_size = buffer_size;
|
||||
test_context->src_buf = src_buf;
|
||||
test_context->dst_buf = dst_buf;
|
||||
test_context->from_addr = from_addr;
|
||||
test_context->to_addr = to_addr;
|
||||
}
|
||||
|
||||
static void async_memcpy_verify_and_clear_testbench(uint32_t seed, uint32_t buffer_size, uint8_t *src_buf, uint8_t *dst_buf, uint8_t *from_addr, uint8_t *to_addr)
|
||||
@@ -91,18 +129,18 @@ TEST_CASE("memory copy by DMA one by one", "[async mcp]")
|
||||
TEST_ESP_OK(esp_async_memcpy_install(&config, &driver));
|
||||
|
||||
uint32_t test_buffer_len[] = {256, 512, 1024, 2048, 4096, 5011};
|
||||
uint8_t *sbuf = NULL;
|
||||
uint8_t *dbuf = NULL;
|
||||
uint8_t *from = NULL;
|
||||
uint8_t *to = NULL;
|
||||
memcpy_testbench_context_t test_context = {
|
||||
.align = 4,
|
||||
};
|
||||
|
||||
for (int i = 0; i < sizeof(test_buffer_len) / sizeof(test_buffer_len[0]); i++) {
|
||||
// Test different align edge
|
||||
for (int align = 0; align < 4; align++) {
|
||||
async_memcpy_setup_testbench(i, &test_buffer_len[i], &sbuf, &dbuf, &from, &to, align);
|
||||
TEST_ESP_OK(esp_async_memcpy(driver, to, from, test_buffer_len[i], NULL, NULL));
|
||||
async_memcpy_verify_and_clear_testbench(i, test_buffer_len[i], sbuf, dbuf, from, to);
|
||||
|
||||
for (int off = 0; off < 4; off++) {
|
||||
test_context.buffer_size = test_buffer_len[i];
|
||||
test_context.seed = i;
|
||||
async_memcpy_setup_testbench(&test_context);
|
||||
TEST_ESP_OK(esp_async_memcpy(driver, test_context.to_addr, test_context.from_addr, test_context.buffer_size, NULL, NULL));
|
||||
async_memcpy_verify_and_clear_testbench(test_context.seed, test_context.buffer_size, test_context.src_buf, test_context.dst_buf, test_context.from_addr, test_context.to_addr);
|
||||
vTaskDelay(pdMS_TO_TICKS(100));
|
||||
}
|
||||
}
|
||||
@@ -117,86 +155,177 @@ TEST_CASE("memory copy by DMA on the fly", "[async mcp]")
|
||||
TEST_ESP_OK(esp_async_memcpy_install(&config, &driver));
|
||||
|
||||
uint32_t test_buffer_len[] = {512, 1024, 2048, 4096, 5011};
|
||||
uint8_t *sbufs[] = {0, 0, 0, 0, 0};
|
||||
uint8_t *dbufs[] = {0, 0, 0, 0, 0};
|
||||
uint8_t *froms[] = {0, 0, 0, 0, 0};
|
||||
uint8_t *tos[] = {0, 0, 0, 0, 0};
|
||||
memcpy_testbench_context_t test_context[] = {
|
||||
{.align = 4}, {.align = 4}, {.align = 4}, {.align = 4}, {.align = 4},
|
||||
};
|
||||
|
||||
// Aligned case
|
||||
for (int i = 0; i < sizeof(sbufs) / sizeof(sbufs[0]); i++) {
|
||||
async_memcpy_setup_testbench(i, &test_buffer_len[i], &sbufs[i], &dbufs[i], &froms[i], &tos[i], 0);
|
||||
for (int i = 0; i < sizeof(test_buffer_len) / sizeof(test_buffer_len[0]); i++) {
|
||||
test_context[i].seed = i;
|
||||
test_context[i].buffer_size = test_buffer_len[i];
|
||||
async_memcpy_setup_testbench(&test_context[i]);
|
||||
}
|
||||
for (int i = 0; i < sizeof(test_buffer_len) / sizeof(test_buffer_len[0]); i++) {
|
||||
TEST_ESP_OK(esp_async_memcpy(driver, tos[i], froms[i], test_buffer_len[i], NULL, NULL));
|
||||
TEST_ESP_OK(esp_async_memcpy(driver, test_context[i].to_addr, test_context[i].from_addr, test_context[i].buffer_size, NULL, NULL));
|
||||
}
|
||||
for (int i = 0; i < sizeof(sbufs) / sizeof(sbufs[0]); i++) {
|
||||
async_memcpy_verify_and_clear_testbench(i, test_buffer_len[i], sbufs[i], dbufs[i], froms[i], tos[i]);
|
||||
for (int i = 0; i < sizeof(test_buffer_len) / sizeof(test_buffer_len[0]); i++) {
|
||||
async_memcpy_verify_and_clear_testbench(i, test_context[i].buffer_size, test_context[i].src_buf, test_context[i].dst_buf, test_context[i].from_addr, test_context[i].to_addr);
|
||||
}
|
||||
|
||||
// Non-aligned case
|
||||
for (int i = 0; i < sizeof(sbufs) / sizeof(sbufs[0]); i++) {
|
||||
async_memcpy_setup_testbench(i, &test_buffer_len[i], &sbufs[i], &dbufs[i], &froms[i], &tos[i], 3);
|
||||
for (int i = 0; i < sizeof(test_buffer_len) / sizeof(test_buffer_len[0]); i++) {
|
||||
test_context[i].seed = i;
|
||||
test_context[i].buffer_size = test_buffer_len[i];
|
||||
test_context[i].offset = 3;
|
||||
async_memcpy_setup_testbench(&test_context[i]);
|
||||
}
|
||||
for (int i = 0; i < sizeof(test_buffer_len) / sizeof(test_buffer_len[0]); i++) {
|
||||
TEST_ESP_OK(esp_async_memcpy(driver, tos[i], froms[i], test_buffer_len[i], NULL, NULL));
|
||||
TEST_ESP_OK(esp_async_memcpy(driver, test_context[i].to_addr, test_context[i].from_addr, test_context[i].buffer_size, NULL, NULL));
|
||||
}
|
||||
for (int i = 0; i < sizeof(sbufs) / sizeof(sbufs[0]); i++) {
|
||||
async_memcpy_verify_and_clear_testbench(i, test_buffer_len[i], sbufs[i], dbufs[i], froms[i], tos[i]);
|
||||
for (int i = 0; i < sizeof(test_buffer_len) / sizeof(test_buffer_len[0]); i++) {
|
||||
async_memcpy_verify_and_clear_testbench(i, test_context[i].buffer_size, test_context[i].src_buf, test_context[i].dst_buf, test_context[i].from_addr, test_context[i].to_addr);
|
||||
}
|
||||
|
||||
TEST_ESP_OK(esp_async_memcpy_uninstall(driver));
|
||||
}
|
||||
|
||||
#define TEST_ASYNC_MEMCPY_BENCH_COUNTS (16)
|
||||
static uint32_t test_async_memcpy_bench_len = 4095;
|
||||
static int count = 0;
|
||||
#define TEST_ASYNC_MEMCPY_BENCH_COUNTS (16)
|
||||
static int s_count = 0;
|
||||
|
||||
static IRAM_ATTR bool test_async_memcpy_isr_cb(async_memcpy_t mcp_hdl, async_memcpy_event_t *event, void *cb_args)
|
||||
{
|
||||
SemaphoreHandle_t sem = (SemaphoreHandle_t)cb_args;
|
||||
BaseType_t high_task_wakeup = pdFALSE;
|
||||
count++;
|
||||
if (count == TEST_ASYNC_MEMCPY_BENCH_COUNTS) {
|
||||
s_count++;
|
||||
if (s_count == TEST_ASYNC_MEMCPY_BENCH_COUNTS) {
|
||||
xSemaphoreGiveFromISR(sem, &high_task_wakeup);
|
||||
}
|
||||
return high_task_wakeup == pdTRUE;
|
||||
}
|
||||
|
||||
TEST_CASE("memory copy by DMA with callback", "[async mcp]")
|
||||
static void memcpy_performance_test(uint32_t buffer_size)
|
||||
{
|
||||
SemaphoreHandle_t sem = xSemaphoreCreateBinary();
|
||||
|
||||
async_memcpy_config_t config = ASYNC_MEMCPY_DEFAULT_CONFIG();
|
||||
config.backlog = TEST_ASYNC_MEMCPY_BENCH_COUNTS;
|
||||
config.backlog = (buffer_size / DMA_DESCRIPTOR_BUFFER_MAX_SIZE + 1) * TEST_ASYNC_MEMCPY_BENCH_COUNTS;
|
||||
config.sram_trans_align = 4; // at least 4 bytes aligned for SRAM transfer
|
||||
config.psram_trans_align = 64; // at least 64 bytes aligned for PSRAM transfer
|
||||
async_memcpy_t driver = NULL;
|
||||
int64_t elapse_us = 0;
|
||||
float throughput = 0.0;
|
||||
TEST_ESP_OK(esp_async_memcpy_install(&config, &driver));
|
||||
|
||||
uint8_t *sbuf = NULL;
|
||||
uint8_t *dbuf = NULL;
|
||||
uint8_t *from = NULL;
|
||||
uint8_t *to = NULL;
|
||||
|
||||
async_memcpy_setup_testbench(0, &test_async_memcpy_bench_len, &sbuf, &dbuf, &from, &to, 0);
|
||||
count = 0;
|
||||
// 1. SRAM->SRAM
|
||||
memcpy_testbench_context_t test_context = {
|
||||
.align = config.psram_trans_align,
|
||||
.buffer_size = buffer_size,
|
||||
.src_in_psram = false,
|
||||
.dst_in_psram = false,
|
||||
};
|
||||
async_memcpy_setup_testbench(&test_context);
|
||||
s_count = 0;
|
||||
ccomp_timer_start();
|
||||
for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) {
|
||||
TEST_ESP_OK(esp_async_memcpy(driver, to, from, test_async_memcpy_bench_len, test_async_memcpy_isr_cb, sem));
|
||||
TEST_ESP_OK(esp_async_memcpy(driver, test_context.to_addr, test_context.from_addr, test_context.buffer_size, test_async_memcpy_isr_cb, sem));
|
||||
}
|
||||
|
||||
// wait for done semaphore
|
||||
TEST_ASSERT_EQUAL(pdTRUE, xSemaphoreTake(sem, pdMS_TO_TICKS(1000)));
|
||||
esp_rom_printf("memcpy %d Bytes data by HW costs %lldus\r\n", test_async_memcpy_bench_len, ccomp_timer_stop() / TEST_ASYNC_MEMCPY_BENCH_COUNTS);
|
||||
|
||||
elapse_us = ccomp_timer_stop();
|
||||
throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us;
|
||||
IDF_LOG_PERFORMANCE("DMA_COPY", "%.2f MB/s, dir: SRAM->SRAM, size: %zu Bytes", throughput, test_context.buffer_size);
|
||||
ccomp_timer_start();
|
||||
for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) {
|
||||
memcpy(to, from, test_async_memcpy_bench_len);
|
||||
memcpy(test_context.to_addr, test_context.from_addr, test_context.buffer_size);
|
||||
}
|
||||
esp_rom_printf("memcpy %d Bytes data by SW costs %lldus\r\n", test_async_memcpy_bench_len, ccomp_timer_stop() / TEST_ASYNC_MEMCPY_BENCH_COUNTS);
|
||||
elapse_us = ccomp_timer_stop();
|
||||
throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us;
|
||||
IDF_LOG_PERFORMANCE("CPU_COPY", "%.2f MB/s, dir: SRAM->SRAM, size: %zu Bytes", throughput, test_context.buffer_size);
|
||||
async_memcpy_verify_and_clear_testbench(test_context.seed, test_context.buffer_size, test_context.src_buf, test_context.dst_buf, test_context.from_addr, test_context.to_addr);
|
||||
|
||||
async_memcpy_verify_and_clear_testbench(0, test_async_memcpy_bench_len, sbuf, dbuf, from, to);
|
||||
#if CONFIG_SPIRAM && SOC_GDMA_SUPPORT_PSRAM
|
||||
// 2. PSRAM->PSRAM
|
||||
test_context.src_in_psram = true;
|
||||
test_context.dst_in_psram = true;
|
||||
async_memcpy_setup_testbench(&test_context);
|
||||
s_count = 0;
|
||||
ccomp_timer_start();
|
||||
for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) {
|
||||
TEST_ESP_OK(esp_async_memcpy(driver, test_context.to_addr, test_context.from_addr, test_context.buffer_size, test_async_memcpy_isr_cb, sem));
|
||||
}
|
||||
// wait for done semaphore
|
||||
TEST_ASSERT_EQUAL(pdTRUE, xSemaphoreTake(sem, pdMS_TO_TICKS(1000)));
|
||||
elapse_us = ccomp_timer_stop();
|
||||
throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us;
|
||||
IDF_LOG_PERFORMANCE("DMA_COPY", "%.2f MB/s, dir: PSRAM->PSRAM, size: %zu Bytes", throughput, test_context.buffer_size);
|
||||
ccomp_timer_start();
|
||||
for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) {
|
||||
memcpy(test_context.to_addr, test_context.from_addr, test_context.buffer_size);
|
||||
}
|
||||
elapse_us = ccomp_timer_stop();
|
||||
throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us;
|
||||
IDF_LOG_PERFORMANCE("CPU_COPY", "%.2f MB/s, dir: PSRAM->PSRAM, size: %zu Bytes", throughput, test_context.buffer_size);
|
||||
async_memcpy_verify_and_clear_testbench(test_context.seed, test_context.buffer_size, test_context.src_buf, test_context.dst_buf, test_context.from_addr, test_context.to_addr);
|
||||
|
||||
// 3. PSRAM->SRAM
|
||||
test_context.src_in_psram = true;
|
||||
test_context.dst_in_psram = false;
|
||||
async_memcpy_setup_testbench(&test_context);
|
||||
s_count = 0;
|
||||
ccomp_timer_start();
|
||||
for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) {
|
||||
TEST_ESP_OK(esp_async_memcpy(driver, test_context.to_addr, test_context.from_addr, test_context.buffer_size, test_async_memcpy_isr_cb, sem));
|
||||
}
|
||||
// wait for done semaphore
|
||||
TEST_ASSERT_EQUAL(pdTRUE, xSemaphoreTake(sem, pdMS_TO_TICKS(1000)));
|
||||
elapse_us = ccomp_timer_stop();
|
||||
throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us;
|
||||
IDF_LOG_PERFORMANCE("DMA_COPY", "%.2f MB/s, dir: PSRAM->SRAM, size: %zu Bytes", throughput, test_context.buffer_size);
|
||||
ccomp_timer_start();
|
||||
for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) {
|
||||
memcpy(test_context.to_addr, test_context.from_addr, test_context.buffer_size);
|
||||
}
|
||||
elapse_us = ccomp_timer_stop();
|
||||
throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us;
|
||||
IDF_LOG_PERFORMANCE("CPU_COPY", "%.2f MB/s, dir: PSRAM->SRAM, size: %zu Bytes", throughput, test_context.buffer_size);
|
||||
async_memcpy_verify_and_clear_testbench(test_context.seed, test_context.buffer_size, test_context.src_buf, test_context.dst_buf, test_context.from_addr, test_context.to_addr);
|
||||
|
||||
// 4. SRAM->PSRAM
|
||||
test_context.src_in_psram = false;
|
||||
test_context.dst_in_psram = true;
|
||||
async_memcpy_setup_testbench(&test_context);
|
||||
s_count = 0;
|
||||
ccomp_timer_start();
|
||||
for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) {
|
||||
TEST_ESP_OK(esp_async_memcpy(driver, test_context.to_addr, test_context.from_addr, test_context.buffer_size, test_async_memcpy_isr_cb, sem));
|
||||
}
|
||||
// wait for done semaphore
|
||||
TEST_ASSERT_EQUAL(pdTRUE, xSemaphoreTake(sem, pdMS_TO_TICKS(1000)));
|
||||
elapse_us = ccomp_timer_stop();
|
||||
throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us;
|
||||
IDF_LOG_PERFORMANCE("DMA_COPY", "%.2f MB/s, dir: SRAM->PSRAM, size: %zu Bytes", throughput, test_context.buffer_size);
|
||||
ccomp_timer_start();
|
||||
for (int i = 0; i < TEST_ASYNC_MEMCPY_BENCH_COUNTS; i++) {
|
||||
memcpy(test_context.to_addr, test_context.from_addr, test_context.buffer_size);
|
||||
}
|
||||
elapse_us = ccomp_timer_stop();
|
||||
throughput = (float)test_context.buffer_size * 1e6 * TEST_ASYNC_MEMCPY_BENCH_COUNTS / 1024 / 1024 / elapse_us;
|
||||
IDF_LOG_PERFORMANCE("CPU_COPY", "%.2f MB/s, dir: SRAM->PSRAM, size: %zu Bytes", throughput, test_context.buffer_size);
|
||||
async_memcpy_verify_and_clear_testbench(test_context.seed, test_context.buffer_size, test_context.src_buf, test_context.dst_buf, test_context.from_addr, test_context.to_addr);
|
||||
#endif
|
||||
|
||||
TEST_ESP_OK(esp_async_memcpy_uninstall(driver));
|
||||
vSemaphoreDelete(sem);
|
||||
}
|
||||
|
||||
TEST_CASE("memory copy performance test 40KB", "[async mcp]")
|
||||
{
|
||||
memcpy_performance_test(40 * 1024);
|
||||
}
|
||||
|
||||
TEST_CASE("memory copy performance test 4KB", "[async mcp]")
|
||||
{
|
||||
memcpy_performance_test(4 * 1024);
|
||||
}
|
||||
|
||||
#endif //SOC_CP_DMA_SUPPORTED || SOC_GDMA_SUPPORTED
|
||||
|
||||
@@ -37,7 +37,7 @@ static inline esp_err_t crypto_shared_gdma_new_channel(gdma_channel_alloc_config
|
||||
esp_err_t ret;
|
||||
int time_waited_ms = 0;
|
||||
|
||||
while(1) {
|
||||
while (1) {
|
||||
ret = gdma_new_channel(channel_config, channel);
|
||||
|
||||
if (ret == ESP_OK) {
|
||||
@@ -58,14 +58,12 @@ static inline esp_err_t crypto_shared_gdma_new_channel(gdma_channel_alloc_config
|
||||
/* Initialize external memory specific DMA configs */
|
||||
static void esp_crypto_shared_dma_init_extmem(void)
|
||||
{
|
||||
int tx_ch_id = 0;
|
||||
int rx_ch_id = 0;
|
||||
|
||||
gdma_get_channel_id(tx_channel, &tx_ch_id);
|
||||
gdma_get_channel_id(rx_channel, &rx_ch_id);
|
||||
|
||||
gdma_ll_tx_set_block_size_psram(&GDMA, tx_ch_id, GDMA_LL_EXT_MEM_BK_SIZE_16B);
|
||||
gdma_ll_rx_set_block_size_psram(&GDMA, rx_ch_id, GDMA_LL_EXT_MEM_BK_SIZE_16B);
|
||||
gdma_transfer_ability_t transfer_ability = {
|
||||
.sram_trans_align = 4,
|
||||
.psram_trans_align = 16,
|
||||
};
|
||||
gdma_set_transfer_ability(tx_channel, &transfer_ability);
|
||||
gdma_set_transfer_ability(rx_channel, &transfer_ability);
|
||||
}
|
||||
#endif //SOC_GDMA_SUPPORT_PSRAM
|
||||
|
||||
@@ -137,7 +135,7 @@ esp_err_t esp_crypto_shared_gdma_start(const lldesc_t *input, const lldesc_t *ou
|
||||
return ESP_ERR_INVALID_ARG;
|
||||
}
|
||||
|
||||
/* tx channel is reset by gdma_connect(), also reset rx to ensure a known state */
|
||||
/* tx channel is reset by gdma_connect(), also reset rx to ensure a known state */
|
||||
gdma_get_channel_id(tx_channel, &rx_ch_id);
|
||||
gdma_ll_rx_reset_channel(&GDMA, rx_ch_id);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user