TIwilliam hill官网
直播中

王一凡

7年用户 242经验值
私信 关注
[问答]

DSP C6678 EDMA 多核并行处理耗时问题

各位专家,大家好!
关于6678中EDMA模块8核并行传输。我采用了如下配置:核0~1使用EDMA CC0,核2~5使用EDMA CC1,核6~7使用EDMA CC2。核0使用Que0;核1使用Que1;核2使用Que0;核3使用Que1;核4使用Que2;核5使用Que3;核6使用Que0;核7使用Que1。
每个核使用与核号相同的通道号,完成标志TCC,PaRAM。每个通道采用shadow_region方式,以核号的形式进行区域划分。
现在的情况是:8个核并行传输时,都可以完成EDMA传输,但是达不到并行的效果,耗时没有得到优化。根据6678 datasheet,EDMA模块共有10个TC模块,所以理论上可以达到8个核并行传输8个通道的数据。但是单核传输大块数据,与8核均分分块传输数据,耗时相差不大(传输64M的数据时,单核耗时为200ms;8核耗时为169ms),没有达到并行传输的目的,虽然数据传输完成并且没有错误。
请问要达到8核完全并行运行EDMA,使8核耗时达到单核的1/8时,需要更改设置哪些参数?非常感谢!请各位专家指点。希望能够得到各位专家的宝贵建议。
附上主要函数:
#include <ti/csl/csl_chip.h>
#include
#include
#include
/* Channel object handed to CSL_edma3ChannelOpen() by the functions below. */
CSL_Edma3ChannelObj chObj;
/* Module object handed to CSL_edma3Open() by the functions below. */
CSL_Edma3Obj edmaObj;
/* Receives the status written by the CSL open calls. */
CSL_Status status;
/* Context passed to CSL_edma3Init(). */
CSL_Edma3Context context;
/* Number of the core this code runs on; defined outside this file. */
extern unsigned int coreNum;
/***************************************************************
* void DMA_Init_region(int ChannelNum)
* use region shadow
**************************************************************/
void DMA_Init_region()
[
int instNum;
if(coreNum < 2)
[
instNum = 0;
]
else if(coreNum < 6)
[
instNum = 1;
]
else
[
instNum = 2;
]
CSL_Edma3ChannelAttr chAttr;
CSL_Edma3Handle hModule;
CSL_Edma3ChannelHandle hChannel;
CSL_Edma3CmdDrae regionAccess;
CSL_Edma3CmdIntr regionIntr;
/* Module initialization */
CSL_edma3Init(&context);
/* Module level open */
hModule = CSL_edma3Open(&edmaObj,instNum, NULL, &status);
regionAccess.region = coreNum;
regionAccess.drae = 0x1< regionAccess.draeh = 0x0;
CSL_edma3HwControl(hModule,CSL_EDMA3_CMD_DMAREGION_ENABLE, ®ionAccess);
regionIntr.region = coreNum;
regionIntr.intr = 0x1< regionIntr.intrh = 0x0;
CSL_edma3HwControl(hModule,CSL_EDMA3_CMD_INTR_ENABLE, ®ionIntr);
CSL_edma3MapEventQueueToTC(hModule, 0, 0);
if(coreNum < 2)
[
CSL_edma3MapEventQueueToTC(hModule, coreNum, coreNum);
]
else if(coreNum < 6)
[
CSL_edma3MapEventQueueToTC(hModule, coreNum-2, coreNum-2);
]
else
[
CSL_edma3MapEventQueueToTC(hModule, coreNum-6, coreNum-6);
]
chAttr.regionNum = coreNum; //coreNum; //CSL_EDMA3_REGION_GLOBAL CSL_EDMA3_REGION_0
chAttr.chaNum = coreNum;
hChannel = CSL_edma3ChannelOpen(&chObj, instNum, &chAttr, &status);
/* Map the DMA Channel to the appropriate PARAM Block. We start with PING
* which is located at PARAM Block cNum. */
CSL_edma3HwChannelSetupParam(hChannel, coreNum); // ParamNum
/* Enable channel */
CSL_edma3HwChannelControl(hChannel,CSL_EDMA3_CMD_CHANNEL_ENABLE, NULL);
]

/***************************************************************
* void DMA_transport_ab_region(Uint32 srcBuff, Uint32 dstBuff,
*                              int acnt, int bcnt,
*                              int srcBidx, int dstBidx)
*
* Programs PaRAM set coreNum for one AB-synchronised transfer
* (cCnt = 1, no linking, incrementing src/dst addressing) and
* triggers it manually on channel coreNum of this core's CC
* instance, through shadow region coreNum.
*
*   srcBuff, dstBuff : source / destination addresses
*   acnt, bcnt       : A and B counts of the transfer
*   srcBidx, dstBidx : source / destination B index
*
* Completion is signalled on TCC coreNum (final completion
* interrupt enabled); the function itself does not wait.
* Does nothing if the channel or PaRAM handle cannot be obtained.
*
* NOTE(review): the EDMA3 PaRAM count and index fields are
* presumably 16 bits wide - confirm that every caller's
* acnt/bcnt/srcBidx/dstBidx fit.
* NOTE(review): the channel is re-opened on every call; if this
* is on a hot path consider caching the handle - confirm with
* profiling.
**************************************************************/
void DMA_transport_ab_region(Uint32 srcBuff , Uint32 dstBuff, int acnt, int bcnt, int srcBidx, int dstBidx)
{
    int instNum;
    CSL_Edma3ChannelHandle hChannel;
    CSL_Edma3ParamSetup myParamSetup;
    CSL_Edma3ParamHandle hParamPing;
    CSL_Edma3ChannelAttr chAttr;

    /* CC instance: cores 0-1 -> 0, cores 2-5 -> 1, cores 6-7 -> 2. */
    if (coreNum < 2)
    {
        instNum = 0;
    }
    else if (coreNum < 6)
    {
        instNum = 1;
    }
    else
    {
        instNum = 2;
    }

    chAttr.regionNum = coreNum;
    chAttr.chaNum = coreNum;
    hChannel = CSL_edma3ChannelOpen(&chObj, instNum, &chAttr, &status);
    if (hChannel == NULL)
    {
        return;
    }

    hParamPing = CSL_edma3GetParamHandle(hChannel, coreNum, &status);
    if (hParamPing == NULL)
    {
        return;
    }

    /* No chaining, final-completion interrupt only, TCC = coreNum. */
    myParamSetup.option = CSL_EDMA3_OPT_MAKE(CSL_EDMA3_ITCCH_DIS,
                                             CSL_EDMA3_TCCH_DIS,
                                             CSL_EDMA3_ITCINT_DIS,
                                             CSL_EDMA3_TCINT_EN,
                                             coreNum, CSL_EDMA3_TCC_NORMAL,
                                             CSL_EDMA3_FIFOWIDTH_NONE,
                                             CSL_EDMA3_STATIC_DIS,
                                             CSL_EDMA3_SYNC_AB,
                                             CSL_EDMA3_ADDRMODE_INCR,
                                             CSL_EDMA3_ADDRMODE_INCR);

    myParamSetup.srcAddr = srcBuff;
    myParamSetup.aCntbCnt = CSL_EDMA3_CNT_MAKE(acnt, bcnt);
    myParamSetup.dstAddr = dstBuff;
    myParamSetup.srcDstBidx = CSL_EDMA3_BIDX_MAKE(srcBidx, dstBidx);
    myParamSetup.srcDstCidx = CSL_EDMA3_CIDX_MAKE(0, 0);
    myParamSetup.cCnt = 1;
    myParamSetup.linkBcntrld = CSL_EDMA3_LINKBCNTRLD_MAKE(CSL_EDMA3_LINK_NULL, 0);
    CSL_edma3ParamSetup(hParamPing, &myParamSetup);

    /* Event queue per core: the core's index inside its CC instance. */
    switch (coreNum)
    {
    case 0:
    case 2:
    case 6:
        CSL_edma3HwChannelSetupQue(hChannel, CSL_EDMA3_QUE_0);
        break;
    case 1:
    case 3:
    case 7:
        CSL_edma3HwChannelSetupQue(hChannel, CSL_EDMA3_QUE_1);
        break;
    case 4:
        CSL_edma3HwChannelSetupQue(hChannel, CSL_EDMA3_QUE_2);
        break;
    case 5:
        CSL_edma3HwChannelSetupQue(hChannel, CSL_EDMA3_QUE_3);
        break;
    default:
        /* Core numbers outside 0-7: leave the queue assignment untouched. */
        break;
    }

    /* Manually trigger the channel (set the synchronisation event). */
    CSL_edma3HwChannelControl(hChannel, CSL_EDMA3_CMD_CHANNEL_SET, NULL);
}

/***************************************************************
* void waitDMAover_region(int ChannelNum)
* use region shadow
**************************************************************/
void waitDMAover_region()
[
CSL_Edma3Handle hModule;
CSL_Edma3CmdIntr regionIntr;
int instNum;
if(coreNum < 2)
[
instNum = 0;
]
else if(coreNum < 6)
[
instNum = 1;
]
else
[
instNum = 2;
]
int query=0x1< hModule = CSL_edma3Open(&edmaObj,instNum,NULL,&status);
regionIntr.region = coreNum;//coreNum
regionIntr.intr = 0;
regionIntr.intrh = 0;
/* Poll on IPR bit 0 */
do [
CSL_edma3GetHwStatus(hModule,CSL_EDMA3_QUERY_INTRPEND,®ionIntr);
] while (!(regionIntr.intr & query));
/* Clear the pending bit */
CSL_edma3HwControl(hModule,CSL_EDMA3_CMD_INTRPEND_CLEAR, ®ionIntr);
]

/***************************************************************
* void DMA_Close_region(int ChannelNum)
* use region shadow
**************************************************************/
void DMA_Close_region()
[
int instNum;
if(coreNum < 2)
[
instNum = 0;
]
else if(coreNum < 6)
[
instNum = 1;
]
else
[
instNum = 2;
]
CSL_Edma3Handle hModule;
CSL_Edma3ChannelHandle hChannel;
CSL_Edma3ChannelAttr chAttr;
chAttr.regionNum = coreNum;
chAttr.chaNum = coreNum;
hChannel = CSL_edma3ChannelOpen(&chObj, instNum, &chAttr, &status);
hModule = CSL_edma3Open(&edmaObj,instNum,NULL,&status);
/* Close channel */
CSL_edma3ChannelClose(hChannel);
/* Close EDMA module */
CSL_edma3Close(hModule);
]
unsigned int convert_coreLocalToGlobalAddr( unsigned int addr)
[
unsigned int coreNum0;
coreNum0 = CSL_chipReadReg(CSL_CHIP_DNUM);
return ((1 << 28) | (coreNum0 << 24) | (addr & 0x00ffffff));
]

/*
 * main.c
 */
#include
#include
#include
#include
#include
 
#include
#include
 
#include "RSP_EDMA.h"
 
/* Buffer whose address main() passes as the EDMA destination; DATA_ALIGN requests 8 alignment for it. */
#pragma DATA_ALIGN(x_sp, 8);
float   x_sp[2*NRN];
 
unsigned int coreNum;                      // number of the core this program is running on
 
void main ()
[
 
       coreNum=CSL_chipReadReg(CSL_CHIP_DNUM);//获得各核编号
       coreNum = coreNum % 8;
 
       Uint32 srcAddr = 0x84000000 + coreNum*NAN*2*size_float/8;
 
       DMA_Init_region();
       int i = 0;
       int loopNum = NAN/8;
       for(i=0; i        [
              //传入
              DMA_transport_ab_region(srcAddr + 2*size_float*i, convert_coreLocalToGlobalAddr((Uint32)&x_sp),
                            size_float*2, NRN, 2*size_float*NAN, 2*size_float);
              waitDMAover_region();
       ]
       CSL_semAcquireDirect(coreNum+1);  //每个核获得对应核号加1编号的信号量
while((CSL_semIsFree(1)|CSL_semIsFree(2)|CSL_semIsFree(3)|CSL_semIsFree(4)|CSL_semIsFree(5)|CSL_semIsFree(6)|CSL_semIsFree(7)|CSL_semIsFree(8)));
       DMA_Close_region();
]

回帖(4)

张强

2018-6-21 08:54:31
流量优化是个细致和麻烦以及有技术含量的活儿;没仔细看你的代码;
建议你搜搜TI专家们发布过的memory performance文档,上面有EDMA使用的技巧,以进行针对性的优化;
举报

张强

2018-6-21 09:02:49
e2echina.ti.com/.../download
帮你找到了,***
举报

王一凡

2018-6-21 09:17:34
引用: uuwyfsdfsf 发表于 2018-6-21 09:02
e2echina.ti.com/.../download
帮你找到了,或许能解决你的问题.

非常感谢。
举报

张强

2018-6-21 09:35:44

不客气.
举报

更多回帖

发帖
×
20
完善资料,
赚取积分