本文主要是關於DSP320C6000的相關介紹,並著重對DSP320C6000的指令表進行了詳盡的闡述。
DSP320C6000
TMS320C6000產品是美國TI公司於1997年推出的DSP芯片,該DSP芯片定點、浮點兼容,其中,定點系列是TMS320C62xx系列,浮點系列是TMS320C67xx系列。2000年3月,TI發布新的C64xx內核,主頻為1.1GHz,處理速度9000MIPS,在圖像處理和流媒體領域得到了廣泛的應用。
C6000片內有8個並行的處理單元,分為相同的兩組。DSP的體系結構採用超長指令字(VLIW)結構,單指令字長為32位,指令包里有8條指令,總字長達到256位。執行指令的功能單元已經在編譯時分配好,程序運行時通過專門的指令分配模塊,可以將每個256位的指令包同時分配到8個處理單元,並由8個單元同時運行。芯片最高時鐘頻率為300MHz(67xx系列),且內部8個處理單元並行運行時,其最大處理能力可達到1600MIPS。
DSP320C6000系列
DSP320C6000的指令列表匯集
內(nèi)聯(lián)指令 匯編指令 簡要描述
int _abs (int src);
__int40_t _labs (__int40_t src);ABS返回src的絕對值
int _add2 (int src1, int src2)ADD2把src1的高、低16位和src2的高、低16位分別相加,放入結(jié)果的高、低16位
ushort & _amem2 (void *ptr);LDHU
STHU從內(nèi)存中加載一個halfword到dst里,必須2byte對齊(讀或存)
const ushort & _amem2_const (const void *ptr);LDHU必須2byte對齊(讀)
unsigned & _amem4 (void *ptr);LDW
STW必須4byte對齊(讀或存)
const unsigned & _amem4_const (const void *ptr);LDW必須4byte對齊(讀)
double & _amemd8 (void *ptr);LDW/LDW
STW/STW必須8byte對齊(讀或存)
const double & _amemd8_const (const void *ptr);LDDW必須8byte對齊(讀)
unsigned _clr (unsigned src2, unsigned csta, unsigned cstb);CLR將src2中指定的位域清0,csta和cstb分別指定需要清0的首位和末位
unsigned _clrr (unsigned src2, int src1);CLR將src2中指定位清0,清0的首位和末位由src1的低10位指定
__int40_t _dtol (double src); 將一個double寄存器重新解釋成一個__int40_t
long long _dtoll (double src); 將一個double寄存器重新解釋成一個long long
int _ext (int src2, unsigned csta, unsigned cstb);EXT從src2里提取csta和cstb指定的區(qū)域且符號擴展到32位。提取出的區(qū)域先符號左移再右移。
int _extr (int src2, int src1);EXT同上,區(qū)別:左右移的位數(shù)由src1的低10位指定
unsigned _extu (unsigned src2, unsigned csta , unsigned cstb);EXTU同上上,區(qū)別最后是0擴展到32位。
unsigned _extur (unsigned src2, int src1);EXTU同上,區別:左右移的位數由src1的低10位指定
unsigned _ftoi (float src); 將float的比特位解釋成unsigned。例:
_ftoi (1.0) == 1065353216U
unsigned _hi (double src); 返回double寄存器對的高(奇數)寄存器
unsigned _hill (long long src); 返回long long寄存器對的高(奇數)寄存器
double _itod (unsigned src2, unsignedsrc1); 創(chuàng)建一個新的double寄存器為了解釋2個unsigned的值,其中src2是高(奇數(shù))寄存器,src1是低(偶數(shù))寄存器
float _itof (unsigned src); 將unsigned中的比特位解釋成float例:
_itof (0x3f800000) = 1.0
long long _itoll (unsigned src2, unsignedsrc1); 創(chuàng)建一個新的longlong寄存器為了解釋2個unsigned的值,其中src2是高(奇數(shù))寄存器,src1是低(偶數(shù))
unsigned _lmbd (unsigned src1, unsignedsrc2);LMBD搜索src2里面的1或0,1或0是由src1的LSB決定的,返回比特位變化的位數(shù)
unsigned _lo (double src); 返回double寄存器對的低(偶數)寄存器
unsigned _loll (long long src); 返回long long寄存器對的低(偶數)寄存器
double _ltod (__int40_t src); 把一個__int40_t寄存器解釋成一個double寄存器
double _lltod (long long src); 把一個longlong寄存器解釋成一個double寄存器
int _mpy (int src1, int src2);MPYSrc1和src2相乘,操作數(shù)默認(rèn)為有符號的
int _mpyus (unsigned src1, int src2);MPYUS無符號src1和有符號src2相乘。助記符中的S用來指明哪個操作數是有符號的;當兩個操作數都是有符號或都是無符號時不需要S
int _mpysu (int src1, unsigned src2);MPYSU同上
unsigned _mpyu (unsigned src1,unsigned src2);MPYU同上上上,默認(rèn)為無符號
int _mpyh (int src1, int src2);MPYH同上,區(qū)別見圖示
int _mpyhus (unsigned src1, int src2);MPYHUS
int _mpyhsu (int src1, unsigned src2);MPYHSU
unsigned _mpyhu (unsigned src1,unsigned src2);MPYHU
int _mpyhl (int src1, int src2);MPYHL同上,區(qū)別見圖示
int _mpyhuls (unsigned src1, int src2);MPYHULS
int _mpyhslu (int src1, unsigned src2);MPYHSLU
unsigned _mpyhlu (unsigned src1,unsigned src2);MPYHLU
int _mpylh (int src1, int src2);MPYLH
int _mpyluhs (unsigned src1, int src2);MPYLUHS
int _mpylshu (int src1, unsigned src2);MPYLSHU
unsigned _mpylhu (unsigned src1,unsigned src2);MPYLHU
void _nassert (int src); 不生成代碼,告訴優(yōu)化器一些事情
unsigned _norm (int src);
unsigned _lnorm (__int40_t src);NORM
返回src的冗餘符號比特位的個數,具體見圖示
int _sadd (int src1, int src2);
__int40_t _lsadd (int src1, __int40_t src2);SADD將src1和src2相加,且飽和其結果
int _sat (__int40_t src2);SAT將一個40比特的long轉(zhuǎn)換為一個32比特的有符號int,如有需要,對結(jié)果進(jìn)行飽和
unsigned _set (unsigned src2, unsignedcsta , unsigned cstb);SET將src2中指定的區(qū)域置位1,指定的區(qū)域由csta和cstb指定
unsigned _setr (unsigned src2, int src1);SET
int _smpy (int src1, int src2);SMPY把src1的低16位和src2的低16位相乘
int _smpyh (int src1, int src2);SMPYH高16位
int _smpyhl (int src1, int src2);SMPYHL
int _smpylh (int src1, int src2);SMPYLH
int _sshl (int src2, unsigned src1);SSHL以src1操作數(shù)將src2左移,并且將結(jié)果飽和在32位
int _ssub (int src1, int src2);
__int40_t _lssub (int src1, __int40_tsrc2);SSUB從src1中減去src2,并飽和結(jié)果(src1-src2)
unsigned _subc (unsigned src1, unsignedsrc2);SUBC有條件的減和左移(常用于除法)
int _sub2 (int src1, int src2);SUB2把src1的高低16位分別減去src2的高低16位。任何低16位的借位不會影響高16位。
int _abs2 (int src);ABS2計算16位的絕對值
int _add4 (int src1, int src2);ADD4把src1和src2的4對8位數(shù)相加。不會進(jìn)行飽和,進(jìn)位不會影響其他的8位數(shù)
long long & _amem8 (void *ptr);LDDW
STDW加載和存儲8bytes,指針必須8byte對齊
const long long & _amem8_const (const void *ptr);LDDW加載8bytes,指針必須8byte對齊
__float2_t & _amem8_f2(void * ptr);LDDW
STDW加載和存儲8bytes,指針必須8byte對齊,必須包含c6x.h
const __float2_t & _amem8_f2_const(void * ptr);LDDW加載8bytes,指針必須8byte對齊,必須包含c6x.h
double & _amemd8 (void *ptr);LDDW
STDW
const double & _amemd8_const (const void *ptr);LDDW
int _avg2 (int src1, int src2);AVG2計算每對有符號16位值的平均值
unsigned _avgu4 (unsigned, unsigned);AVGU4計算每對無符號8位數的平均值
unsigned _bitc4 (unsigned src);BITC4統(tǒng)計每個8位的比特位是1的個數(shù),寫入結(jié)果對應(yīng)位置
unsigned _bitr (unsigned src);BITR翻轉(zhuǎn)比特位的順序
int _cmpeq2 (int src1, int src2);CMPEQ2比較每16位的值是否相等,結(jié)果放入dst的最低2位
int _cmpeq4 (int src1, int src2);CMPEQ4比較每8位的值是否相等,結(jié)果放入dst的最低4位,相等置1,否則為0
int _cmpgt2 (int src1, int src2);CMPGT2每16位有符號比較,src1>src2,置為1;否則置為0。結果放入dst的最低2位
unsigned _cmpgtu4 (unsigned src1, unsigned src2);CMPGTU4每8位無符號比較,src1>src2,置為1;否則置為0。結果放入dst的最低4位
unsigned _deal (unsigned src );DEAL將src中的比特位的奇數(shù)位和偶數(shù)位抽出來進(jìn)行重組,偶數(shù)位放在低的16位,奇數(shù)位放在高的16位
int _dotp2 (int src1, int src2);
__int40_t _ldotp2 (int src1, int src2);DOTP2
DOTP2將src1中的和src2中的16位有符號對進(jìn)行點積,結(jié)果被寫成有符號32位int或者符號擴展為64位
int _dotpn2 (int src1, int src2);DOTPN2將src1和src2中的16位有符號數(shù)進(jìn)行點積相減
int _dotpnrsu2 (int src1, unsigned src2);DOTPNRSU2Src1和src2的高16位的點積減去低16位的點積。Src1中的數(shù)被當(dāng)做有符號,src2中的數(shù)被當(dāng)做無符號,再加上2^15,結(jié)果再符號右移16位
int _dotprsu2 (int src1, unsigned src2);DOTPRSU2Src1和src2的高16位的點積加上低16位的點積。Src1中的數(shù)被當(dāng)做有符號,src2中的數(shù)被當(dāng)做無符號,再加上2^15,結(jié)果再符號右移16位
int _dotpsu4 (int src1, unsigned src2);DOTPSU4將src1和src2的每8位進(jìn)行相乘再求和,src1的每8位數(shù)被當(dāng)做有符號,src2的每8位數(shù)被當(dāng)做無符號
unsigned _dotpu4 (unsigned src1,unsigned src2);DOTPU4都被當(dāng)做無符號的
int _gmpy4 (int src1, int src2);GMPY4將src1和src2的4個無符號進(jìn)行伽羅瓦域的乘法
int _max2 (int src1, int src2);MAX2將src1和src2的2個有符號16位整數(shù)比較,取較大值
int _min2 (int src1, int src2);MIN2將src1和src2的2個有符號16位整數(shù)比較,取較小值
unsigned _maxu4 (unsigned src1,unsigned src2);MAXU4將src1和src2的4個無符號8位整數(shù)比較,取較大值
unsigned _minu4 (unsigned src1,unsigned src2);MINU4將src1和src2的4個無符號8位整數(shù)比較,取較小值
ushort & _mem2 (void * ptr);LDB/LDB
STB/STB加載和存儲2byte,不需要對齊
const ushort & _mem2_const (const void * ptr);LDB/LDB加載2byte,不需要對齊
unsigned & _mem4 (void * ptr);LDNW
STNW加載和存儲4byte,不需要對齊
const unsigned & _mem4_const (const void * ptr);LDNW加載4byte,不需要對齊
long long & _mem8 (void * ptr);LDNDW
STNDW加載和存儲8byte,不需要對齊
const long long & _mem8_const (const void * ptr);LDNDW加載8byte,不需要對齊
double & _memd8 (void * ptr);LDNDW
STNDW加載和存儲8byte,不需要對齊
const double & _memd8_const (const void * ptr);LDNDW加載8byte,不需要對齊
long long _mpy2ll (int src1, int src2);MPY2將src1和src2中的2個有符號16位分別相乘,將2個32位的結(jié)果寫入longlong中
long long _mpyhill (int src1, int src2);MPYHI將src1中高16位作為1個有符號16位乘以src2的有符號32位,結(jié)果寫入longlong的低48位
long long _mpylill (int src1, int src2);MPYLI將src1中低16位作為1個有符號16位乘以src2的有符號32位,結(jié)果寫入longlong的低48位
int _mpyhir (int src1, int src2);MPYHIR將src1的高16位作為一個16位有符號乘以src2的有符號32位。乘積利用round模式通過加2^14轉(zhuǎn)成32位,最后再右移15位
int _mpylir (int src1, int src2);MPYLIR將src1的低16位作為一個16位有符號乘以src2的有符號32位。乘積利用round模式通過加2^14轉(zhuǎn)成32位,最后再右移15位
long long _mpysu4ll (int src1, unsignedsrc2);MPYSU4將src1的4個8位有符號乘src2的4個8位無符號,得到4個16位有符號,組成一個64位
long long _mpyu4ll (unsigned src1,unsigned src2);MPYU4將src1和src2的4個無符號8位相乘,得到4個無符號16位組成一個64位的數(shù)
int _mvd (int src2 );MVD將src2的數(shù)據(jù)移入返回值中,利用了乘法流水線(延遲)
unsigned _pack2 (unsigned src1,unsigned src2);PACK2
unsigned _packh2 (unsigned src1,unsigned src2);PACKH2
unsigned _packh4 (unsigned src1,unsigned src2);PACKH4
unsigned _packl4 (unsigned src1,unsigned src2);PACKL4
unsigned _packhl2 (unsigned src1,unsigned src2);PACKHL2
unsigned _packlh2 (unsigned src1,unsigned src2);PACKLH2
unsigned _rotl (unsigned src1, unsignedsrc2);ROTL按照src1的最低5位的數(shù)去左移src2的32位,src1中剩下的高的5-31位被忽略
int _sadd2 (int src1, int src2);SADD2將src1和src2中的2個16位有符號數(shù)相加,生成2個16有符號數(shù)并且是飽和過的。
int _saddus2 (unsigned src1, int src2);SADDUS2將src1中的2個無符號16位數和src2中的2個16位有符號數相加,得到2個無符號16位數
unsigned _saddu4 (unsigned src1,unsigned src2);SADDU4將src1和src2中的4個無符號8位數(shù)相加
unsigned _shfl (unsigned src2);SHFL將src2的高16和低16位進(jìn)行交織
unsigned _shlmb (unsigned src1,unsigned src2);SHLMB將src2左移1byte,然后將src1的最高位充入src2左移后多出來的位置
unsigned _shrmb (unsigned src1,unsigned src2);SHRMB將src2右移1byte,然后將src1的最低位充入src2右移后多出來的位置
int _shr2 (int src1, unsigned src2);SHR2將src2的2個16位有符號數(shù)分別右移,右移的位數(shù)由src1的低5位決定,多出的位置由符號位擴展
unsigned _shru2 (unsigned src1, unsigned src2);SHRU2將src2的2個16位無符號數分別右移,右移的位數由src1的低5位決定,多出的位置由0擴展
long long _smpy2ll (int src1, int src2);SMPY2將src1和src2中的2個有符號16位數(shù)相乘,然后左移1位,再進(jìn)行飽和。
int _spack2 (int src1, int src2);SPACK2將src1和src2中的1個有符號32位數(shù)進(jìn)行飽和到有符號16位,然后把src1的飽和結(jié)果放入dst的高16位,src2的飽和結(jié)果放入dst的低16位
unsigned _spacku4 (int src1 , int src2);SPACKU4將src1和src2中的4個有符號16位數(shù)飽和成無符號8位數(shù),
int _sshvl (int src2, int src1);SSHVL將src2中的有符號32位數(shù)左移或右移,移位的數(shù)量由src1指定的比特數(shù)確定。
src1在[-31,31]之間,如果src1為正,src2則左移;如果src1為負(fù),src2右移|src1|且符號位擴展
int _sshvr (int src2, int src1);SSHVR將src2中的有符號32位數(shù)左移或右移,移位的數(shù)量由src1指定的比特數(shù)確定。
src1在[-31,31]之間,如果src1為正,src2則右移且是符號擴展;如果src1為負(fù),src2左移|src1|
int _sub4 (int src1, int src2);SUB4將src1和src2中的4個8位數(shù)相減,不進(jìn)行飽和
int _subabs4 (int src1, int src2);SUBABS4將src1和src2中的4個無符號8位相減求絕對值
unsigned _swap4 (unsigned src);SWAP4將src的4個8位無符號數(shù)按圖示換位置
unsigned _unpkhu4 (unsigned src);UNPKHU4擴展0
unsigned _unpklu4 (unsigned src);UNPKLU4擴0
unsigned _xpnd2 (unsigned src);XPND2按src的最低2位進(jìn)行擴展,bit1擴展高16位,bit0擴展低16位
unsigned _xpnd4 (unsigned src);XPND4按src的最低4位進(jìn)行擴展
long long _addsub (int src1, int src2);ADDSUB並行做2步:
1、src2+src1->dst_o
2、src1-src2->dst_e
long long _addsub2 (int src1, int src2);ADDSUB2 16位有符號
ADD2:src2的高、低16位+src1的高、低16位->dst_o
SUB2:src1的高、低16位-src2的高、低16位->dst_e
long long _cmpy (unsigned src1, unsigned src2);CMPY有符號16位
src1和src2的高16位的點積-src1和src2的低16位點積->dst_o
飽和(src1和src2的高16位的點積+src1和src2的低16位點積)->dst_e
unsigned _cmpyr (unsigned src1,unsigned src2);CMPYR
unsigned _cmpyr1 (unsigned src1,unsigned src2 );CMPYR1
long long _ddotp4 (unsigned src1,unsigned src2);DDOTP4沒有飽和
long long _ddotph2 (long long src1,unsigned src2);DDOTPH2
long long _ddotpl2 (long long src1,unsigned src2);DDOTPL2
unsigned _ddotph2r (long long src1,unsigned src2);DDOTPH2R
unsigned _ddotpl2r (long long src1,unsigned src2);DDOTPL2R
long long _dmv (int src1, int src2);DMV將兩個寄存器移入一個寄存器一次性的
long long _dpack2 (unsigned src1,unsigned src2);DPACK2
long long _dpackx2 (unsigned src1,unsigned src2);DPACKX2
__float2_t _fdmv_f2(float src1, float src2);DMV
unsigned _gmpy (unsigned src1,unsigned src2);GMPY伽羅瓦域上的乘法
long long _mpy2ir (int src1, int src2);MPY2IR進(jìn)行16位乘32位。
將src1的高16位和低16位當(dāng)做有符號16位;將src2的值當(dāng)做有符號32位。
乘積通過加上2^14round到32位,然后結(jié)果右移15位。
2個結(jié)果的低32位寫入dst_o:dst_e
int _mpy32 (int src1, int src2);MPY32進(jìn)行32位乘32位。都是有符號的,64位結(jié)果中的低32位寫入dst
long long _mpy32ll (int src1, int src2);MPY3232位有符號數(shù)×32位有符號數(shù),有符號的64位結(jié)果被寫入dst
long long _mpy32su (int src1, int src2);MPY32SUsrc1有符號32位×src2無符號32位=dst有符號64位
long long _mpy32us (unsigned src1, intsrc2);MPY32USsrc1無符號32位×src2有符號32位=dst有符號64位
long long _mpy32u (unsigned src1,unsigned src2);MPY32Usrc1無符號32位×src2無符號32位=dst無符號64位
int _rpack2 (int src1, int src2);RPACK2
long long _saddsub (unsigned src1, unsigned src2);SADDSUB並行進行:
1、飽和(src1+src2)->dst_o
2、飽和(src1-src2)->dst_e
long long _saddsub2 (unsigned src1,unsigned src2);SADDSUB2并行進(jìn)行SADD2和SSUB2指令
long long _shfl3 (unsigned src1, unsignedsrc2);SHFL3如圖,生成一個longlong
int _smpy32 (int src1, int src2);SMPY3232位有符號×32位有符號,64位的結(jié)果左移1位然后飽和,然后將之后的結(jié)果的高32位寫入dst
int _ssub2 (unsigned src1, unsignedsrc2);SSUB2Src1中的2個16位有符號-src2中的2個有符號16位,結(jié)果進(jìn)行飽和
unsigned _xormpy (unsigned src1, unsigned src2);XORMPY伽羅瓦域乘法
int _dpint (double src);DPINT將double轉(zhuǎn)成int(round)
__int40_t _f2tol(__float2_t src); 將一個__float2_t解釋成一個__int40
long long _f2toll(__float2_t src); 將一個__float2_t解釋成一個long long
double _fabs (double src);ABSDP將src的絕對值放入dst。
float _fabsf (float src);ABSSP
__float2_t _lltof2(long long src); 將一個longlong解釋成一個__float2_t
__float2_t _ltof2(__int40_t src); 將一個__int40解釋成一個__float2_t
__float2_t & _mem8_f2(void * ptr);LDNDW
STNDW從內(nèi)存里加載一個64位值
const __float2_t & _mem8_f2_const(void * ptr);LDNDW
STNDW
long long _mpyidll (int src1, int src2);MPYID src1×src2->dst
double _mpysp2dp (float src1, float src2);MPYSP2DP src1×src2->dst
double _mpyspdp (float src1, double src2);MPYSPDP src1×src2->dst
double _rcpdp (double src);RCPDP64位double倒數(shù)近似值放入dst
float _rcpsp (float src);RCPSP32位float的倒數(shù)近似值
double _rsqrdp (double src);RSQRDP64位double的平方根倒數(shù)近似值
float _rsqrsp (float src);RSQRSP32位float的平方根倒數(shù)近似值
int _spint (float);SPINTFloat轉(zhuǎn)為int
ADDDP2個double相加
ADDSP2個float相加
AND位與
ANDN與后取反
MPYSP2個float相乘
OR位或
SUBDP2個double相減
SUBSP2個float相減
XOR異或
__x128_t _ccmatmpy (long long src1, __x128_t src2);CCMATMPY
long long _ccmatmpyr1 (long long src1,__x128_t src2);CCMATMPYR1
long long _ccmpy32r1 (long long src1,long long src2);CCMPY32R1
__x128_t _cmatmpy (long long src1,__x128_t src2);CMATMPY
long long _cmatmpyr1 (long long src1,__x128_t src2);CMATMPYR1
long long _cmpy32r1 (long long src1,long long src2);CMPY32R1
__x128_t _cmpysp (__float2_t src1,__float2_t src2);CMPYSP
double _complex_conjugate_mpysp (double src1, double src2);CMPYSP
DSUBSP
double _complex_mpysp (double src1,double src2);CMPYSP
DADDSP
int _crot90 (int src);CROT90復(fù)數(shù)的90度旋轉(zhuǎn)
int _crot270 (int src);CROT270復(fù)數(shù)的270度旋轉(zhuǎn)
long long _dadd (long long src1, long longsrc2);DADDSrc1的2個32位有符號數(shù)+src2的2個32位有符號數(shù)
long long _dadd2 (long long src1, long long src2);DADD24路有符號16位相加
__float2_t _daddsp (__float2_t src1, __float2_t src2);DADDSP2路float加法
long long _dadd_c (scst5 immediate src1, long long src2);DADD將有符號5位立即數src1分別與src2的2個32位有符號數相加
long long _dapys2 (long long src1, long long src2);DAPYS2
long long _davg2 (long long src1, long long src2);DAVG2有符號16位
long long _davgnr2 (long long src1, long long src2);DAVGNR2有符號16位,無round模式
long long _davgnru4 (long long src1,long long src2);DAVGNRU4無符號8位,無round模式
long long _davgu4 (long long src1, long long src2);DAVGU4無符號8位
long long _dccmpyr1 (long long src1,long long src2);DCCMPYR1
unsigned _dcmpeq2 (long long src1, long long src2);DCMPEQ216位比較,相等返回1,不等返回0
unsigned _dcmpeq4 (long long src1, long long src2);DCMPEQ48位比較,相等返回1,不等返回0
unsigned _dcmpgt2 (long long src1, long long src2);DCMPGT2 16位比較,src1>src2時返回1,否則返回0
unsigned _dcmpgtu4 (long long src1, long long src2);DCMPGTU4 8位比較,src1>src2時返回1,否則返回0
__x128_t _dccmpy (long long src1, long long src2);DCCMPY
__x128_t _dcmpy (long long src1, long long src2);DCMPY
long long _dcmpyr1 (long long src1, long long src2);DCMPYR1
long long _dcrot90 (long long src);DCROT90
long long _dcrot270 (long long src);DCROT270
long long _ddotp4h (__x128_t src1,__x128_t src2 );DDOTP4H執(zhí)行2個dotp4h,都是有符號的
long long _ddotpsu4h (__x128_t src1,__x128_t src2 );DDOTPSU4H執(zhí)行2個dotpsu4h,一個有符號,一個無符號
__float2_t _dinthsp (int src);DINTHSPSrc中的16位有符號數(shù)轉(zhuǎn)成單精度浮點放入dst_e和dst_o中
__float2_t _dinthspu (unsigned src);DINTHSPUSrc中的16位無符號數(shù)轉(zhuǎn)成單精度浮點放入dst_e和dst_o中
__float2_t _dintsp(long long src);DINTSPSrc中的有符號32位轉(zhuǎn)成單精度浮點,放入dst_e和dst_o中
__float2_t _dintspu(long long src);DINTSPUSrc中的無符號32位轉(zhuǎn)成單精度浮點,放入dst_e和dst_o中
long long _dmax2 (long long src1, long long src2);DMAX2對src1和src2中的16位有符號數(shù)比大小,將大的放入dst中
long long _dmaxu4 (long long src1, long long src2);DMAXU4對src1和src2中的8位無符號數比大小,將大的放入dst中
long long _dmin2 (long long src1, long long src2);DMIN2對src1和src2中的16位有符號數比大小,將小的放入dst中
long long _dminu4 (long long src1, long long src2);DMINU4對src1和src2中的8位無符號數比大小,將小的放入dst中
__x128_t _dmpy2 (long long src1, long long src2);DMPY2將src1和src2中的16位有符號數(shù)相乘,得到32位有符號數(shù)放入128位寄存器中
__float2_t _dmpysp (__float2_t src1,__float2_t src2);DMPYSP
__x128_t _dmpysu4 (long long src1, long long src2);DMPYSU4將src1中的8位有符號數乘以src2中的無符號8位,得到有符號16位
__x128_t _dmpyu2 (long long src1, long long src2);DMPYU216位無符號數(shù)相乘,得到32位數(shù)放入128位寄存器中
__x128_t _dmpyu4 (long long src1, long long src2);DMPYU4 8位無符號數相乘,得到無符號16位結果
long long _dmvd (long long src1,unsigned src2 );DMVD將2個寄存器移入一個寄存器中。依次進(jìn)行2次移動,當(dāng)處理很多的double word時很有用。減輕寄存器壓力
int _dotp4h (long long src1, long longsrc2 );DOTP4H進(jìn)行兩個系列的16位值的點積
long long _dotp4hll (long long src1, long long src2 );DOTP4H返回值不同
int _dotpsu4h (long long src1, long longsrc2);DOTPSU4HSrc1中被當(dāng)做有符號16位,src2被當(dāng)做無符號16位,得到32位結(jié)果
long long _dotpsu4hll (long long src1, long long src2);DOTPSU4H src1中被當做有符號16位,src2被當做無符號16位,得到64位結果
long long _dpackh2 (long long src1, long long src2);DPACKH2
long long _dpackh4 (long long src1, long long src2);DPACKH4并行執(zhí)行2個PACKH4
long long _dpacklh2 (long long src1, long long src2);DPACKLH2
long long _dpacklh4 (unsigned src1,unsigned src2);DPACKLH4并行執(zhí)行PACKH4和PACKL4
long long _dpackl2 (long long src1, long long src2);DPACKL2
long long _dpackl4 (long long src1, long long src2);DPACKL4并行執(zhí)行2個PACKL4
long long _dsadd (long long src1, long long src2);DSADD將src1中的2個有符號32位數(shù)加上src2中的2個有符號32位數(shù),結(jié)果進(jìn)行飽和
long long _dsadd2 (long long src1, long long src2);DSADD2結果飽和到[-2^15, (2^15)-1]
long long _dshl (long long src1, unsignedsrc2);DSHL將longlong中的2個32位左移,用0補位(有符號32位)
long long _dshl2 (long long src1,unsigned src2);DSHL2將longlong中的4個16位左移,用0補位(有符號16位)
long long _dshr (long long src1, unsignedsrc2);DSHR右移,符號位補位(有符號32位)
long long _dshr2 (long long src1,unsigned src2);DSHR2右移,符號位補位(有符號16位)
long long _dshru (long long src1,unsigned src2);DSHRU右移,0補位(無符號32位)
long long _dshru2 (long long src1,unsigned src2);DSHRU2右移,0補位(無符號16位)
__x128_t _dsmpy2 (long long src1, long long src2);DSMPY2見圖示
long long _dspacku4 (long long src1, long long src2);DSPACKU4並行進行2個SPACKU4
long long _dspint (__float2_t src);DSPINT將src中的2個單精度數(shù)轉(zhuǎn)成2個整型
unsigned _dspinth (__float2_t src);DSPINTH將src_e和src_o的兩個單精度浮點數轉成兩個有符號的16位整數
long long _dssub (long long src1, long long src2);DSSUB將src1中的2個32位有符號數(shù)減src2中的2個32位有符號數(shù),得到的結(jié)果進(jìn)行飽和[-2^31 (2^31)-1]
long long _dssub2 (long long src1, long long src2);DSSUB24個16位有符號數(shù)相減,結(jié)果進(jìn)行飽和[-2^15 (2^15)-1]
long long _dsub (long long src1, long longsrc2);DSUB不飽和
long long _dsub2 (long long src1, long long src2);DSUB2不飽和
__float2_t _dsubsp (__float2_t src1,__float2_t src2);DSUBSP32位單精度數(shù)相減
long long _dxpnd2 (unsigned src);DXPND2
long long _dxpnd4 (unsigned src);DXPND4
__float2_t _fdmvd_f2(float src1, floatsrc2);DMVD見MVD
int _land (int src1, int src2);LAND邏輯與
int _landn (int src1, int src2);LANDN
int _lor (int src1, int src2);LOR邏輯或
void _mfence();MFENCE延遲取指令流水線一直到內(nèi)存系統(tǒng)的busy標(biāo)志降低
double _mpysp2dp (float src1, float src2);MPYSP2DP將2個float相乘得到1個double結果
double _mpyspdp (float src1, double src2);MPYSPDP 1個float×1個double得到1個double
long long _mpyu2 (unsigned src1,unsigned src2 );MPYU22個無符號16位數(shù)×2個無符號16位數(shù)得到2個無符號32位數(shù)
__x128_t _qmpy32 (__x128_t src1,__x128_t src2);QMPY324路:32位有符號×32位有符號,結(jié)果的低32位放入dst
__x128_t _qmpysp (__x128_t src1,__x128_t src2);QMPYSP
__x128_t _qsmpy32r1 (__x128_t src1, __x128_t src2);QSMPY32R1 4路:有符號32位×有符號32位,得到32位。和QMPY32的區別是帶飽和與round
unsigned _shl2 (unsigned src1, unsignedsrc2);SHL22個有符號16位,左移。Src2的低4位是移動的位數(shù)。結(jié)果也是當(dāng)做有符號16位
long long _unpkbu4 (unsigned src);UNPKBU4將無符號8位擴成無符號16位
long long _unpkh2 (unsigned src);UNPKH2有符號16位符號擴展
long long _unpkhu2 (unsigned src);UNPKHU2無符號16位進(jìn)行0擴展
long long _xorll_c (scst5 immediate src1,long long src2);XOR邏輯異或
結(jié)語
關於DSP320C6000的相關介紹就到這了,如有不足之處歡迎指正。
-
dsp
+關(guān)注
關(guān)注
553文章
7998瀏覽量
348896 -
DSP320C6000
+關(guān)注
關(guān)注
0文章
1瀏覽量
1981
發(fā)布評論請先 登錄
相關(guān)推薦
評論