Vector Addition on TMS320C2700
The TMS320C2700 has high loop overhead:
- No multi-instruction hardware looping
- Branches are costly
- Unrolling the loop body (2x below) spreads one branch over two elements
; Element-wise vector add with the loop body unrolled twice, so a single
; banz is paid for every two elements processed.
;   ar2 -> next element of vector a   (read, post-incremented)
;   ar3 -> next element of vector b   (read, post-incremented)
;   ar4 -> next slot of the sum       (written, post-incremented)
;   ar0  = loop counter consumed by banz
;   ah   = scratch register holding the current sum
; NOTE(review): two elements are handled per pass, so this presumably
; expects an even element count; per the BANZ definition the body should
; run ar0+1 times -- confirm against the code that initialises ar0.
loop:
        mov     ah,*ar2++       ; ah = a[i]
        add     ah,*ar3++       ; ah = a[i] + b[i]
        mov     *ar4++,ah       ; sum[i] = ah
        ; second copy of the body (the unrolled iteration)
        mov     ah,*ar2++       ; ah = a[i+1]
        add     ah,*ar3++       ; ah = a[i+1] + b[i+1]
        mov     *ar4++,ah       ; sum[i+1] = ah
        banz    loop,ar0--      ; next pass while ar0 != 0; ar0 is decremented