Hi, I've had a go at converting this little bit of C code to ARM VFP assembler. Can anyone see any problems with it, or suggest possible improvements?
for(int j=0;j<count; j++)
{
output += array[j]*(*p++);
}
to this VFP assembly:
for(int j=0;j<count;j++){
asm volatile(
VFP_VECTOR_LENGTH(8)
"fldmias %0, {s0} \n\t"
"fldmias %1, {s1} \n\t"
"mov r2,%2 \n\t"
"mov r3,%3 \n\t"
"mov r4,%4,lsr #3 \n\t"
"beq one \n\t"
"0: \n\t"
"fldmias r2!, {s8-s15} \n\t"
"fldmias r3!, {s16-s23} \n\t"
"fmacs s0, s8, s16 \n\t"
"subs r4,r4, #1 \n\t"
"bne 0b \n\t"
"one: \n\t"
"mov r4,%4 \n\t"
"ands r4,r4,#7 \n\t"
"beq three \n\t"
"two: \n\t"
"fldmias r2!, {s2} \n\t"
"fldmias r3!, {s3} \n\t"
"fmacs s0,s2,s3 \n\t"
"subs r4,r4, #1 \n\t"
"bne two \n\t"
"three: \n\t"
VFP_VECTOR_LENGTH_ZERO
: "=r" (output)
: "r" (output), "r" (array), "r" (p),"r" (count)
: "r0", "cc", "memory"
);
}