vmull.s16 q15, d30, w4 /* q15 = W4*(col[0]+(1<<(COL_SHIFT-1))/W4) */
vld1.64 {d8}, [r2,:64], ip /* d8 = col[3] */
- ldrd r4, [r2]
- ldrd r6, [r2, #16]
+ ldrd r4, r5, [r2]
+ ldrd r6, r7, [r2, #16]
orrs r4, r4, r5
idct_col4_top
vadd.i32 q14, q14, q7
1: orrs r6, r6, r7
- ldrd r4, [r2, #16]
+ ldrd r4, r5, [r2, #16]
it eq
addeq r2, r2, #16
beq 2f
vmlal.s16 q6, d5, w3 /* q6 += W3 * col[5] */
2: orrs r4, r4, r5
- ldrd r4, [r2, #16]
+ ldrd r4, r5, [r2, #16]
it eq
addeq r2, r2, #16
beq 3f
pop {r4-r7, pc}
.endm
-/* void ff_simple_idct_put_neon(uint8_t *dst, int line_size, DCTELEM *data); */
+/* void ff_simple_idct_put_neon(uint8_t *dst, int line_size, int16_t *data); */
/* Entry point ff_simple_idct_put_neon; its C prototype is given in the comment just above. */
/* NOTE(review): export=1 presumably makes the symbol globally visible -- confirm against the `function` macro definition. */
function ff_simple_idct_put_neon, export=1
/* r2 is handed to the idct_start macro; per the prototype the third argument is the coefficient pointer, which presumably arrives in r2 -- TODO confirm. */
idct_start r2
/* NOTE(review): this view looks like a patch excerpt; instructions between idct_start and the return may be elided here. */
bx lr /* branch to the address held in lr */
endfunc
-/* void ff_simple_idct_add_neon(uint8_t *dst, int line_size, DCTELEM *data); */
+/* void ff_simple_idct_add_neon(uint8_t *dst, int line_size, int16_t *data); */
/* Entry point ff_simple_idct_add_neon; its C prototype is given in the comment just above. */
/* NOTE(review): export=1 presumably makes the symbol globally visible -- confirm against the `function` macro definition. */
function ff_simple_idct_add_neon, export=1
/* r2 is handed to the idct_start macro; per the prototype the third argument is the coefficient pointer, which presumably arrives in r2 -- TODO confirm. */
idct_start r2
/* NOTE(review): this view looks like a patch excerpt; instructions between idct_start and the return may be elided here. */
bx lr /* branch to the address held in lr */
endfunc
-/* void ff_simple_idct_neon(DCTELEM *data); */
+/* void ff_simple_idct_neon(int16_t *data); */
function ff_simple_idct_neon, export=1
idct_start r0