3 void ff_j_rev_dct_arm(DCTBLOCK data)
5 With DCTBLOCK being a pointer to an array of 64 'signed shorts'
7 Copyright (c) 2001 Lionel Ulmer (lionel.ulmer@free.fr / bbrox@bbrox.org)
9 Permission is hereby granted, free of charge, to any person obtaining a copy
10 of this software and associated documentation files (the "Software"), to deal
11 in the Software without restriction, including without limitation the rights
12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 copies of the Software, and to permit persons to whom the Software is
14 furnished to do so, subject to the following conditions:
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
23 IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 #include "libavutil/arm/asm.S"
@ Fixed-point multipliers. Each value is the decimal fraction spelled out in
@ its name multiplied by 2^13 (8192) and rounded to the nearest integer,
@ e.g. 0.541196100 * 8192 = 4433. Names carrying the M_ marker hold the
@ negated fraction. The last entry is the literal 0xFFFF, not a scaled
@ fraction.
30 #define FIX_0_298631336 2446
31 #define FIX_0_541196100 4433
32 #define FIX_0_765366865 6270
33 #define FIX_1_175875602 9633
34 #define FIX_1_501321110 12299
35 #define FIX_2_053119869 16819
36 #define FIX_3_072711026 25172
37 #define FIX_M_0_390180644 -3196
38 #define FIX_M_0_899976223 -7373
39 #define FIX_M_1_847759065 -15137
40 #define FIX_M_1_961570560 -16069
41 #define FIX_M_2_562915447 -20995
42 #define FIX_0xFFFF 0xFFFF
@ Byte offsets (4 bytes per entry) used to fetch the constants above from the
@ constants table with ldr rX, [r11, #offset]. The order of these offsets
@ has to match the order of the .word entries in that table.
44 #define FIX_0_298631336_ID 0
45 #define FIX_0_541196100_ID 4
46 #define FIX_0_765366865_ID 8
47 #define FIX_1_175875602_ID 12
48 #define FIX_1_501321110_ID 16
49 #define FIX_2_053119869_ID 20
50 #define FIX_3_072711026_ID 24
51 #define FIX_M_0_390180644_ID 28
52 #define FIX_M_0_899976223_ID 32
53 #define FIX_M_1_847759065_ID 36
54 #define FIX_M_1_961570560_ID 40
55 #define FIX_M_2_562915447_ID 44
56 #define FIX_0xFFFF_ID 48
@ Inverse DCT performed in place on the block whose address arrives in r0
@ (an array of 64 signed 16-bit values according to the file header).
@ The work is split in a row pass followed by a column pass.
@
@ Register roles:
@   lr     = pointer to the row / column currently being processed
@   r12    = row counter, initialised to 8
@   r11    = base of the constants table, indexed with the _ID byte offsets
@   r0-r10 = scratch for coefficients, constants, products and sums
@
@ Arithmetic is fixed point: the multipliers are scaled by 2^13, which is why
@ the unmultiplied even terms are shifted with lsl #13 before being combined
@ with the products.
@
@ NOTE(review): some instructions the comments below rely on are not present
@ in this listing (the flag-setting ahead of each beq, the sums feeding the
@ z1 and z5 multiplies, the DESCALE arithmetic, the branch targets and the
@ epilogue) - confirm them against the complete file.
58 function ff_j_rev_dct_arm, export=1
@ Stack the block pointer (r0) together with r4-r11 and lr.
59 push {r0, r4 - r11, lr}
61 mov lr, r0 @ lr = pointer to the current row
62 mov r12, #8 @ r12 = row-counter
63 movrel r11, const_array @ r11 = base pointer to the constants array
@ ---------------------------------------------------------------------------
@ Row pass. Within a row the terms labelled d0, d2, d4, d6 are read from byte
@ offsets 0, 2, 4, 6 and the terms labelled d1, d3, d5, d7 from byte offsets
@ 8, 10, 12, 14.
@ ---------------------------------------------------------------------------
65 ldrsh r0, [lr, # 0] @ r0 = 'd0'
66 ldrsh r2, [lr, # 2] @ r2 = 'd2'
68 @ Optimization for rows that have all items except the first set to 0
69 @ (this works as the int16_t are always 4-byte aligned)
77 beq end_of_row_loop @ nothing to be done as ALL of them are '0'
81 ldrsh r1, [lr, # 8] @ r1 = 'd1'
82 ldrsh r4, [lr, # 4] @ r4 = 'd4'
83 ldrsh r6, [lr, # 6] @ r6 = 'd6'
@ Even part of the row:
@   z1    = r7 * 0.541196100
@           NOTE(review): r7 presumably holds d2 + d6 here - confirm
@   tmp2  = z1 + d6 * -1.847759065
@   tmp3  = z1 + d2 * 0.765366865
@   tmp0  = d0 + d4             tmp1  = d0 - d4
@   tmp10 = (tmp0 << 13) + tmp3 tmp13 = (tmp0 << 13) - tmp3
@   tmp11 = (tmp1 << 13) + tmp2 tmp12 = (tmp1 << 13) - tmp2
85 ldr r3, [r11, #FIX_0_541196100_ID]
87 ldr r5, [r11, #FIX_M_1_847759065_ID]
88 mul r7, r3, r7 @ r7 = z1
89 ldr r3, [r11, #FIX_0_765366865_ID]
90 mla r6, r5, r6, r7 @ r6 = tmp2
91 add r5, r0, r4 @ r5 = tmp0
92 mla r2, r3, r2, r7 @ r2 = tmp3
93 sub r3, r0, r4 @ r3 = tmp1
95 add r0, r2, r5, lsl #13 @ r0 = tmp10
96 rsb r2, r2, r5, lsl #13 @ r2 = tmp13
97 add r4, r6, r3, lsl #13 @ r4 = tmp11
98 rsb r3, r6, r3, lsl #13 @ r3 = tmp12
@ The even results are parked on the stack because r0-r7 are all reused by
@ the odd part below.
100 push {r0, r2, r3, r4} @ save on the stack tmp10, tmp13, tmp12, tmp11
102 ldrsh r3, [lr, #10] @ r3 = 'd3'
103 ldrsh r5, [lr, #12] @ r5 = 'd5'
104 ldrsh r7, [lr, #14] @ r7 = 'd7'
@ Odd part of the row:
@   z1 = d1 + d7   z2 = d3 + d5   z3 = d3 + d7   z4 = d1 + d5
@   z5 = (z3 + z4) * 1.175875602
@   z1 = z1 * -0.899976223        z2 = z2 * -2.562915447
@   z3 = z3 * -1.961570560 + z5   z4 = z4 * -0.390180644 + z5
@   tmp0 = d7 * 0.298631336 + z1 + z3
@   tmp1 = d5 * 2.053119869 + z2 + z4
@   tmp2 = d3 * 3.072711026 + z2 + z3
@   tmp3 = d1 * 1.501321110 + z1 + z4
@ The constant loads alternate between r9 and r10 and are interleaved with
@ the multiplies that consume them.
106 add r0, r3, r5 @ r0 = 'z2'
107 add r2, r1, r7 @ r2 = 'z1'
108 add r4, r3, r7 @ r4 = 'z3'
109 add r6, r1, r5 @ r6 = 'z4'
110 ldr r9, [r11, #FIX_1_175875602_ID]
111 add r8, r4, r6 @ r8 = z3 + z4
112 ldr r10, [r11, #FIX_M_0_899976223_ID]
113 mul r8, r9, r8 @ r8 = 'z5'
114 ldr r9, [r11, #FIX_M_2_562915447_ID]
115 mul r2, r10, r2 @ r2 = 'z1'
116 ldr r10, [r11, #FIX_M_1_961570560_ID]
117 mul r0, r9, r0 @ r0 = 'z2'
118 ldr r9, [r11, #FIX_M_0_390180644_ID]
119 mla r4, r10, r4, r8 @ r4 = 'z3'
120 ldr r10, [r11, #FIX_0_298631336_ID]
121 mla r6, r9, r6, r8 @ r6 = 'z4'
122 ldr r9, [r11, #FIX_2_053119869_ID]
123 mla r7, r10, r7, r2 @ r7 = tmp0 + z1
124 ldr r10, [r11, #FIX_3_072711026_ID]
125 mla r5, r9, r5, r0 @ r5 = tmp1 + z2
126 ldr r9, [r11, #FIX_1_501321110_ID]
127 mla r3, r10, r3, r0 @ r3 = tmp2 + z2
128 add r7, r7, r4 @ r7 = tmp0
129 mla r1, r9, r1, r2 @ r1 = tmp3 + z1
130 add r5, r5, r6 @ r5 = tmp1
131 add r3, r3, r4 @ r3 = tmp2
132 add r1, r1, r6 @ r1 = tmp3
@ The pop uses a different register list than the push above, so tmp12 comes
@ back in r4 and tmp11 in r6 (the push had them in r3 and r4).
134 pop {r0, r2, r4, r6} @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
135 @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0
@ Row outputs: each even result is combined with one odd result, once added
@ and once subtracted, then descaled.
137 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS)
143 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS)
149 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS)
155 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS)
161 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS)
167 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS)
173 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS)
179 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS)
@ NOTE(review): presumably taken once the row counter is exhausted - the
@ instruction that sets the flags is not shown; confirm.
189 beq start_column_loop
@ r1 = the 0xFFFF constant from the table.
192 ldr r1, [r11, #FIX_0xFFFF_ID]
@ r0 = r0 + (r0 << 16), i.e. the value of r0 is also added into the upper
@ halfword. NOTE(review): presumably r0 is limited to 16 bits beforehand so
@ that both halfwords end up equal - confirm.
195 add r0, r0, r0, lsl #16
@ ---------------------------------------------------------------------------
@ Column pass. Row k of the block sits at byte offset k*16, written below as
@ (2k*8); the terms d0..d7 are read from rows 0..7 in natural order.
@ ---------------------------------------------------------------------------
208 @ Start of column loop
212 ldrsh r0, [lr, #( 0*8)] @ r0 = 'd0'
213 ldrsh r2, [lr, #( 4*8)] @ r2 = 'd2'
214 ldrsh r4, [lr, #( 8*8)] @ r4 = 'd4'
215 ldrsh r6, [lr, #(12*8)] @ r6 = 'd6'
@ Even part of the column: same formulas as in the row pass, with z1 kept in
@ r1 and tmp12 produced in r6 (the row pass used r7 and r3).
@ NOTE(review): r1 presumably holds d2 + d6 before the multiply - confirm.
217 ldr r3, [r11, #FIX_0_541196100_ID]
219 ldr r5, [r11, #FIX_M_1_847759065_ID]
220 mul r1, r3, r1 @ r1 = z1
221 ldr r3, [r11, #FIX_0_765366865_ID]
222 mla r6, r5, r6, r1 @ r6 = tmp2
223 add r5, r0, r4 @ r5 = tmp0
224 mla r2, r3, r2, r1 @ r2 = tmp3
225 sub r3, r0, r4 @ r3 = tmp1
227 add r0, r2, r5, lsl #13 @ r0 = tmp10
228 rsb r2, r2, r5, lsl #13 @ r2 = tmp13
229 add r4, r6, r3, lsl #13 @ r4 = tmp11
230 rsb r6, r6, r3, lsl #13 @ r6 = tmp12
232 ldrsh r1, [lr, #( 2*8)] @ r1 = 'd1'
233 ldrsh r3, [lr, #( 6*8)] @ r3 = 'd3'
234 ldrsh r5, [lr, #(10*8)] @ r5 = 'd5'
235 ldrsh r7, [lr, #(14*8)] @ r7 = 'd7'
237 @ Check for empty odd column (happens about 20 to 25 % of the time according to my stats)
@ Push and pop use the same register list here, so r4 = tmp11 and r6 = tmp12
@ both before and after the odd part.
243 push {r0, r2, r4, r6} @ save on the stack tmp10, tmp13, tmp11, tmp12
@ Odd part of the column: identical sequence to the odd part of the row pass.
@ NOTE(review): r8 presumably holds z3 + z4 before the z5 multiply - confirm.
245 add r0, r3, r5 @ r0 = 'z2'
246 add r2, r1, r7 @ r2 = 'z1'
247 add r4, r3, r7 @ r4 = 'z3'
248 add r6, r1, r5 @ r6 = 'z4'
249 ldr r9, [r11, #FIX_1_175875602_ID]
251 ldr r10, [r11, #FIX_M_0_899976223_ID]
252 mul r8, r9, r8 @ r8 = 'z5'
253 ldr r9, [r11, #FIX_M_2_562915447_ID]
254 mul r2, r10, r2 @ r2 = 'z1'
255 ldr r10, [r11, #FIX_M_1_961570560_ID]
256 mul r0, r9, r0 @ r0 = 'z2'
257 ldr r9, [r11, #FIX_M_0_390180644_ID]
258 mla r4, r10, r4, r8 @ r4 = 'z3'
259 ldr r10, [r11, #FIX_0_298631336_ID]
260 mla r6, r9, r6, r8 @ r6 = 'z4'
261 ldr r9, [r11, #FIX_2_053119869_ID]
262 mla r7, r10, r7, r2 @ r7 = tmp0 + z1
263 ldr r10, [r11, #FIX_3_072711026_ID]
264 mla r5, r9, r5, r0 @ r5 = tmp1 + z2
265 ldr r9, [r11, #FIX_1_501321110_ID]
266 mla r3, r10, r3, r0 @ r3 = tmp2 + z2
267 add r7, r7, r4 @ r7 = tmp0
268 mla r1, r9, r1, r2 @ r1 = tmp3 + z1
269 add r5, r5, r6 @ r5 = tmp1
270 add r3, r3, r4 @ r3 = tmp2
271 add r1, r1, r6 @ r1 = tmp3
273 pop {r0, r2, r4, r6} @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
274 @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0
@ Column outputs: each result is produced in r8 and stored as a halfword.
@ Sums go to rows 0, 1, 2, 3 and the matching differences to the mirrored
@ rows 7, 6, 5, 4.
276 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
280 strh r8, [lr, #( 0*8)]
282 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
286 strh r8, [lr, #(14*8)]
288 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
292 strh r8, [lr, #( 2*8)]
294 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
298 strh r8, [lr, #(12*8)]
300 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
304 strh r8, [lr, #( 4*8)]
306 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
310 strh r8, [lr, #(10*8)]
312 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
316 strh r8, [lr, #( 6*8)]
318 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
322 strh r8, [lr, #( 8*8)]
@ Path for a column whose odd terms are all zero: tmp0..tmp3 vanish, so the
@ sum and the difference of each pair are the same value and one register
@ (r0 = tmp10, r4 = tmp11, r6 = tmp12, r2 = tmp13) is stored to both
@ mirrored rows.
331 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
332 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
335 strh r0, [lr, #( 0*8)]
336 strh r0, [lr, #(14*8)]
338 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
339 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
342 strh r4, [lr, #( 2*8)]
343 strh r4, [lr, #(12*8)]
345 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
346 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
349 strh r6, [lr, #( 4*8)]
350 strh r6, [lr, #(10*8)]
352 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
353 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
356 strh r2, [lr, #( 6*8)]
357 strh r2, [lr, #( 8*8)]
@ Table of 32-bit constants, one .word per multiplier, laid out in the same
@ order as the _ID byte offsets (0, 4, 8, ... 44) so that an entry can be
@ fetched with ldr rX, [base, #offset].
@ NOTE(review): presumably this is the const_array table addressed through
@ r11 - its label is not shown here. An offset of 48 is also defined for the
@ 0xFFFF constant, so a thirteenth word is expected after the last entry
@ listed below - confirm it is present.
370 .word FIX_0_298631336
371 .word FIX_0_541196100
372 .word FIX_0_765366865
373 .word FIX_1_175875602
374 .word FIX_1_501321110
375 .word FIX_2_053119869
376 .word FIX_3_072711026
377 .word FIX_M_0_390180644
378 .word FIX_M_0_899976223
379 .word FIX_M_1_847759065
380 .word FIX_M_1_961570560
381 .word FIX_M_2_562915447