3 void j_rev_dct_arm(DCTBLOCK data)
5 With DCTBLOCK being a pointer to an array of 64 'signed shorts'
7 Copyright (c) 2001 Lionel Ulmer (lionel.ulmer@free.fr / bbrox@bbrox.org)
9 Permission is hereby granted, free of charge, to any person obtaining a copy
10 of this software and associated documentation files (the "Software"), to deal
11 in the Software without restriction, including without limitation the rights
12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 copies of the Software, and to permit persons to whom the Software is
14 furnished to do so, subject to the following conditions:
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
23 IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@ Fixed-point multipliers. Each FIX_<x> equals round(<x> * 2^13), e.g.
@ 0.298631336 * 8192 = 2446.39 -> 2446, which matches the lsl #13 scaling
@ applied to the even-part sums in the transform code. FIX_M_<x> holds the
@ negated value. FIX_0xFFFF is a 16-bit mask rather than a multiplier.
30 #define FIX_0_298631336 2446
31 #define FIX_0_541196100 4433
32 #define FIX_0_765366865 6270
33 #define FIX_1_175875602 9633
34 #define FIX_1_501321110 12299
35 #define FIX_2_053119869 16819
36 #define FIX_3_072711026 25172
37 #define FIX_M_0_390180644 -3196
38 #define FIX_M_0_899976223 -7373
39 #define FIX_M_1_847759065 -15137
40 #define FIX_M_1_961570560 -16069
41 #define FIX_M_2_562915447 -20995
42 #define FIX_0xFFFF 0xFFFF
@ Byte offsets used as ldr immediates relative to the constants base held in
@ r11. The stride is 4 because every table entry is a .word, and the order
@ must stay in step with the order of the .word entries in the table.
44 #define FIX_0_298631336_ID 0
45 #define FIX_0_541196100_ID 4
46 #define FIX_0_765366865_ID 8
47 #define FIX_1_175875602_ID 12
48 #define FIX_1_501321110_ID 16
49 #define FIX_2_053119869_ID 20
50 #define FIX_3_072711026_ID 24
51 #define FIX_M_0_390180644_ID 28
52 #define FIX_M_0_899976223_ID 32
53 #define FIX_M_1_847759065_ID 36
54 #define FIX_M_1_961570560_ID 40
55 #define FIX_M_2_562915447_ID 44
56 #define FIX_0xFFFF_ID 48
@ ff_j_rev_dct_arm: inverse DCT performed in place on the block whose address
@ arrives in r0 (the file header describes it as 64 signed shorts).
@
@ Structure visible here: a row pass (lr = pointer to the current row, r12
@ starts at 8) followed by a column pass that reads and writes elements
@ 16 bytes apart. Each pass computes an even part from d0, d2, d4, d6 and an
@ odd part from d1, d3, d5, d7, then combines them as sums and differences
@ (tmp10 +/- tmp3, tmp11 +/- tmp2, tmp12 +/- tmp1, tmp13 +/- tmp0).
@
@ Register roles: r11 = base of the constants array (entries are fetched with
@ the FIX_*_ID byte offsets), lr = current row or column pointer,
@ r0-r10 = scratch.
@
@ NOTE(review): this listing appears to have gaps. The branch targets
@ (end_of_row_loop, start_column_loop, const_array), the flag-setting
@ instructions before each beq, the DESCALE sequences and some operand setup
@ (for example the sum that feeds the z1 multiply) are not visible here.
@ Confirm against the complete file before relying on these notes.
60 function ff_j_rev_dct_arm, export=1
61 stmdb sp!, { r4 - r12, lr } @ save r4-r12 and the return address
63 sub sp, sp, #4 @ reserve one 4-byte slot on the stack
64 str r0, [ sp ] @ save the DCT pointer to the stack
66 mov lr, r0 @ lr = pointer to the current row
67 mov r12, #8 @ r12 = row-counter
68 adr r11, const_array @ r11 = base pointer to the constants array
@ ---- Row pass ----
@ Per the register comments, a row is read with the even coefficients at
@ byte offsets 0, 2, 4, 6 and the odd coefficients at 8, 10, 12, 14.
70 ldrsh r0, [lr, # 0] @ r0 = 'd0'
71 ldrsh r2, [lr, # 2] @ r2 = 'd2'
73 @ Optimization for rows that have all items except the first set to 0
74 @ (this works as the DCTELEMS are always 4-byte aligned)
82 beq end_of_row_loop @ nothing to be done as ALL of them are '0'
86 ldrsh r1, [lr, # 8] @ r1 = 'd1'
87 ldrsh r4, [lr, # 4] @ r4 = 'd4'
88 ldrsh r6, [lr, # 6] @ r6 = 'd6'
@ Even part. Constant loads are interleaved with the multiplies that use them.
90 ldr r3, [r11, #FIX_0_541196100_ID]
92 ldr r5, [r11, #FIX_M_1_847759065_ID]
93 mul r7, r3, r7 @ r7 = z1
94 ldr r3, [r11, #FIX_0_765366865_ID]
95 mla r6, r5, r6, r7 @ r6 = tmp2 = z1 + d6 * FIX_M_1_847759065
96 add r5, r0, r4 @ r5 = d0 + d4 (tmp0 before the << 13 applied below)
97 mla r2, r3, r2, r7 @ r2 = tmp3 = z1 + d2 * FIX_0_765366865
98 sub r3, r0, r4 @ r3 = d0 - d4 (tmp1 before the << 13 applied below)
100 add r0, r2, r5, lsl #13 @ r0 = tmp10 = (tmp0 << 13) + tmp3
101 rsb r2, r2, r5, lsl #13 @ r2 = tmp13 = (tmp0 << 13) - tmp3
102 add r4, r6, r3, lsl #13 @ r4 = tmp11 = (tmp1 << 13) + tmp2
103 rsb r3, r6, r3, lsl #13 @ r3 = tmp12 = (tmp1 << 13) - tmp2
105 stmdb sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11
@ Odd part. r0-r10 are all reused, which is why the even results were pushed.
107 ldrsh r3, [lr, #10] @ r3 = 'd3'
108 ldrsh r5, [lr, #12] @ r5 = 'd5'
109 ldrsh r7, [lr, #14] @ r7 = 'd7'
111 add r0, r3, r5 @ r0 = 'z2'
112 add r2, r1, r7 @ r2 = 'z1'
113 add r4, r3, r7 @ r4 = 'z3'
114 add r6, r1, r5 @ r6 = 'z4'
115 ldr r9, [r11, #FIX_1_175875602_ID]
116 add r8, r4, r6 @ r8 = z3 + z4
117 ldr r10, [r11, #FIX_M_0_899976223_ID]
118 mul r8, r9, r8 @ r8 = 'z5'
119 ldr r9, [r11, #FIX_M_2_562915447_ID]
120 mul r2, r10, r2 @ r2 = 'z1'
121 ldr r10, [r11, #FIX_M_1_961570560_ID]
122 mul r0, r9, r0 @ r0 = 'z2'
123 ldr r9, [r11, #FIX_M_0_390180644_ID]
124 mla r4, r10, r4, r8 @ r4 = 'z3'
125 ldr r10, [r11, #FIX_0_298631336_ID]
126 mla r6, r9, r6, r8 @ r6 = 'z4'
127 ldr r9, [r11, #FIX_2_053119869_ID]
128 mla r7, r10, r7, r2 @ r7 = tmp0 + z1
129 ldr r10, [r11, #FIX_3_072711026_ID]
130 mla r5, r9, r5, r0 @ r5 = tmp1 + z2
131 ldr r9, [r11, #FIX_1_501321110_ID]
132 mla r3, r10, r3, r0 @ r3 = tmp2 + z2
133 add r7, r7, r4 @ r7 = tmp0
134 mla r1, r9, r1, r2 @ r1 = tmp3 + z1
135 add r5, r5, r6 @ r5 = tmp1
136 add r3, r3, r4 @ r3 = tmp2
137 add r1, r1, r6 @ r1 = tmp3
@ The pop targets differ from the pushed registers on purpose: the words
@ pushed from r3 and r4 come back in r4 and r6, keeping r1/r3/r5/r7 intact.
139 ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
140 @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0
142 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS)
148 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS)
154 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS)
160 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS)
166 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS)
172 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS)
178 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS)
184 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS)
194 beq start_column_loop
@ NOTE(review): the next two instructions presumably belong to a shortcut
@ path for rows where only the first element is non-zero; confirm.
197 ldr r1, [r11, #FIX_0xFFFF_ID]
@ r0 += r0 << 16. This copies the low halfword into the high halfword only
@ if the upper 16 bits of r0 are zero at this point; the masking that would
@ guarantee that is not visible here.
200 add r0, r0, r0, lsl #16
213 @ Start of column loop
@ ---- Column pass ----
@ Consecutive coefficients of a column are 16 bytes apart, so d<k> is read
@ from byte offset k*16 (written below as 2k*8).
217 ldrsh r0, [lr, #( 0*8)] @ r0 = 'd0'
218 ldrsh r2, [lr, #( 4*8)] @ r2 = 'd2'
219 ldrsh r4, [lr, #( 8*8)] @ r4 = 'd4'
220 ldrsh r6, [lr, #(12*8)] @ r6 = 'd6'
222 ldr r3, [r11, #FIX_0_541196100_ID]
224 ldr r5, [r11, #FIX_M_1_847759065_ID]
225 mul r1, r3, r1 @ r1 = z1
226 ldr r3, [r11, #FIX_0_765366865_ID]
227 mla r6, r5, r6, r1 @ r6 = tmp2
228 add r5, r0, r4 @ r5 = d0 + d4 (tmp0 before the << 13 applied below)
229 mla r2, r3, r2, r1 @ r2 = tmp3
230 sub r3, r0, r4 @ r3 = d0 - d4 (tmp1 before the << 13 applied below)
232 add r0, r2, r5, lsl #13 @ r0 = tmp10
233 rsb r2, r2, r5, lsl #13 @ r2 = tmp13
234 add r4, r6, r3, lsl #13 @ r4 = tmp11
235 rsb r6, r6, r3, lsl #13 @ r6 = tmp12
237 ldrsh r1, [lr, #( 2*8)] @ r1 = 'd1'
238 ldrsh r3, [lr, #( 6*8)] @ r3 = 'd3'
239 ldrsh r5, [lr, #(10*8)] @ r5 = 'd5'
240 ldrsh r7, [lr, #(14*8)] @ r7 = 'd7'
242 @ Check for empty odd column (happens about 20 to 25 % of the time according to my stats)
@ Unlike the row pass, tmp11 is in r4 and tmp12 in r6 here, so the saved
@ order is tmp10, tmp13, tmp11, tmp12 and the matching pop restores the
@ same registers.
248 stmdb sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp11, tmp12
250 add r0, r3, r5 @ r0 = 'z2'
251 add r2, r1, r7 @ r2 = 'z1'
252 add r4, r3, r7 @ r4 = 'z3'
253 add r6, r1, r5 @ r6 = 'z4'
254 ldr r9, [r11, #FIX_1_175875602_ID]
256 ldr r10, [r11, #FIX_M_0_899976223_ID]
257 mul r8, r9, r8 @ r8 = 'z5'
258 ldr r9, [r11, #FIX_M_2_562915447_ID]
259 mul r2, r10, r2 @ r2 = 'z1'
260 ldr r10, [r11, #FIX_M_1_961570560_ID]
261 mul r0, r9, r0 @ r0 = 'z2'
262 ldr r9, [r11, #FIX_M_0_390180644_ID]
263 mla r4, r10, r4, r8 @ r4 = 'z3'
264 ldr r10, [r11, #FIX_0_298631336_ID]
265 mla r6, r9, r6, r8 @ r6 = 'z4'
266 ldr r9, [r11, #FIX_2_053119869_ID]
267 mla r7, r10, r7, r2 @ r7 = tmp0 + z1
268 ldr r10, [r11, #FIX_3_072711026_ID]
269 mla r5, r9, r5, r0 @ r5 = tmp1 + z2
270 ldr r9, [r11, #FIX_1_501321110_ID]
271 mla r3, r10, r3, r0 @ r3 = tmp2 + z2
272 add r7, r7, r4 @ r7 = tmp0
273 mla r1, r9, r1, r2 @ r1 = tmp3 + z1
274 add r5, r5, r6 @ r5 = tmp1
275 add r3, r3, r4 @ r3 = tmp2
276 add r1, r1, r6 @ r1 = tmp3
278 ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
279 @ r1 = tmp3 / r3 = tmp2 / r5 = tmp1 / r7 = tmp0
@ Results are written as mirrored pairs: each sum goes to row k and the
@ matching difference to row 7-k (byte offsets k*16 and (7-k)*16).
281 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
285 strh r8, [lr, #( 0*8)]
287 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
291 strh r8, [lr, #(14*8)]
293 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
297 strh r8, [lr, #( 2*8)]
299 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
303 strh r8, [lr, #(12*8)]
305 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
309 strh r8, [lr, #( 4*8)]
311 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
315 strh r8, [lr, #(10*8)]
317 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
321 strh r8, [lr, #( 6*8)]
323 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
327 strh r8, [lr, #( 8*8)]
@ Presumably the path taken when the odd part of the column is all zero (see
@ the empty odd column check above). Each register is stored to both
@ mirrored rows, which is what a zero odd term gives for sum and difference.
336 @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
337 @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
340 strh r0, [lr, #( 0*8)]
341 strh r0, [lr, #(14*8)]
343 @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
344 @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
347 strh r4, [lr, #( 2*8)]
348 strh r4, [lr, #(12*8)]
350 @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
351 @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
354 strh r6, [lr, #( 4*8)]
355 strh r6, [lr, #(10*8)]
357 @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
358 @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
361 strh r2, [lr, #( 6*8)]
362 strh r2, [lr, #( 8*8)]
@ NOTE(review): the 4-byte slot reserved at entry has to be released before
@ this pop so that sp points at the saved r4; that adjustment is not visible
@ in this listing. Confirm it is present in the complete file.
372 ldmia sp!, { r4 - r12, pc } @ restore callee saved regs and return
376 .word FIX_0_298631336
377 .word FIX_0_541196100
378 .word FIX_0_765366865
379 .word FIX_1_175875602
380 .word FIX_1_501321110
381 .word FIX_2_053119869
382 .word FIX_3_072711026
383 .word FIX_M_0_390180644
384 .word FIX_M_0_899976223
385 .word FIX_M_1_847759065
386 .word FIX_M_1_961570560
387 .word FIX_M_2_562915447