git.sesse.net Git - ffmpeg/blob - libavcodec/x86/h264_cabac.c
cabac: x86: Give optimizations header a more meaningful name
[ffmpeg] / libavcodec / x86 / h264_cabac.c
1 /*
2  * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4  *
5  * This file is part of Libav.
6  *
7  * Libav is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * Libav is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with Libav; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file
24  * H.264 / AVC / MPEG-4 part10 codec.
25  * non-SIMD x86-specific optimizations for H.264
26  * @author Michael Niedermayer <michaelni@gmx.at>
27  */
28
29 #include <stddef.h>
30
31 #include "libavcodec/cabac.h"
32 #include "cabac.h"
33
34 #if HAVE_INLINE_ASM
35
36 //FIXME use some macros to avoid duplicating get_cabac (cannot be done yet
37 //as that would make optimization work hard)
38 #if HAVE_7REGS
39 #define decode_significance decode_significance_x86
/**
 * Inline-asm scan that fills index[] with the positions of the coefficients
 * flagged by the decoded bins.
 *
 * The scan walks one context byte per position, starting at
 * significant_coeff_ctx_base and continuing while the pointer is below
 * significant_coeff_ctx_base + max_coeff - 1. At each position:
 *  - a bin is decoded with the context byte at the current pointer;
 *  - if bit 0 of the result is clear, the scan moves to the next position;
 *  - otherwise a second bin is decoded with the context byte last_off bytes
 *    further on, the position (pointer - significant_coeff_ctx_base) is
 *    stored through the running index pointer, and the scan stops if bit 0
 *    of the second bin is set; if not, the index pointer advances by 4 bytes.
 * If the loop runs to completion, the final position (max_coeff - 1) is
 * stored as one more entry.
 *
 * BRANCHLESS_GET_CABAC, TABLES_ARG, FF_REG_c and FF_OPSIZE are defined
 * outside this file; NOTE(review): BRANCHLESS_GET_CABAC is presumed to decode
 * one CABAC bin into its first operand and update c->low / c->range and the
 * addressed context byte -- confirm against its definition.
 *
 * @param c                          CABAC decoder context
 * @param max_coeff                  bounds the scan, see above
 * @param significant_coeff_ctx_base first context byte used for the first bin
 * @param index                      output array of 32-bit positions
 * @param last_off                   byte distance from a first-bin context to
 *                                   the matching second-bin context
 * @return number of entries written to index[]
 */
40 static int decode_significance_x86(CABACContext *c, int max_coeff,
41                                    uint8_t *significant_coeff_ctx_base,
42                                    int *index, x86_reg last_off){
    /* Loop bound: the asm loops while the context pointer is below this. */
43     void *end= significant_coeff_ctx_base + max_coeff - 1;
    /* Added to the context pointer to turn it back into a position; only the
     * low 32 bits of the sum are stored, so truncation to int is harmless. */
44     int minusstart= -(intptr_t)significant_coeff_ctx_base;
    /* Added to the final index pointer: (ptr - index + 4) >> 2 is the number
     * of entries written, counting the one ptr currently points at. */
45     int minusindex= 4-(intptr_t)index;
46     int bit;
47     x86_reg coeff_count;
48
49 #ifdef BROKEN_RELOCATIONS
50     void *tables;
51
    /* Load the table address into a register for use by the main asm block
     * (passed in via TABLES_ARG, presumably -- its definition is not here). */
52     __asm__ volatile(
53         "lea   "MANGLE(ff_h264_cabac_tables)", %0      \n\t"
54         : "=&r"(tables)
55     );
56 #endif
57
    /*
     * Operand map for the block below:
     *   %0 coeff_count (also scratch / index pointer)   %1 context pointer
     *   %2 index (in memory)   %3 c->low   %4 bit   %5 c->range   %6 c
     *   %7 minusstart   %8 end   %9 minusindex   %10 last_off
     *   %11/%12 offsets of bytestream / bytestream_end in CABACContext
     *   %13 supplied by TABLES_ARG
     */
58     __asm__ volatile(
59         "3:                                     \n\t"
60
61         BRANCHLESS_GET_CABAC("%4", "%q4", "(%1)", "%3", "%w3",
62                              "%5", "%q5", "%k0", "%b0",
63                              "%c11(%6)", "%c12(%6)",
64                              AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
65                              AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
66                              AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
67                              "%13")
68
        /* First bin clear: nothing to record at this position. */
69         "test $1, %4                            \n\t"
70         " jz 4f                                 \n\t"
        /* Temporarily point at the second-bin context for this position. */
71         "add  %10, %1                           \n\t"
72
73         BRANCHLESS_GET_CABAC("%4", "%q4", "(%1)", "%3", "%w3",
74                              "%5", "%q5", "%k0", "%b0",
75                              "%c11(%6)", "%c12(%6)",
76                              AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
77                              AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
78                              AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
79                              "%13")
80
        /* Restore the pointer, then store position = pointer + minusstart
         * through the current index pointer. */
81         "sub  %10, %1                           \n\t"
82         "mov  %2, %0                            \n\t"
83         "movl %7, %%ecx                         \n\t"
84         "add  %1, %%"FF_REG_c"                  \n\t"
85         "movl %%ecx, (%0)                       \n\t"
86
        /* Second bin set: stop scanning, %0 still holds the index pointer. */
87         "test $1, %4                            \n\t"
88         " jnz 5f                                \n\t"
89
        /* Advance the in-memory index pointer by one 4-byte entry. */
90         "add"FF_OPSIZE"  $4, %2                 \n\t"
91
92         "4:                                     \n\t"
93         "add  $1, %1                            \n\t"
94         "cmp  %8, %1                            \n\t"
95         " jb 3b                                 \n\t"
        /* Loop ran to the end: record the final position as one more entry. */
96         "mov  %2, %0                            \n\t"
97         "movl %7, %%ecx                         \n\t"
98         "add  %1, %%"FF_REG_c"                  \n\t"
99         "movl %%ecx, (%0)                       \n\t"
100         "5:                                     \n\t"
        /* coeff_count = (index pointer - original index + 4) >> 2 */
101         "add  %9, %k0                           \n\t"
102         "shr $2, %k0                            \n\t"
103         : "=&q"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index),
104           "+&r"(c->low), "=&r"(bit), "+&r"(c->range)
105         : "r"(c), "m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off),
106           "i"(offsetof(CABACContext, bytestream)),
107           "i"(offsetof(CABACContext, bytestream_end))
108           TABLES_ARG
109         : "%"FF_REG_c, "memory"
110     );
111     return coeff_count;
112 }
113
114 #define decode_significance_8x8 decode_significance_8x8_x86
/**
 * Inline-asm scan over positions 0..62 that fills index[] with the positions
 * flagged by the decoded bins, using per-position context lookup tables.
 *
 * For each position `last` (kept in memory, starting at 0):
 *  - a bin is decoded with the context byte at
 *    significant_coeff_ctx_base + sig_off[last];
 *  - if bit 0 of the result is clear, the scan moves to the next position;
 *  - otherwise a second bin is decoded with the context byte at
 *    last_coeff_ctx_base + T[last], where T is the byte table located
 *    H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET bytes into ff_h264_cabac_tables;
 *    `last` is stored through the running index pointer, and the scan stops
 *    if bit 0 of the second bin is set; if not, the index pointer advances
 *    by 4 bytes.
 * If the loop runs to completion, the value 63 is stored as one more entry.
 *
 * BRANCHLESS_GET_CABAC, TABLES_ARG, FF_REG_c and FF_OPSIZE are defined
 * outside this file; NOTE(review): BRANCHLESS_GET_CABAC is presumed to decode
 * one CABAC bin into its first operand and update c->low / c->range and the
 * addressed context byte -- confirm against its definition.
 *
 * @param c                          CABAC decoder context
 * @param significant_coeff_ctx_base base of the first-bin context bytes
 * @param index                      output array of 32-bit positions
 * @param last_coeff_ctx_base        base of the second-bin context bytes
 * @param sig_off                    per-position byte offsets into
 *                                   significant_coeff_ctx_base
 * @return number of entries written to index[]
 */
115 static int decode_significance_8x8_x86(CABACContext *c,
116                                        uint8_t *significant_coeff_ctx_base,
117                                        int *index, uint8_t *last_coeff_ctx_base, const uint8_t *sig_off){
    /* Added to the final index pointer: (ptr - index + 4) >> 2 is the number
     * of entries written, counting the one ptr currently points at. */
118     int minusindex= 4-(intptr_t)index;
119     int bit;
120     x86_reg coeff_count;
    /* Current position; lives in memory and is accessed with 32-bit moves,
     * so the zero initializer keeps its upper half clear. */
121     x86_reg last=0;
    /* Scratch register: holds either the position or a context address. */
122     x86_reg state;
123
124 #ifdef BROKEN_RELOCATIONS
125     void *tables;
126
    /* Load the table address into a register for use by the main asm block
     * (passed in via TABLES_ARG, presumably -- its definition is not here). */
127     __asm__ volatile(
128         "lea    "MANGLE(ff_h264_cabac_tables)", %0      \n\t"
129         : "=&r"(tables)
130     );
131 #endif
132
    /*
     * Operand map for the block below:
     *   %0 coeff_count (also scratch / index pointer)   %1 last (in memory)
     *   %2 index (in memory)   %3 c->low   %4 bit   %5 c->range   %6 state
     *   %7 c   %8 minusindex   %9 significant_coeff_ctx_base   %10 sig_off
     *   %11 last_coeff_ctx_base
     *   %12/%13 offsets of bytestream / bytestream_end in CABACContext
     *   %14 H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET   %15 supplied by TABLES_ARG
     */
133     __asm__ volatile(
134         "mov %1, %6                             \n\t"
135         "3:                                     \n\t"
136
        /* state = significant_coeff_ctx_base + sig_off[position] */
137         "mov %10, %0                            \n\t"
138         "movzbl (%0, %6), %k6                   \n\t"
139         "add %9, %6                             \n\t"
140
141         BRANCHLESS_GET_CABAC("%4", "%q4", "(%6)", "%3", "%w3",
142                              "%5", "%q5", "%k0", "%b0",
143                              "%c12(%7)", "%c13(%7)",
144                              AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
145                              AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
146                              AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
147                              "%15")
148
        /* Reload the position (state was used as an address above); first
         * bin clear means nothing to record here. */
149         "mov %1, %k6                            \n\t"
150         "test $1, %4                            \n\t"
151         " jz 4f                                 \n\t"
152
        /* state = last_coeff_ctx_base + table byte for this position */
153 #ifdef BROKEN_RELOCATIONS
154         "movzbl %c14(%15, %q6), %k6\n\t"
155 #else
156         "movzbl "MANGLE(ff_h264_cabac_tables)"+%c14(%k6), %k6\n\t"
157 #endif
158         "add %11, %6                            \n\t"
159
160         BRANCHLESS_GET_CABAC("%4", "%q4", "(%6)", "%3", "%w3",
161                              "%5", "%q5", "%k0", "%b0",
162                              "%c12(%7)", "%c13(%7)",
163                              AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
164                              AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
165                              AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
166                              "%15")
167
        /* Store the position through the current index pointer. */
168         "mov %2, %0                             \n\t"
169         "mov %1, %k6                            \n\t"
170         "movl %k6, (%0)                         \n\t"
171
        /* Second bin set: stop scanning, %0 still holds the index pointer. */
172         "test $1, %4                            \n\t"
173         " jnz 5f                                \n\t"
174
        /* Advance the in-memory index pointer by one 4-byte entry. */
175         "add"FF_OPSIZE"  $4, %2                 \n\t"
176
177         "4:                                     \n\t"
        /* Next position; write it back to memory and loop while below 63. */
178         "addl $1, %k6                           \n\t"
179         "mov %k6, %1                            \n\t"
180         "cmpl $63, %k6                          \n\t"
181         " jb 3b                                 \n\t"
        /* Loop ran to the end: record position 63 as one more entry. */
182         "mov %2, %0                             \n\t"
183         "movl %k6, (%0)                         \n\t"
184         "5:                                     \n\t"
        /* coeff_count = (index pointer - original index + 4) >> 2 */
185         "addl %8, %k0                           \n\t"
186         "shr $2, %k0                            \n\t"
187         : "=&q"(coeff_count), "+m"(last), "+m"(index), "+&r"(c->low),
188           "=&r"(bit), "+&r"(c->range), "=&r"(state)
189         : "r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base),
190           "m"(sig_off), "m"(last_coeff_ctx_base),
191           "i"(offsetof(CABACContext, bytestream)),
192           "i"(offsetof(CABACContext, bytestream_end)),
193           "i"(H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET) TABLES_ARG
194         : "%"FF_REG_c, "memory"
195     );
196     return coeff_count;
197 }
198 #endif /* HAVE_7REGS */
199
200 #endif /* HAVE_INLINE_ASM */