2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "i386/h264_i386.h"
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
49 #define DELAYED_PIC_REF 4
/* CAVLC code tables, built once at init and shared by all decoder instances.
 * The array sizes match the number of table-selection contexts used by the
 * H.264 CAVLC residual syntax (coeff_token, total_zeros, run_before). */
51 static VLC coeff_token_vlc[4];
52 static VLC chroma_dc_coeff_token_vlc;
54 static VLC total_zeros_vlc[15];
55 static VLC chroma_dc_total_zeros_vlc[3];
57 static VLC run_vlc[6];
/* Forward declarations: SVQ3 (Sorenson) IDCT variants that share this file,
 * the in-loop deblocking filter (normal and fast paths), and a reference
 * list maintenance helper defined later. */
60 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
61 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
62 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
63 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
64 static Picture * remove_long(H264Context *h, int i, int ref_mask);
/**
 * Packs two 16-bit values into a single 32-bit word so a pair (e.g. an mv
 * x/y or two ref indices) can be stored/loaded with one uint32_t access.
 * The operand order is swapped on big-endian hosts so the in-memory byte
 * layout matches the little-endian case.
 * NOTE(review): the #else/#endif and closing brace are not visible in this
 * chunk — confirm against the full file.
 */
66 static av_always_inline uint32_t pack16to32(int a, int b){
67 #ifdef WORDS_BIGENDIAN
68 return (b&0xFFFF) + (a<<16);
70 return (a&0xFFFF) + (b<<16);
/* Lookup table for qp % 6 over the valid luma QP range 0..51 (avoids a
 * runtime modulo in dequantisation setup). */
74 const uint8_t ff_rem6[52]={
75 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
/* Lookup table for qp / 6 over the valid luma QP range 0..51 (companion to
 * ff_rem6 above; together they split QP into shift and table index). */
78 const uint8_t ff_div6[52]={
79 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
82 static const int left_block_options[4][8]={
/**
 * Fills the per-macroblock prediction caches (intra pred modes, non-zero
 * counts, motion vectors, reference indices, mvd, direct flags) from the
 * already-decoded neighbouring macroblocks, handling MBAFF frame/field
 * neighbour derivation.
 *
 * @param h          decoder context
 * @param mb_type    type of the current macroblock
 * @param for_deblock nonzero when caches are filled for the deblocking
 *                   filter rather than for decoding (neighbour availability
 *                   then uses slice_table < 255 instead of same-slice checks)
 */
89 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
90 MpegEncContext * const s = &h->s;
91 const int mb_xy= h->mb_xy;
92 int topleft_xy, top_xy, topright_xy, left_xy[2];
93 int topleft_type, top_type, topright_type, left_type[2];
95 int topleft_partition= -1;
/* In PAFF the stride to the MB above is doubled (FIELD_PICTURE shift). */
98 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
100 //FIXME deblocking could skip the intra and nnz parts.
101 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
104 /* Wow, what a mess, why didn't they simplify the interlacing & intra
105 * stuff, I can't imagine that these complex rules are worth it. */
/* Non-MBAFF (or same-parity) neighbour indices: simple 2D adjacency. */
107 topleft_xy = top_xy - 1;
108 topright_xy= top_xy + 1;
109 left_xy[1] = left_xy[0] = mb_xy-1;
110 left_block = left_block_options[0];
/* MBAFF: neighbours are derived from the MB *pair* above/left, and each
 * neighbour index is adjusted depending on whether the current MB and the
 * neighbour pair are frame- or field-coded. */
112 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
113 const int top_pair_xy = pair_xy - s->mb_stride;
114 const int topleft_pair_xy = top_pair_xy - 1;
115 const int topright_pair_xy = top_pair_xy + 1;
116 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
117 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
118 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
119 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
120 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
121 const int bottom = (s->mb_y & 1);
122 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
124 ? !curr_mb_frame_flag // bottom macroblock
125 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
127 top_xy -= s->mb_stride;
130 ? !curr_mb_frame_flag // bottom macroblock
131 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
133 topleft_xy -= s->mb_stride;
134 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
135 topleft_xy += s->mb_stride;
136 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
137 topleft_partition = 0;
140 ? !curr_mb_frame_flag // bottom macroblock
141 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
143 topright_xy -= s->mb_stride;
145 if (left_mb_frame_flag != curr_mb_frame_flag) {
146 left_xy[1] = left_xy[0] = pair_xy - 1;
147 if (curr_mb_frame_flag) {
149 left_block = left_block_options[1];
151 left_block= left_block_options[2];
154 left_xy[1] += s->mb_stride;
155 left_block = left_block_options[3];
/* Publish the resolved neighbour indices for later use (deblocking etc.). */
160 h->top_mb_xy = top_xy;
161 h->left_mb_xy[0] = left_xy[0];
162 h->left_mb_xy[1] = left_xy[1];
/* Deblocking path: a neighbour counts as available if its slice_table
 * entry is valid (< 255), regardless of which slice it belongs to. */
166 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
167 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
168 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
170 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
/* Luma nnz bits for the current MB were packed into non_zero_count[14..15]
 * by write_back_non_zero_count(); unpack one bit per 4x4 block. */
172 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
174 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
175 for(list=0; list<h->list_count; list++){
176 if(USES_LIST(mb_type,list)){
177 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
178 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
179 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
180 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
/* Each packed ref pair is replicated across the 4x4 ref cache rows. */
186 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
187 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
189 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
190 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
192 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
193 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
/* Decode path: neighbours are only available within the current slice. */
198 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
199 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
200 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
201 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
202 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* Intra prediction sample availability: start from "all available" bitmasks
 * and clear bits for each unavailable (or constrained-intra-excluded)
 * neighbour direction. */
205 if(IS_INTRA(mb_type)){
206 h->topleft_samples_available=
207 h->top_samples_available=
208 h->left_samples_available= 0xFFFF;
209 h->topright_samples_available= 0xEEEA;
211 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
212 h->topleft_samples_available= 0xB3FF;
213 h->top_samples_available= 0x33FF;
214 h->topright_samples_available= 0x26EA;
217 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
218 h->topleft_samples_available&= 0xDF5F;
219 h->left_samples_available&= 0x5F5F;
223 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
224 h->topleft_samples_available&= 0x7FFF;
226 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
227 h->topright_samples_available&= 0xFBFF;
/* Intra4x4 prediction mode cache: copy the bottom row of the top neighbour
 * and the right column of the left neighbour(s), or a default when the
 * neighbour is unusable. */
229 if(IS_INTRA4x4(mb_type)){
230 if(IS_INTRA4x4(top_type)){
231 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
232 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
233 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
234 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
237 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
242 h->intra4x4_pred_mode_cache[4+8*0]=
243 h->intra4x4_pred_mode_cache[5+8*0]=
244 h->intra4x4_pred_mode_cache[6+8*0]=
245 h->intra4x4_pred_mode_cache[7+8*0]= pred;
248 if(IS_INTRA4x4(left_type[i])){
249 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
250 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
253 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
258 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
259 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
274 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
/* Non-zero-count cache: bottom row of the top neighbour (luma + chroma)... */
276 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
277 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
278 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
279 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
281 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
282 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
284 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
285 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
/* ...or the CABAC/CAVLC default (0 resp. 64) when the top MB is unusable. */
288 h->non_zero_count_cache[4+8*0]=
289 h->non_zero_count_cache[5+8*0]=
290 h->non_zero_count_cache[6+8*0]=
291 h->non_zero_count_cache[7+8*0]=
293 h->non_zero_count_cache[1+8*0]=
294 h->non_zero_count_cache[2+8*0]=
296 h->non_zero_count_cache[1+8*3]=
297 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* Same for the right column of each left neighbour half. */
301 for (i=0; i<2; i++) {
303 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
304 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
305 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
306 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
308 h->non_zero_count_cache[3+8*1 + 2*8*i]=
309 h->non_zero_count_cache[3+8*2 + 2*8*i]=
310 h->non_zero_count_cache[0+8*1 + 8*i]=
311 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* Neighbour coded-block-pattern bits (used for CABAC context derivation). */
318 h->top_cbp = h->cbp_table[top_xy];
319 } else if(IS_INTRA(mb_type)) {
326 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
327 } else if(IS_INTRA(mb_type)) {
333 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
336 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* Inter prediction caches: mv/ref for top, left, topleft and topright
 * neighbours per reference list. */
341 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
343 for(list=0; list<h->list_count; list++){
344 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
345 /*if(!h->mv_cache_clean[list]){
346 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
347 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
348 h->mv_cache_clean[list]= 1;
352 h->mv_cache_clean[list]= 0;
354 if(USES_LIST(top_type, list)){
355 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
356 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
357 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
358 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
359 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
360 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
361 h->ref_cache[list][scan8[0] + 0 - 1*8]=
362 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
363 h->ref_cache[list][scan8[0] + 2 - 1*8]=
364 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
366 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
367 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
368 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
369 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
370 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
374 int cache_idx = scan8[0] - 1 + i*2*8;
375 if(USES_LIST(left_type[i], list)){
376 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
377 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
378 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
379 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
380 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
381 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
383 *(uint32_t*)h->mv_cache [list][cache_idx ]=
384 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
385 h->ref_cache[list][cache_idx ]=
386 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
/* Topleft/topright are only needed by temporal direct and deblocking. */
390 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
393 if(USES_LIST(topleft_type, list)){
394 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
395 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
396 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
397 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
399 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
400 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
403 if(USES_LIST(topright_type, list)){
404 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
405 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
406 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
407 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
409 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
410 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
413 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
416 h->ref_cache[list][scan8[5 ]+1] =
417 h->ref_cache[list][scan8[7 ]+1] =
418 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
419 h->ref_cache[list][scan8[4 ]] =
420 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
421 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
422 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
423 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
424 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
425 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
428 /* XXX beurk, Load mvd */
429 if(USES_LIST(top_type, list)){
430 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
431 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
432 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
433 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
434 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
436 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
437 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
438 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
439 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
441 if(USES_LIST(left_type[0], list)){
442 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
443 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
444 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
446 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
447 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
449 if(USES_LIST(left_type[1], list)){
450 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
451 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
452 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
454 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
455 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
457 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
458 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
459 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
460 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
461 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B slices: cache per-8x8-block direct flags from neighbours. */
463 if(h->slice_type_nos == FF_B_TYPE){
464 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
466 if(IS_DIRECT(top_type)){
467 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
468 }else if(IS_8X8(top_type)){
469 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
470 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
471 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
473 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
476 if(IS_DIRECT(left_type[0]))
477 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
478 else if(IS_8X8(left_type[0]))
479 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
481 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
483 if(IS_DIRECT(left_type[1]))
484 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
485 else if(IS_8X8(left_type[1]))
486 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
488 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* MBAFF frame<->field rescaling of the cached neighbour mv/ref values:
 * MAP_F2F is defined twice below and applied to every neighbour cache slot
 * via this list (once for each frame/field direction). */
494 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
495 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
496 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
497 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
498 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
499 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
500 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
501 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
502 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
503 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
505 #define MAP_F2F(idx, mb_type)\
506 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
507 h->ref_cache[list][idx] <<= 1;\
508 h->mv_cache[list][idx][1] /= 2;\
509 h->mvd_cache[list][idx][1] /= 2;\
514 #define MAP_F2F(idx, mb_type)\
515 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
516 h->ref_cache[list][idx] >>= 1;\
517 h->mv_cache[list][idx][1] <<= 1;\
518 h->mvd_cache[list][idx][1] <<= 1;\
/* Used by the 8x8 DCT CABAC context: count of 8x8-transform neighbours. */
528 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/**
 * Copies the intra4x4 prediction modes of the current MB's right column and
 * bottom row from the prediction cache back into the per-MB storage, where
 * the next MBs (and the next row) will read them as neighbour context.
 */
531 static inline void write_back_intra_pred_mode(H264Context *h){
532 const int mb_xy= h->mb_xy;
534 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
535 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
536 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
537 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
538 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
539 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
540 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
544 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
546 static inline int check_intra4x4_pred_mode(H264Context *h){
547 MpegEncContext * const s = &h->s;
/* Mode remap tables: entry -1 keeps the mode, other values substitute a
 * DC variant that avoids the missing neighbour; an unrepairable mode is
 * reported as a bitstream error below. */
548 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
549 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
552 if(!(h->top_samples_available&0x8000)){
554 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
556 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
559 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
564 if(!(h->left_samples_available&0x8000)){
566 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
568 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
571 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
577 } //FIXME cleanup like next
580 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
582 static inline int check_intra_pred_mode(H264Context *h, int mode){
583 MpegEncContext * const s = &h->s;
/* Same remap idea as check_intra4x4_pred_mode but for whole-MB (16x16 /
 * chroma 8x8) modes; -1 entries mean the mode cannot be repaired. */
584 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
585 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
588 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
592 if(!(h->top_samples_available&0x8000)){
595 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
600 if(!(h->left_samples_available&0x8000)){
603 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
612 * gets the predicted intra4x4 prediction mode.
614 static inline int pred_intra_mode(H264Context *h, int n){
615 const int index8= scan8[n];
616 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
617 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
/* Per the spec, the prediction is the minimum of the left and top modes;
 * a negative value means an unavailable neighbour, forcing DC. */
618 const int min= FFMIN(left, top);
620 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
622 if(min<0) return DC_PRED;
/**
 * Copies the non-zero coefficient counts of the current MB's right column
 * and bottom row (luma + chroma) from the cache back to per-MB storage,
 * and packs one nnz bit per luma 4x4 block into bytes 14..15 for the
 * deblocking filter (unpacked again in fill_caches).
 */
626 static inline void write_back_non_zero_count(H264Context *h){
627 const int mb_xy= h->mb_xy;
629 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
630 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
631 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
632 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
633 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
634 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
635 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
637 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
638 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
639 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
641 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
642 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
643 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
646 // store all luma nnzs, for deblocking
649 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
650 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
655 * gets the predicted number of non-zero coefficients.
656 * @param n block index
658 static inline int pred_non_zero_count(H264Context *h, int n){
659 const int index8= scan8[n];
660 const int left= h->non_zero_count_cache[index8 - 1];
661 const int top = h->non_zero_count_cache[index8 - 8];
/* i<64: both neighbours available — round their average up; otherwise
 * the sentinel value 64 from one neighbour dominates. */
664 if(i<64) i= (i+1)>>1;
666 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Fetches the top-right (diagonal) neighbour MV C for MV prediction,
 * falling back to the top-left neighbour when C is unavailable, with extra
 * MBAFF-specific handling that rescales MVs across frame/field boundaries.
 *
 * @param C          receives a pointer to the chosen neighbour MV
 * @param i          cache index of the current block (scan8-based)
 * @param part_width partition width in 4x4 units
 * @return the reference index belonging to *C
 */
671 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
672 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
673 MpegEncContext *s = &h->s;
675 /* there is no consistent mapping of mvs to neighboring locations that will
676 * make mbaff happy, so we can't move all this logic to fill_caches */
678 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
/* Scratch slot scan8[0]-2 holds the synthesized MV for the MBAFF cases. */
680 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
681 *C = h->mv_cache[list][scan8[0]-2];
684 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
685 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
686 if(IS_INTERLACED(mb_types[topright_xy])){
/* SET_DIAG_MV reads the MV at picture coordinates (X4,Y4) in 4x4 units,
 * scales its vertical component (MV_OP) and ref index (REF_OP) for the
 * frame/field mismatch, and returns the adjusted ref. */
687 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
688 const int x4 = X4, y4 = Y4;\
689 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
690 if(!USES_LIST(mb_type,list))\
691 return LIST_NOT_USED;\
692 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
693 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
694 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
695 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
697 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
700 if(topright_ref == PART_NOT_AVAILABLE
701 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
702 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
704 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
705 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
708 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
710 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
711 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* Non-MBAFF path: use topright if available, else topleft (spec fallback). */
717 if(topright_ref != PART_NOT_AVAILABLE){
718 *C= h->mv_cache[list][ i - 8 + part_width ];
721 tprintf(s->avctx, "topright MV not available\n");
723 *C= h->mv_cache[list][ i - 8 - 1 ];
724 return h->ref_cache[list][ i - 8 - 1 ];
729 * gets the predicted MV.
730 * @param n the block index
731 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
732 * @param mx the x component of the predicted motion vector
733 * @param my the y component of the predicted motion vector
735 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
736 const int index8= scan8[n];
/* Neighbour MVs: A = left, B = top, C = top-right (via fetch_diagonal_mv). */
737 const int top_ref= h->ref_cache[list][ index8 - 8 ];
738 const int left_ref= h->ref_cache[list][ index8 - 1 ];
739 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
740 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
742 int diagonal_ref, match_count;
744 assert(part_width==1 || part_width==2 || part_width==4);
754 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
/* Standard median prediction: if more than one neighbour shares the target
 * ref, take the component-wise median; with exactly one match, use it. */
755 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
756 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
757 if(match_count > 1){ //most common
758 *mx= mid_pred(A[0], B[0], C[0]);
759 *my= mid_pred(A[1], B[1], C[1]);
760 }else if(match_count==1){
764 }else if(top_ref==ref){
/* No match: fall back to A alone when only the left neighbour exists,
 * otherwise to the median of all three. */
772 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
776 *mx= mid_pred(A[0], B[0], C[0]);
777 *my= mid_pred(A[1], B[1], C[1]);
781 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
785 * gets the directionally predicted 16x8 MV.
786 * @param n the block index
787 * @param mx the x component of the predicted motion vector
788 * @param my the y component of the predicted motion vector
790 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
/* Top partition: prefer the top neighbour B when it has the same ref. */
792 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
793 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
795 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
/* Bottom partition: prefer the left neighbour A when it has the same ref. */
803 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
804 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
806 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
/* Neither directional neighbour matched: fall back to median prediction. */
816 pred_motion(h, n, 4, list, ref, mx, my);
820 * gets the directionally predicted 8x16 MV.
821 * @param n the block index
822 * @param mx the x component of the predicted motion vector
823 * @param my the y component of the predicted motion vector
825 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
/* Left partition: prefer the left neighbour A when it has the same ref. */
827 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
828 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
830 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
/* Right partition: prefer the diagonal neighbour C when it has the same ref. */
841 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
843 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
845 if(diagonal_ref == ref){
/* Neither directional neighbour matched: fall back to median prediction. */
853 pred_motion(h, n, 2, list, ref, mx, my);
/**
 * Computes the MV for a P-skip macroblock: zero when either neighbour is
 * unavailable or has a zero MV with ref 0 (the spec's skip condition),
 * otherwise the normal median prediction against ref 0 of list 0.
 */
856 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
857 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
858 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
860 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
862 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
863 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
864 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
870 pred_motion(h, 0, 4, 0, 0, mx, my);
/**
 * Precomputes the temporal-direct distance scale factors for every list-0
 * reference (H.264 8.4.1.2.3): factor = clip((tb*tx + 32) >> 6), where
 * tb/td are clipped POC distances; 256 means "same distance" (td == 0).
 * Also mirrors the table into the field variant used by MBAFF.
 */
875 static inline void direct_dist_scale_factor(H264Context * const h){
876 MpegEncContext * const s = &h->s;
877 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
878 const int poc1 = h->ref_list[1][0].poc;
880 for(i=0; i<h->ref_count[0]; i++){
881 int poc0 = h->ref_list[0][i].poc;
882 int td = av_clip(poc1 - poc0, -128, 127);
883 if(td == 0 /* FIXME || pic0 is a long-term ref */){
884 h->dist_scale_factor[i] = 256;
886 int tb = av_clip(poc - poc0, -128, 127);
887 int tx = (16384 + (FFABS(td) >> 1)) / td;
888 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
/* Field table: each frame entry is duplicated for both field parities. */
892 for(i=0; i<h->ref_count[0]; i++){
893 h->dist_scale_factor_field[2*i] =
894 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/**
 * Records the current picture's reference counts/POCs (for later use as a
 * colocated picture) and builds map_col_to_list0: for temporal direct
 * prediction, maps each reference of the colocated picture (ref_list[1][0])
 * to the list-0 index with a matching POC (0 when no match is found).
 * The field variant duplicates each mapping for both field parities.
 */
898 static inline void direct_ref_list_init(H264Context * const h){
899 MpegEncContext * const s = &h->s;
900 Picture * const ref1 = &h->ref_list[1][0];
901 Picture * const cur = s->current_picture_ptr;
903 int sidx= s->picture_structure&1;
904 if(cur->pict_type == FF_I_TYPE)
905 cur->ref_count[sidx][0] = 0;
906 if(cur->pict_type != FF_B_TYPE)
907 cur->ref_count[sidx][1] = 0;
908 for(list=0; list<2; list++){
909 cur->ref_count[sidx][list] = h->ref_count[list];
910 for(j=0; j<h->ref_count[list]; j++)
911 cur->ref_poc[sidx][list][j] = h->ref_list[list][j].poc;
/* The colocated map is only needed for temporal direct B prediction. */
913 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
915 for(list=0; list<2; list++){
916 for(i=0; i<ref1->ref_count[sidx][list]; i++){
917 const int poc = ref1->ref_poc[sidx][list][i];
918 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
919 for(j=0; j<h->ref_count[list]; j++)
920 if(h->ref_list[list][j].poc == poc){
921 h->map_col_to_list0[list][i] = j;
927 for(list=0; list<2; list++){
928 for(i=0; i<ref1->ref_count[sidx][list]; i++){
929 j = h->map_col_to_list0[list][i];
930 h->map_col_to_list0_field[list][2*i] = 2*j;
931 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
// Derives motion vectors and reference indices for a B-slice macroblock (or
// its 8x8 partitions) coded in direct mode, filling h->mv_cache /
// h->ref_cache and updating *mb_type / h->sub_mb_type. Two derivation
// branches: spatial direct (h->direct_spatial_mv_pred) and temporal direct.
// The co-located data comes from list1[0] (l1mv0/l1mv1, l1ref0/l1ref1).
// NOTE(review): many lines of this function were dropped by extraction
// (else branches, local declarations, closing braces); comments only,
// visible code untouched.
937 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
938 MpegEncContext * const s = &h->s;
939 const int mb_xy = h->mb_xy;
940 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
941 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
942 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
// Co-located list1[0] motion vectors / reference indices at this MB.
943 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
944 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
945 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
946 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
947 const int is_b8x8 = IS_8X8(*mb_type);
948 unsigned int sub_mb_type;
951 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
// Choose partition granularity from the co-located MB type and
// direct_8x8_inference_flag (4x4 sub-blocks only when inference is off).
952 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
953 /* FIXME save sub mb types from previous frames (or derive from MVs)
954 * so we know exactly what block size to use */
955 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
956 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
957 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
958 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
959 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
961 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
962 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
965 *mb_type |= MB_TYPE_DIRECT2;
967 *mb_type |= MB_TYPE_INTERLACED;
969 tprintf(s->avctx, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
// ---- spatial direct prediction ----
971 if(h->direct_spatial_mv_pred){
976 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
978 /* ref = min(neighbors) */
// Minimum of left/top/top-right (or top-left) neighbour refs per list;
// unsigned compare makes negative (unavailable) refs sort last.
979 for(list=0; list<2; list++){
980 int refa = h->ref_cache[list][scan8[0] - 1];
981 int refb = h->ref_cache[list][scan8[0] - 8];
982 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
983 if(refc == PART_NOT_AVAILABLE)
984 refc = h->ref_cache[list][scan8[0] - 8 - 1];
985 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
// No valid neighbour refs in either list -> zero MVs, ref 0.
990 if(ref[0] < 0 && ref[1] < 0){
992 mv[0][0] = mv[0][1] =
993 mv[1][0] = mv[1][1] = 0;
995 for(list=0; list<2; list++){
997 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
999 mv[list][0] = mv[list][1] = 0;
// Drop the unused prediction list when one ref is invalid.
1005 *mb_type &= ~MB_TYPE_L1;
1006 sub_mb_type &= ~MB_TYPE_L1;
1007 }else if(ref[0] < 0){
1009 *mb_type &= ~MB_TYPE_L0;
1010 sub_mb_type &= ~MB_TYPE_L0;
// Field/frame mismatch with the co-located picture: re-point the
// co-located pointers at the matching field/frame rows.
1013 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1014 int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1015 int mb_types_col[2];
1016 int b8_stride = h->b8_stride;
1017 int b4_stride = h->b_stride;
1019 *mb_type = (*mb_type & ~MB_TYPE_16x16) | MB_TYPE_8x8;
1021 if(IS_INTERLACED(*mb_type)){
1022 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1023 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1025 l1ref0 -= 2*b8_stride;
1026 l1ref1 -= 2*b8_stride;
1027 l1mv0 -= 4*b4_stride;
1028 l1mv1 -= 4*b4_stride;
// Pick the co-located field with the closer POC to the current picture.
1033 int cur_poc = s->current_picture_ptr->poc;
1034 int *col_poc = h->ref_list[1]->field_poc;
1035 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1036 int dy = 2*col_parity - (s->mb_y&1);
1038 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy + col_parity*s->mb_stride];
1039 l1ref0 += dy*b8_stride;
1040 l1ref1 += dy*b8_stride;
1041 l1mv0 += 2*dy*b4_stride;
1042 l1mv1 += 2*dy*b4_stride;
// Per-8x8 spatial direct with field/frame-adjusted co-located data.
1046 for(i8=0; i8<4; i8++){
1049 int xy8 = x8+y8*b8_stride;
1050 int xy4 = 3*x8+y8*b4_stride;
1053 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1055 h->sub_mb_type[i8] = sub_mb_type;
1057 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1058 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
// "Co-located near-zero motion with ref 0" condition: forces zero MVs
// per the spatial direct rules.
1059 if(!IS_INTRA(mb_types_col[y8])
1060 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1061 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1063 a= pack16to32(mv[0][0],mv[0][1]);
1065 b= pack16to32(mv[1][0],mv[1][1]);
1067 a= pack16to32(mv[0][0],mv[0][1]);
1068 b= pack16to32(mv[1][0],mv[1][1]);
1070 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1071 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
// Whole-MB 16x16 spatial direct.
1073 }else if(IS_16X16(*mb_type)){
1076 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1077 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1078 if(!IS_INTRA(mb_type_col)
1079 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1080 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
// x264_build check works around a bug in old x264-encoded streams.
1081 && (h->x264_build>33 || !h->x264_build)))){
1083 a= pack16to32(mv[0][0],mv[0][1]);
1085 b= pack16to32(mv[1][0],mv[1][1]);
1087 a= pack16to32(mv[0][0],mv[0][1]);
1088 b= pack16to32(mv[1][0],mv[1][1]);
1090 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1091 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
// 8x8-partitioned spatial direct (no field/frame mismatch).
1093 for(i8=0; i8<4; i8++){
1094 const int x8 = i8&1;
1095 const int y8 = i8>>1;
1097 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1099 h->sub_mb_type[i8] = sub_mb_type;
1101 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1102 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1103 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1104 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
// Zero out sub-blocks whose co-located motion is near-zero with ref 0.
1107 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1108 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1109 && (h->x264_build>33 || !h->x264_build)))){
1110 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1111 if(IS_SUB_8X8(sub_mb_type)){
1112 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1113 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1115 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1117 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1120 for(i4=0; i4<4; i4++){
1121 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1122 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1124 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1126 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
// ---- temporal direct prediction ----
1132 }else{ /* direct temporal mv pred */
1133 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1134 const int *dist_scale_factor = h->dist_scale_factor;
// Use the field-granularity maps/scale factors for field macroblocks.
1137 if(IS_INTERLACED(*mb_type)){
1138 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1139 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1140 dist_scale_factor = h->dist_scale_factor_field;
1142 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1143 /* FIXME assumes direct_8x8_inference == 1 */
1144 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1145 int mb_types_col[2];
1148 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1149 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1150 | (*mb_type & MB_TYPE_INTERLACED);
1151 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1153 if(IS_INTERLACED(*mb_type)){
1154 /* frame to field scaling */
1155 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1156 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1158 l1ref0 -= 2*h->b8_stride;
1159 l1ref1 -= 2*h->b8_stride;
1160 l1mv0 -= 4*h->b_stride;
1161 l1mv1 -= 4*h->b_stride;
1165 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1166 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1168 *mb_type |= MB_TYPE_16x8;
1170 *mb_type |= MB_TYPE_8x8;
1172 /* field to frame scaling */
1173 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1174 * but in MBAFF, top and bottom POC are equal */
1175 int dy = (s->mb_y&1) ? 1 : 2;
1177 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1178 l1ref0 += dy*h->b8_stride;
1179 l1ref1 += dy*h->b8_stride;
1180 l1mv0 += 2*dy*h->b_stride;
1181 l1mv1 += 2*dy*h->b_stride;
1184 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1186 *mb_type |= MB_TYPE_16x16;
1188 *mb_type |= MB_TYPE_8x8;
// Per-8x8 temporal direct with vertical MV rescaling (y_shift) across
// the field/frame boundary.
1191 for(i8=0; i8<4; i8++){
1192 const int x8 = i8&1;
1193 const int y8 = i8>>1;
1195 const int16_t (*l1mv)[2]= l1mv0;
1197 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1199 h->sub_mb_type[i8] = sub_mb_type;
1201 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
// Intra co-located block: direct MVs are zero with ref 0.
1202 if(IS_INTRA(mb_types_col[y8])){
1203 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1204 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1205 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1209 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1211 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1213 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1216 scale = dist_scale_factor[ref0];
1217 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1220 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1221 int my_col = (mv_col[1]<<y_shift)/2;
// Temporal MV scaling: L0 = (scale*mv_col + 128) >> 8, L1 = L0 - mv_col.
1222 int mx = (scale * mv_col[0] + 128) >> 8;
1223 int my = (scale * my_col + 128) >> 8;
1224 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1225 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1232 /* one-to-one mv scaling */
// No field/frame mismatch: direct scaling, 16x16 fast path first.
1234 if(IS_16X16(*mb_type)){
1237 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1238 if(IS_INTRA(mb_type_col)){
1241 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1242 : map_col_to_list0[1][l1ref1[0]];
1243 const int scale = dist_scale_factor[ref0];
1244 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1246 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1247 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1249 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1250 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1252 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1253 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1254 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
// 8x8-partitioned one-to-one temporal scaling.
1256 for(i8=0; i8<4; i8++){
1257 const int x8 = i8&1;
1258 const int y8 = i8>>1;
1260 const int16_t (*l1mv)[2]= l1mv0;
1262 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1264 h->sub_mb_type[i8] = sub_mb_type;
1265 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1266 if(IS_INTRA(mb_type_col)){
1267 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1268 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1269 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1273 ref0 = l1ref0[x8 + y8*h->b8_stride];
1275 ref0 = map_col_to_list0[0][ref0];
1277 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1280 scale = dist_scale_factor[ref0];
1282 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1283 if(IS_SUB_8X8(sub_mb_type)){
1284 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1285 int mx = (scale * mv_col[0] + 128) >> 8;
1286 int my = (scale * mv_col[1] + 128) >> 8;
1287 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1288 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
// 4x4 granularity when the sub type is not 8x8.
1290 for(i4=0; i4<4; i4++){
1291 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1292 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1293 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1294 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1295 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1296 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
// Copies the per-macroblock motion data from the decode caches
// (h->mv_cache / h->ref_cache / h->mvd_cache) back into the per-picture
// arrays (motion_val, ref_index, mvd_table, direct_table) so neighbouring
// macroblocks and future frames can read them.
// NOTE(review): extraction dropped lines here (loop headers, else
// branches, braces); comments only, code untouched.
1303 static inline void write_back_motion(H264Context *h, int mb_type){
1304 MpegEncContext * const s = &h->s;
1305 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1306 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
// Mark list0 as unused up front when the MB doesn't reference it.
1309 if(!USES_LIST(mb_type, 0))
1310 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1312 for(list=0; list<h->list_count; list++){
1314 if(!USES_LIST(mb_type, list))
// Copy 4x4 MVs row by row, two MVs (one uint64_t) at a time.
1318 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1319 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
// CABAC also needs the MV differences stored (zeroed for skipped MBs).
1321 if( h->pps.cabac ) {
1322 if(IS_SKIP(mb_type))
1323 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1326 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1327 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
// One reference index per 8x8 block (four per MB).
1332 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1333 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1334 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1335 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1336 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
// B-slice + CABAC: record which 8x8 partitions used direct mode.
1340 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1341 if(IS_8X8(mb_type)){
1342 uint8_t *direct_table = &h->direct_table[b8_xy];
1343 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1344 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1345 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1351 * Decodes a network abstraction layer unit.
1352 * @param consumed is the number of bytes used as input
1353 * @param length is the length of the array
1354 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1355 * @returns decoded bytes, might be src+1 if no escapes
// Unescapes one NAL unit: strips the one-byte NAL header (ref idc + type),
// then removes 0x03 emulation-prevention bytes from the RBSP. Returns a
// pointer either into the source (no escapes found) or into an internal
// reusable rbsp_buffer.
// NOTE(review): several lines (loop setup, returns, braces) were dropped
// by extraction; comments only, code untouched.
1357 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1362 // src[0]&0x80; //forbidden bit
1363 h->nal_ref_idc= src[0]>>5;
1364 h->nal_unit_type= src[0]&0x1F;
1368 for(i=0; i<length; i++)
1369 printf("%2X ", src[i]);
// First pass: scan for a 00 00 {00,01,02,03} pattern to find the first
// escape (or a start code, which ends this NAL).
1371 for(i=0; i+1<length; i+=2){
1372 if(src[i]) continue;
1373 if(i>0 && src[i-1]==0) i--;
1374 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1376 /* startcode, so we must be past the end */
1383 if(i>=length-1){ //no escaped 0
1384 *dst_length= length;
1385 *consumed= length+1; //+1 for the header
1389 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1390 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1391 dst= h->rbsp_buffer[bufidx];
1397 //printf("decoding esc\n");
1400 //remove escapes (very rare 1:2^22)
// Second pass: copy bytes, dropping the 0x03 of each 00 00 03 sequence.
1401 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1402 if(src[si+2]==3){ //escape
1407 }else //next start code
1411 dst[di++]= src[si++];
1415 *consumed= si + 1;//+1 for the header
1416 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1421 * identifies the exact end of the bitstream
1422 * @return the length of the trailing, or 0 if damaged
// Finds the rbsp_stop_one_bit that terminates the RBSP payload.
// NOTE(review): the body of this function is almost entirely missing from
// this chunk (only the trace line is visible); do not infer behavior from
// what is shown here.
1424 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1428 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1438 * IDCT transforms the 16 dc values and dequantizes them.
1439 * @param qp quantization parameter
// Inverse 4x4 Hadamard transform + dequantization of the 16 luma DC
// coefficients (Intra16x16 mode), writing results back in place at the DC
// positions of the 16 4x4 blocks (hence the stride-16 offset tables).
// NOTE(review): loop headers and the middle butterfly lines are missing
// from this chunk; comments only, code untouched.
1441 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1444 int temp[16]; //FIXME check if this is a good idea
1445 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1446 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1448 //memset(block, 64, 2*256);
// Horizontal pass: 4-point butterflies into temp[].
1451 const int offset= y_offset[i];
1452 const int z0= block[offset+stride*0] + block[offset+stride*4];
1453 const int z1= block[offset+stride*0] - block[offset+stride*4];
1454 const int z2= block[offset+stride*1] - block[offset+stride*5];
1455 const int z3= block[offset+stride*1] + block[offset+stride*5];
// Vertical pass + dequant: scale by qmul with rounding ( +128 >> 8 ).
1464 const int offset= x_offset[i];
1465 const int z0= temp[4*0+i] + temp[4*2+i];
1466 const int z1= temp[4*0+i] - temp[4*2+i];
1467 const int z2= temp[4*1+i] - temp[4*3+i];
1468 const int z3= temp[4*1+i] + temp[4*3+i];
1470 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1471 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1472 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1473 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1479 * DCT transforms the 16 dc values.
1480 * @param qp quantization parameter ??? FIXME
// Forward 4x4 Hadamard transform of the 16 luma DC values (encoder side),
// mirror of h264_luma_dc_dequant_idct_c; output is halved (>>1).
// NOTE(review): loop headers and the middle butterfly lines are missing
// from this chunk; comments only, code untouched.
1482 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1483 // const int qmul= dequant_coeff[qp][0];
1485 int temp[16]; //FIXME check if this is a good idea
1486 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1487 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
// Horizontal pass into temp[].
1490 const int offset= y_offset[i];
1491 const int z0= block[offset+stride*0] + block[offset+stride*4];
1492 const int z1= block[offset+stride*0] - block[offset+stride*4];
1493 const int z2= block[offset+stride*1] - block[offset+stride*5];
1494 const int z3= block[offset+stride*1] + block[offset+stride*5];
// Vertical pass, result scaled down by 2.
1503 const int offset= x_offset[i];
1504 const int z0= temp[4*0+i] + temp[4*2+i];
1505 const int z1= temp[4*0+i] - temp[4*2+i];
1506 const int z2= temp[4*1+i] - temp[4*3+i];
1507 const int z3= temp[4*1+i] + temp[4*3+i];
1509 block[stride*0 +offset]= (z0 + z3)>>1;
1510 block[stride*2 +offset]= (z1 + z2)>>1;
1511 block[stride*8 +offset]= (z1 - z2)>>1;
1512 block[stride*10+offset]= (z0 - z3)>>1;
// Inverse 2x2 transform + dequantization of the 4 chroma DC coefficients,
// in place at the DC positions of the 4 chroma 4x4 blocks.
// NOTE(review): the intermediate butterfly lines between the loads and the
// stores are missing from this chunk (e is computed there); comments only.
1520 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1521 const int stride= 16*2;
1522 const int xStride= 16;
// Load the four DC values from their block positions.
1525 a= block[stride*0 + xStride*0];
1526 b= block[stride*0 + xStride*1];
1527 c= block[stride*1 + xStride*0];
1528 d= block[stride*1 + xStride*1];
// 2x2 butterfly result, dequantized with qmul and scaled by >>7.
1535 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1536 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1537 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1538 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
// Forward 2x2 transform of the 4 chroma DC values (encoder side);
// mirror of chroma_dc_dequant_idct_c without the quantizer scaling.
// NOTE(review): the intermediate butterfly lines are missing from this
// chunk (e is computed there); comments only, code untouched.
1542 static void chroma_dc_dct_c(DCTELEM *block){
1543 const int stride= 16*2;
1544 const int xStride= 16;
// Load the four DC values.
1547 a= block[stride*0 + xStride*0];
1548 b= block[stride*0 + xStride*1];
1549 c= block[stride*1 + xStride*0];
1550 d= block[stride*1 + xStride*1];
// Store the 2x2 butterfly outputs.
1557 block[stride*0 + xStride*0]= (a+c);
1558 block[stride*0 + xStride*1]= (e+b);
1559 block[stride*1 + xStride*0]= (a-c);
1560 block[stride*1 + xStride*1]= (e-b);
1565 * gets the chroma qp.
// Maps a luma qscale to the chroma QP via the per-PPS lookup table;
// t selects which of the PPS chroma QP tables (one per chroma plane
// offset) to use.
1567 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1568 return h->pps.chroma_qp_table[t][qscale];
1571 //FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
1572 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
// Quantizes a block of transform coefficients (encoder side): applies the
// per-qscale quant table with an intra/inter-dependent rounding bias and a
// dead-zone threshold, returning the index of the last non-zero
// coefficient. separate_dc selects special handling of block[0] with a
// different shift (QUANT_SHIFT-2 when intra, QUANT_SHIFT+1 otherwise).
// NOTE(review): branch/else lines and loop headers are missing from this
// chunk; comments only, code untouched.
1573 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1575 const int * const quant_table= quant_coeff[qscale];
1576 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
// threshold1/2 implement a dead zone: |level| below it quantizes to 0
// via a single unsigned comparison.
1577 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1578 const unsigned int threshold2= (threshold1<<1);
// DC path, intra variant: reduced shift (QUANT_SHIFT-2).
1584 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1585 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1586 const unsigned int dc_threshold2= (dc_threshold1<<1);
1588 int level= block[0]*quant_coeff[qscale+18][0];
1589 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1591 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1594 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1597 // last_non_zero = i;
// DC path, second variant: increased shift (QUANT_SHIFT+1).
1602 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1603 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1604 const unsigned int dc_threshold2= (dc_threshold1<<1);
1606 int level= block[0]*quant_table[0];
1607 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1609 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1612 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1615 // last_non_zero = i;
// AC coefficients in scan order: quantize with sign-aware rounding.
1628 const int j= scantable[i];
1629 int level= block[j]*quant_table[j];
1631 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1632 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1633 if(((unsigned)(level+threshold1))>threshold2){
1635 level= (bias + level)>>QUANT_SHIFT;
1638 level= (bias - level)>>QUANT_SHIFT;
1647 return last_non_zero;
// Motion compensation for one partition in one direction (list): fetches
// the quarter-pel luma and eighth-pel chroma prediction from the reference
// picture via qpix_op/chroma_op, spilling to ff_emulated_edge_mc when the
// motion vector reaches outside the picture.
// NOTE(review): some lines (emu flag setup, braces) were dropped by
// extraction; comments only, code untouched.
1650 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1651 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1652 int src_x_offset, int src_y_offset,
1653 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1654 MpegEncContext * const s = &h->s;
// Full MV in quarter-pel units; low 2 bits select the qpel interpolation.
1655 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1656 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1657 const int luma_xy= (mx&3) + ((my&3)<<2);
1658 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1659 uint8_t * src_cb, * src_cr;
1660 int extra_width= h->emu_edge_width;
1661 int extra_height= h->emu_edge_height;
1663 const int full_mx= mx>>2;
1664 const int full_my= my>>2;
1665 const int pic_width = 16*s->mb_width;
1666 const int pic_height = 16*s->mb_height >> MB_FIELD;
1668 if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
// The 6-tap filter reads 2 extra pixels on each side at sub-pel positions.
1671 if(mx&7) extra_width -= 3;
1672 if(my&7) extra_height -= 3;
// Out-of-picture access: interpolate from an edge-extended copy instead.
1674 if( full_mx < 0-extra_width
1675 || full_my < 0-extra_height
1676 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1677 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1678 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1679 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1683 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1685 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1688 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1691 // chroma offset when predicting from a field of opposite parity
1692 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1693 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1695 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1696 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1699 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1700 src_cb= s->edge_emu_buffer;
1702 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1705 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1706 src_cr= s->edge_emu_buffer;
1708 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
// Unweighted motion compensation for one partition: runs list0 and/or
// list1 prediction via mc_dir_part. When both lists are used, the second
// pass switches from the "put" to the "avg" operators so the predictions
// are averaged into the destination.
1711 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1712 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1713 int x_offset, int y_offset,
1714 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1715 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1716 int list0, int list1){
1717 MpegEncContext * const s = &h->s;
1718 qpel_mc_func *qpix_op= qpix_put;
1719 h264_chroma_mc_func chroma_op= chroma_put;
// Advance destinations to this partition, then convert the partition
// offset to picture coordinates for the source fetch.
1721 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1722 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1723 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1724 x_offset += 8*s->mb_x;
1725 y_offset += 8*(s->mb_y >> MB_FIELD);
1728 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1729 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1730 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1731 qpix_op, chroma_op);
// After list0, switch to averaging ops for the list1 pass (bi-pred).
1734 chroma_op= chroma_avg;
1738 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1739 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1740 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1741 qpix_op, chroma_op);
// Weighted motion compensation for one partition. Bi-directional case:
// predicts list0 into the destination and list1 into a scratch buffer,
// then blends with implicit (use_weight==2) or explicit weights.
// Uni-directional case: predicts, then applies the explicit weight/offset
// in place.
1745 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1746 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1747 int x_offset, int y_offset,
1748 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1749 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1750 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1751 int list0, int list1){
1752 MpegEncContext * const s = &h->s;
// Position destination/source exactly as mc_part_std does.
1754 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1755 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1756 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1757 x_offset += 8*s->mb_x;
1758 y_offset += 8*(s->mb_y >> MB_FIELD);
1761 /* don't optimize for luma-only case, since B-frames usually
1762 * use implicit weights => chroma too. */
1763 uint8_t *tmp_cb = s->obmc_scratchpad;
1764 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1765 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1766 int refn0 = h->ref_cache[0][ scan8[n] ];
1767 int refn1 = h->ref_cache[1][ scan8[n] ];
// list0 -> destination, list1 -> scratch; blend afterwards.
1769 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1770 dest_y, dest_cb, dest_cr,
1771 x_offset, y_offset, qpix_put, chroma_put);
1772 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1773 tmp_y, tmp_cb, tmp_cr,
1774 x_offset, y_offset, qpix_put, chroma_put);
// Implicit weighting: weights sum to 64, derived from POC distances.
1776 if(h->use_weight == 2){
1777 int weight0 = h->implicit_weight[refn0][refn1];
1778 int weight1 = 64 - weight0;
1779 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1780 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1781 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
// Explicit bi-prediction weighting from the slice header tables.
1783 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1784 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1785 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1786 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1787 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1788 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1789 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1790 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1791 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
// Uni-directional: predict then weight in place.
1794 int list = list1 ? 1 : 0;
1795 int refn = h->ref_cache[list][ scan8[n] ];
1796 Picture *ref= &h->ref_list[list][refn];
1797 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1798 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1799 qpix_put, chroma_put);
1801 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1802 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1803 if(h->use_weight_chroma){
1804 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1805 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1806 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1807 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
// Dispatches one partition's motion compensation to the weighted or
// standard path. The weighted path is used for explicit weighting
// (use_weight==1) or implicit bi-prediction whose weight differs from the
// trivial 32/32 split (which the standard averaging path handles exactly).
1812 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1813 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1814 int x_offset, int y_offset,
1815 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1816 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1817 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1818 int list0, int list1){
1819 if((h->use_weight==2 && list0 && list1
1820 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1821 || h->use_weight==1)
1822 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1823 x_offset, y_offset, qpix_put, chroma_put,
1824 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1826 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1827 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
// Issues cache prefetches for the reference pixels that the MB four
// positions ahead is expected to read, using the current MB's MV as an
// estimate of where that fetch will land.
1830 static inline void prefetch_motion(H264Context *h, int list){
1831 /* fetch pixels for estimated mv 4 macroblocks ahead
1832 * optimized for 64byte cache lines */
1833 MpegEncContext * const s = &h->s;
1834 const int refn = h->ref_cache[list][scan8[0]];
// Integer-pel position of the estimated fetch, 4 MBs (64 luma px) ahead.
1836 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1837 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1838 uint8_t **src= h->ref_list[list][refn].data;
1839 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1840 s->dsp.prefetch(src[0]+off, s->linesize, 4);
// Chroma planes are contiguous; src[2]-src[1] spans both with one call.
1841 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1842 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
// Performs inter prediction for a whole macroblock: walks the partition
// tree (16x16 / 16x8 / 8x16 / 8x8 with sub-partitions) and calls mc_part
// for each partition with the matching qpel/chroma operators and weight
// functions, plus reference-pixel prefetching for both lists.
// NOTE(review): some loop headers/locals (i, n) were dropped by
// extraction; comments only, code untouched.
1846 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1847 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1848 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1849 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1850 MpegEncContext * const s = &h->s;
1851 const int mb_xy= h->mb_xy;
1852 const int mb_type= s->current_picture.mb_type[mb_xy];
1854 assert(IS_INTER(mb_type));
1856 prefetch_motion(h, 0);
// One partition covering the whole MB.
1858 if(IS_16X16(mb_type)){
1859 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1860 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1861 &weight_op[0], &weight_avg[0],
1862 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
// Two 16x8 partitions, top then bottom.
1863 }else if(IS_16X8(mb_type)){
1864 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1865 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1866 &weight_op[1], &weight_avg[1],
1867 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1868 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1869 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1870 &weight_op[1], &weight_avg[1],
1871 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
// Two 8x16 partitions, left then right.
1872 }else if(IS_8X16(mb_type)){
1873 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1874 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1875 &weight_op[2], &weight_avg[2],
1876 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1877 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1878 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1879 &weight_op[2], &weight_avg[2],
1880 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
// 8x8 mode: each of the four 8x8 blocks has its own sub-partitioning.
1884 assert(IS_8X8(mb_type));
1887 const int sub_mb_type= h->sub_mb_type[i];
1889 int x_offset= (i&1)<<2;
1890 int y_offset= (i&2)<<1;
1892 if(IS_SUB_8X8(sub_mb_type)){
1893 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1894 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1895 &weight_op[3], &weight_avg[3],
1896 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1897 }else if(IS_SUB_8X4(sub_mb_type)){
1898 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1899 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1900 &weight_op[4], &weight_avg[4],
1901 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1902 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1903 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1904 &weight_op[4], &weight_avg[4],
1905 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1906 }else if(IS_SUB_4X8(sub_mb_type)){
1907 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1908 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1909 &weight_op[5], &weight_avg[5],
1910 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1911 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1912 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1913 &weight_op[5], &weight_avg[5],
1914 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1917 assert(IS_SUB_4X4(sub_mb_type));
1919 int sub_x_offset= x_offset + 2*(j&1);
1920 int sub_y_offset= y_offset + (j&2);
1921 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1922 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1923 &weight_op[6], &weight_avg[6],
1924 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1930 prefetch_motion(h, 1);
// One-time initialization of the CAVLC tables: coeff_token (per context),
// total_zeros, run-before, and their chroma-DC variants. Guarded by the
// static `done` flag so repeated decoder instances reuse the tables.
// NOTE(review): the `if(!done)` guard and loop headers were dropped by
// extraction; comments only, code untouched.
1933 static av_cold void decode_init_vlc(void){
1934 static int done = 0;
1940 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1941 &chroma_dc_coeff_token_len [0], 1, 1,
1942 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
// Four coeff_token tables, selected by the nC context at decode time.
1945 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1946 &coeff_token_len [i][0], 1, 1,
1947 &coeff_token_bits[i][0], 1, 1, 1);
1951 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1952 &chroma_dc_total_zeros_len [i][0], 1, 1,
1953 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
1955 for(i=0; i<15; i++){
1956 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
1957 &total_zeros_len [i][0], 1, 1,
1958 &total_zeros_bits[i][0], 1, 1, 1);
1962 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
1963 &run_len [i][0], 1, 1,
1964 &run_bits[i][0], 1, 1, 1);
// Separate table for runs when zeros_left > 6.
1966 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1967 &run_len [6][0], 1, 1,
1968 &run_bits[6][0], 1, 1, 1);
/**
 * Free all per-context tables allocated by alloc_tables()/context_init(),
 * the SPS/PPS parameter-set buffers, and the per-thread scratch buffers
 * of every slice-thread context.
 * av_freep() NULLs each pointer, so calling this twice is safe.
 */
1972 static void free_tables(H264Context *h){
1975     av_freep(&h->intra4x4_pred_mode);
1976     av_freep(&h->chroma_pred_mode_table);
1977     av_freep(&h->cbp_table);
1978     av_freep(&h->mvd_table[0]);
1979     av_freep(&h->mvd_table[1]);
1980     av_freep(&h->direct_table);
1981     av_freep(&h->non_zero_count);
1982     av_freep(&h->slice_table_base);
      /* slice_table points into slice_table_base; clear the alias too */
1983     h->slice_table= NULL;
1985     av_freep(&h->mb2b_xy);
1986     av_freep(&h->mb2b8_xy);
1988     for(i = 0; i < MAX_SPS_COUNT; i++)
1989         av_freep(h->sps_buffers + i);
1991     for(i = 0; i < MAX_PPS_COUNT; i++)
1992         av_freep(h->pps_buffers + i);
      /* per-thread buffers live on each thread context, not the main one */
1994     for(i = 0; i < h->s.avctx->thread_count; i++) {
1995         hx = h->thread_context[i];
1997         av_freep(&hx->top_borders[1]);
1998         av_freep(&hx->top_borders[0]);
1999         av_freep(&hx->s.obmc_scratchpad);
/**
 * Precompute the 8x8 dequantization tables for all 52 QP values,
 * one table per scaling matrix (intra/inter).
 * If both 8x8 scaling matrices are identical, the second table
 * aliases the first to save the computation.
 * The coefficient layout is transposed when a non-C IDCT is in use
 * (detected by comparing the function pointer). //FIXME ugly, as noted.
 */
2003 static void init_dequant8_coeff_table(H264Context *h){
2005     const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2006     h->dequant8_coeff[0] = h->dequant8_buffer[0];
2007     h->dequant8_coeff[1] = h->dequant8_buffer[1];
2009     for(i=0; i<2; i++ ){
          /* reuse table 0 if the inter matrix equals the intra matrix */
2010         if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2011             h->dequant8_coeff[1] = h->dequant8_buffer[0];
2015         for(q=0; q<52; q++){
              /* qp = 6*div6 + rem6; the base coeffs repeat every 6 QP steps, shifted left */
2016             int shift = ff_div6[q];
2017             int idx = ff_rem6[q];
2019                 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2020                     ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2021                     h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Precompute the 4x4 dequantization tables for all 52 QP values,
 * one table per scaling matrix (6 of them: intra/inter Y, Cb, Cr).
 * Tables whose scaling matrices are identical to an earlier one are
 * aliased instead of recomputed.
 * Layout is transposed when a non-C IDCT is in use. //FIXME ugly, as noted.
 */
2026 static void init_dequant4_coeff_table(H264Context *h){
2028     const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2029     for(i=0; i<6; i++ ){
2030         h->dequant4_coeff[i] = h->dequant4_buffer[i];
          /* alias to an earlier table with the same scaling matrix, if any */
2032             if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2033                 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2040         for(q=0; q<52; q++){
              /* +2 pre-scales the 4x4 coefficients for the shared IDCT rounding */
2041             int shift = ff_div6[q] + 2;
2042             int idx = ff_rem6[q];
2044                 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2045                     ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2046                     h->pps.scaling_matrix4[i][x]) << shift;
/**
 * (Re)build all dequant tables for the current PPS.
 * 8x8 tables are only needed when the PPS enables 8x8 transforms.
 * With lossless transform bypass, QP 0 must dequantize to identity:
 * force those entries to 1<<6 (the unit value given the table scaling).
 */
2051 static void init_dequant_tables(H264Context *h){
2053     init_dequant4_coeff_table(h);
2054     if(h->pps.transform_8x8_mode)
2055         init_dequant8_coeff_table(h);
2056     if(h->sps.transform_bypass){
2059                 h->dequant4_coeff[i][0][x] = 1<<6;
2060         if(h->pps.transform_8x8_mode)
2063                 h->dequant8_coeff[i][0][x] = 1<<6;
/**
 * Allocate the per-picture-size decoder tables (intra pred modes,
 * nnz counts, slice table, cbp, mvd, direct, mb->block index maps).
 * needs width/height to be already set (mb_stride/mb_height valid).
 * CHECKED_ALLOCZ jumps to a fail label (elided here) on OOM, where
 * free_tables() cleans up.
 * @return 0 on success, negative on allocation failure.
 */
2072 static int alloc_tables(H264Context *h){
2073     MpegEncContext * const s = &h->s;
      /* one extra mb row as guard for edge macroblock neighbor accesses */
2074     const int big_mb_num= s->mb_stride * (s->mb_height+1);
2077     CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
2079     CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
      /* NOTE(review): slice ids stored as uint8_t — limits distinguishable slices to 255 */
2080     CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2081     CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2083     CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2084     CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2085     CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2086     CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
      /* -1 marks "no slice"; slice_table is offset so out-of-frame neighbors are valid reads */
2088     memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
2089     h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2091     CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
2092     CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
      /* macroblock index -> 4x4 (b) and 8x8 (b8) block index lookup tables */
2093     for(y=0; y<s->mb_height; y++){
2094         for(x=0; x<s->mb_width; x++){
2095             const int mb_xy= x + y*s->mb_stride;
2096             const int b_xy = 4*x + 4*y*h->b_stride;
2097             const int b8_xy= 2*x + 2*y*h->b8_stride;
2099             h->mb2b_xy [mb_xy]= b_xy;
2100             h->mb2b8_xy[mb_xy]= b8_xy;
      /* allocated lazily in frame_start() once linesize is known */
2104     s->obmc_scratchpad = NULL;
2106     if(!h->dequant4_coeff[0])
2107         init_dequant_tables(h);
/**
 * Mimic alloc_tables(), but for every context thread.
 * Slice-thread contexts share (alias) the big read/write tables of the
 * source context instead of allocating their own; only the prediction
 * context and the lazily-allocated scratchpad are per-thread.
 */
2118 static void clone_tables(H264Context *dst, H264Context *src){
2119     dst->intra4x4_pred_mode       = src->intra4x4_pred_mode;
2120     dst->non_zero_count           = src->non_zero_count;
2121     dst->slice_table              = src->slice_table;
2122     dst->cbp_table                = src->cbp_table;
2123     dst->mb2b_xy                  = src->mb2b_xy;
2124     dst->mb2b8_xy                 = src->mb2b8_xy;
2125     dst->chroma_pred_mode_table   = src->chroma_pred_mode_table;
2126     dst->mvd_table[0]             = src->mvd_table[0];
2127     dst->mvd_table[1]             = src->mvd_table[1];
2128     dst->direct_table             = src->direct_table;
      /* per-thread; allocated in frame_start() once linesize is known */
2130     dst->s.obmc_scratchpad = NULL;
2131     ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2136 * Allocate buffers which are not shared amongst multiple threads.
2138 static int context_init(H264Context *h){
2139 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2140 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2144 return -1; // free_tables will clean up for us
/**
 * One-time initialization shared by decoder (and encoder) init paths:
 * copies dimensions/codec id from the AVCodecContext, sets up the intra
 * prediction function pointers and defaults the scaling matrices to
 * flat 16 (the spec default before any SPS/PPS overrides them).
 */
2147 static av_cold void common_init(H264Context *h){
2148     MpegEncContext * const s = &h->s;
2150     s->width = s->avctx->width;
2151     s->height = s->avctx->height;
2152     s->codec_id= s->avctx->codec->id;
2154     ff_h264_pred_init(&h->hpc, s->codec_id);
      /* -1 = "no PPS seen yet", forces dequant table init on first PPS */
2156     h->dequant_coeff_pps= -1;
2157     s->unrestricted_mv=1;
2158     s->decode=1; //FIXME
2160     memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2161     memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/**
 * AVCodec init callback for the H.264 decoder (also reached by SVQ3).
 * Sets up MpegEncContext defaults, picks the output pixel format and
 * detects avcC-style ("mp4-flavored") extradata by its leading version
 * byte of 1 (Annex-B extradata starts with a 00 00 .. start code).
 * NOTE(review): several lines (common_init call, avcC handling body,
 * return) are elided in this extraction.
 */
2164 static av_cold int decode_init(AVCodecContext *avctx){
2165     H264Context *h= avctx->priv_data;
2166     MpegEncContext * const s = &h->s;
2168     MPV_decode_defaults(s);
2173     s->out_format = FMT_H264;
2174     s->workaround_bugs= avctx->workaround_bugs;
2177 //    s->decode_mb= ff_h263_decode_mb;
2178     s->quarter_sample = 1;
      /* SVQ3 uses full-range (JPEG) chroma; plain H.264 uses MPEG range */
2181     if(avctx->codec_id == CODEC_ID_SVQ3)
2182         avctx->pix_fmt= PIX_FMT_YUVJ420P;
2184         avctx->pix_fmt= PIX_FMT_YUV420P;
      /* avcC extradata: first byte is configurationVersion == 1 */
2188     if(avctx->extradata_size > 0 && avctx->extradata &&
2189        *(char *)avctx->extradata == 1){
2196     h->thread_context[0] = h;
      /* INT_MIN = "nothing output yet" sentinel for delayed-output ordering */
2197     h->outputed_poc = INT_MIN;
/**
 * Begin decoding a new frame/field: acquire a Picture via
 * MPV_frame_start(), start error resilience, precompute the per-block
 * pixel offsets for frame and field (MBAFF) addressing, and allocate
 * the per-thread bipred scratch buffers (needs linesize, so it cannot
 * live in alloc_tables()).
 * @return 0 on success, negative if MPV_frame_start() failed.
 */
2201 static int frame_start(H264Context *h){
2202     MpegEncContext * const s = &h->s;
2205     if(MPV_frame_start(s, s->avctx) < 0)
2207     ff_er_frame_start(s);
     /*
      * MPV_frame_start uses pict_type to derive key_frame.
      * This is incorrect for H.264; IDR markings must be used.
      * Zero here; IDR markings per slice in frame or fields are ORed in later.
      * See decode_nal_units().
      */
2214     s->current_picture_ptr->key_frame= 0;
2216     assert(s->linesize && s->uvlinesize);
      /* block_offset[0..23]: frame addressing; [24..47]: field (doubled stride) */
2218     for(i=0; i<16; i++){
2219         h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2220         h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2223         h->block_offset[16+i]=
2224         h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2225         h->block_offset[24+16+i]=
2226         h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
     /* can't be in alloc_tables because linesize isn't known there.
      * FIXME: redo bipred weight to not require extra buffer? */
2231     for(i = 0; i < s->avctx->thread_count; i++)
2232         if(!h->thread_context[i]->s.obmc_scratchpad)
2233             h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
     /* some macroblocks will be accessed before they're available */
2236     if(FRAME_MBAFF || s->avctx->thread_count > 1)
2237         memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2239 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
     // We mark the current picture as non-reference after allocating it, so
     // that if we break out due to an error it can be released automatically
     // in the next MPV_frame_start().
     // SVQ3 as well as most other codecs have only last/next/current and thus
     // get released even with set reference, besides SVQ3 and others do not
     // mark frames as reference later "naturally".
2247     if(s->codec_id != CODEC_ID_SVQ3)
2248         s->current_picture_ptr->reference= 0;
      /* INT_MAX = "poc not yet known" for each field */
2250     s->current_picture_ptr->field_poc[0]=
2251     s->current_picture_ptr->field_poc[1]= INT_MAX;
2252     assert(s->current_picture_ptr->long_ref==0);
2257 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2258 MpegEncContext * const s = &h->s;
2262 src_cb -= uvlinesize;
2263 src_cr -= uvlinesize;
2265 // There are two lines saved, the line above the the top macroblock of a pair,
2266 // and the line above the bottom macroblock
2267 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2268 for(i=1; i<17; i++){
2269 h->left_border[i]= src_y[15+i* linesize];
2272 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2273 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2275 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2276 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
2277 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
2279 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
2280 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
2282 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2283 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
2287 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2288 MpegEncContext * const s = &h->s;
2295 if(h->deblocking_filter == 2) {
2297 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2298 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2300 deblock_left = (s->mb_x > 0);
2301 deblock_top = (s->mb_y > 0);
2304 src_y -= linesize + 1;
2305 src_cb -= uvlinesize + 1;
2306 src_cr -= uvlinesize + 1;
2308 #define XCHG(a,b,t,xchg)\
2315 for(i = !deblock_top; i<17; i++){
2316 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2321 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2322 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2323 if(s->mb_x+1 < s->mb_width){
2324 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2328 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2330 for(i = !deblock_top; i<9; i++){
2331 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
2332 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
2336 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2337 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2342 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
2343 MpegEncContext * const s = &h->s;
2346 src_y -= 2 * linesize;
2347 src_cb -= 2 * uvlinesize;
2348 src_cr -= 2 * uvlinesize;
2350 // There are two lines saved, the line above the the top macroblock of a pair,
2351 // and the line above the bottom macroblock
2352 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2353 h->left_border[1]= h->top_borders[1][s->mb_x][15];
2354 for(i=2; i<34; i++){
2355 h->left_border[i]= src_y[15+i* linesize];
2358 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
2359 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
2360 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
2361 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
2363 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2364 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
2365 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
2366 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
2367 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
2368 for(i=2; i<18; i++){
2369 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
2370 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
2372 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
2373 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
2374 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
2375 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
2379 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
2380 MpegEncContext * const s = &h->s;
2383 int deblock_left = (s->mb_x > 0);
2384 int deblock_top = (s->mb_y > 1);
2386 tprintf(s->avctx, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
2388 src_y -= 2 * linesize + 1;
2389 src_cb -= 2 * uvlinesize + 1;
2390 src_cr -= 2 * uvlinesize + 1;
2392 #define XCHG(a,b,t,xchg)\
2399 for(i = (!deblock_top)<<1; i<34; i++){
2400 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2405 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2406 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2407 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
2408 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
2409 if(s->mb_x+1 < s->mb_width){
2410 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2411 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
2415 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2417 for(i = (!deblock_top) << 1; i<18; i++){
2418 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
2419 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
2423 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2424 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2425 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
2426 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
2431 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2432 MpegEncContext * const s = &h->s;
2433 const int mb_x= s->mb_x;
2434 const int mb_y= s->mb_y;
2435 const int mb_xy= h->mb_xy;
2436 const int mb_type= s->current_picture.mb_type[mb_xy];
2437 uint8_t *dest_y, *dest_cb, *dest_cr;
2438 int linesize, uvlinesize /*dct_offset*/;
2440 int *block_offset = &h->block_offset[0];
2441 const unsigned int bottom = mb_y & 1;
2442 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2443 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2444 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2446 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2447 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2448 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2450 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2451 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
2453 if (!simple && MB_FIELD) {
2454 linesize = h->mb_linesize = s->linesize * 2;
2455 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2456 block_offset = &h->block_offset[24];
2457 if(mb_y&1){ //FIXME move out of this function?
2458 dest_y -= s->linesize*15;
2459 dest_cb-= s->uvlinesize*7;
2460 dest_cr-= s->uvlinesize*7;
2464 for(list=0; list<h->list_count; list++){
2465 if(!USES_LIST(mb_type, list))
2467 if(IS_16X16(mb_type)){
2468 int8_t *ref = &h->ref_cache[list][scan8[0]];
2469 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2471 for(i=0; i<16; i+=4){
2472 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
2473 int ref = h->ref_cache[list][scan8[i]];
2475 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2481 linesize = h->mb_linesize = s->linesize;
2482 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2483 // dct_offset = s->linesize * 16;
2486 if(transform_bypass){
2488 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2489 }else if(IS_8x8DCT(mb_type)){
2490 idct_dc_add = s->dsp.h264_idct8_dc_add;
2491 idct_add = s->dsp.h264_idct8_add;
2493 idct_dc_add = s->dsp.h264_idct_dc_add;
2494 idct_add = s->dsp.h264_idct_add;
2497 if(!simple && FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
2498 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
2499 int mbt_y = mb_y&~1;
2500 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
2501 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2502 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2503 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
2506 if (!simple && IS_INTRA_PCM(mb_type)) {
2507 for (i=0; i<16; i++) {
2508 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
2510 for (i=0; i<8; i++) {
2511 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2512 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2515 if(IS_INTRA(mb_type)){
2516 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2517 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2519 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2520 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2521 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2524 if(IS_INTRA4x4(mb_type)){
2525 if(simple || !s->encoding){
2526 if(IS_8x8DCT(mb_type)){
2527 for(i=0; i<16; i+=4){
2528 uint8_t * const ptr= dest_y + block_offset[i];
2529 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2530 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2531 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2532 (h->topright_samples_available<<i)&0x4000, linesize);
2534 if(nnz == 1 && h->mb[i*16])
2535 idct_dc_add(ptr, h->mb + i*16, linesize);
2537 idct_add(ptr, h->mb + i*16, linesize);
2541 for(i=0; i<16; i++){
2542 uint8_t * const ptr= dest_y + block_offset[i];
2544 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2547 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2548 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2549 assert(mb_y || linesize <= block_offset[i]);
2550 if(!topright_avail){
2551 tr= ptr[3 - linesize]*0x01010101;
2552 topright= (uint8_t*) &tr;
2554 topright= ptr + 4 - linesize;
2558 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2559 nnz = h->non_zero_count_cache[ scan8[i] ];
2562 if(nnz == 1 && h->mb[i*16])
2563 idct_dc_add(ptr, h->mb + i*16, linesize);
2565 idct_add(ptr, h->mb + i*16, linesize);
2567 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2572 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2574 if(!transform_bypass)
2575 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2577 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2579 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2580 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
2582 hl_motion(h, dest_y, dest_cb, dest_cr,
2583 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2584 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2585 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
2589 if(!IS_INTRA4x4(mb_type)){
2591 if(IS_INTRA16x16(mb_type)){
2592 for(i=0; i<16; i++){
2593 if(h->non_zero_count_cache[ scan8[i] ])
2594 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2595 else if(h->mb[i*16])
2596 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2599 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2600 for(i=0; i<16; i+=di){
2601 int nnz = h->non_zero_count_cache[ scan8[i] ];
2603 if(nnz==1 && h->mb[i*16])
2604 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2606 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2611 for(i=0; i<16; i++){
2612 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2613 uint8_t * const ptr= dest_y + block_offset[i];
2614 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
2620 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2621 uint8_t *dest[2] = {dest_cb, dest_cr};
2622 if(transform_bypass){
2623 idct_add = idct_dc_add = s->dsp.add_pixels4;
2625 idct_add = s->dsp.h264_idct_add;
2626 idct_dc_add = s->dsp.h264_idct_dc_add;
2627 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2628 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2631 for(i=16; i<16+8; i++){
2632 if(h->non_zero_count_cache[ scan8[i] ])
2633 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2634 else if(h->mb[i*16])
2635 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2638 for(i=16; i<16+8; i++){
2639 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2640 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2641 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2647 if(h->deblocking_filter) {
2648 if (!simple && FRAME_MBAFF) {
2649 //FIXME try deblocking one mb at a time?
2650 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
2651 const int mb_y = s->mb_y - 1;
2652 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
2653 const int mb_xy= mb_x + mb_y*s->mb_stride;
2654 const int mb_type_top = s->current_picture.mb_type[mb_xy];
2655 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
2656 if (!bottom) return;
2657 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2658 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2659 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2661 if(IS_INTRA(mb_type_top | mb_type_bottom))
2662 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
2664 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
2667 s->mb_y--; h->mb_xy -= s->mb_stride;
2668 tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
2669 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
2670 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2671 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2672 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
2674 s->mb_y++; h->mb_xy += s->mb_stride;
2675 tprintf(h->s.avctx, "call mbaff filter_mb\n");
2676 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
2677 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2678 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2679 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2681 tprintf(h->s.avctx, "call filter_mb\n");
2682 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2683 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2684 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2685 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2686 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
/**
 * Process a macroblock; this case avoids checks for expensive uncommon cases.
 * (simple=1 instantiation of hl_decode_mb_internal.)
 */
2694 static void hl_decode_mb_simple(H264Context *h){
2695     hl_decode_mb_internal(h, 1);
/**
 * Process a macroblock; this handles edge cases, such as interlacing.
 * (simple=0 instantiation of hl_decode_mb_internal; av_noinline keeps the
 * rarely-taken slow path out of the fast path's code size.)
 */
2701 static void av_noinline hl_decode_mb_complex(H264Context *h){
2702     hl_decode_mb_internal(h, 0);
/**
 * Dispatch macroblock reconstruction to the fast simple path or the
 * complex path (interlacing, PCM, SVQ3, gray-only, encoder, size-optimized
 * builds all force the complex path).
 */
2705 static void hl_decode_mb(H264Context *h){
2706     MpegEncContext * const s = &h->s;
2707     const int mb_xy= h->mb_xy;
2708     const int mb_type= s->current_picture.mb_type[mb_xy];
2709     int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 ||
2710                     (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding) || ENABLE_SMALL;
      /* encoder-only: nothing to reconstruct when decode is disabled */
2712     if(ENABLE_H264_ENCODER && !s->decode)
2716         hl_decode_mb_complex(h);
2717     else hl_decode_mb_simple(h);
/**
 * Convert a frame Picture in place into a single-field view: for the
 * bottom field, advance each plane by one line; double all linesizes so
 * stepping one "row" skips the opposite field; set reference/poc to the
 * requested field parity.
 */
2720 static void pic_as_field(Picture *pic, const int parity){
2722     for (i = 0; i < 4; ++i) {
2723         if (parity == PICT_BOTTOM_FIELD)
2724             pic->data[i]   += pic->linesize[i];
2725         pic->reference    = parity;
2726         pic->linesize[i] *= 2;
2728     pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/**
 * Copy src into dest if src is usable as a reference with the given
 * parity; when a single field is requested (not PICT_FRAME) the copy is
 * converted into a field picture and id_add is applied to pic_id.
 * @return 1 if the copy was made, 0 otherwise.
 * NOTE(review): the early-return on no match and the copy itself are
 * elided in this extraction.
 */
2731 static int split_field_copy(Picture *dest, Picture *src,
2732                             int parity, int id_add){
2733     int match = !!(src->reference & parity);
2737         if(parity != PICT_FRAME){
2738             pic_as_field(dest, parity);
2740             dest->pic_id += id_add;
/**
 * Build part of a default reference list from the candidate array "in",
 * alternating between pictures that reference the selected field parity
 * (sel) and the opposite parity (sel^3), as the field reference list
 * construction requires. pic_id is the frame_num for short-term refs or
 * the array index for long-term refs.
 * @return the number of entries written to def.
 */
2747 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2751     while(i[0]<len || i[1]<len){
          /* advance each cursor to the next candidate with the wanted parity */
2752         while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2754         while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2757             in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2758             split_field_copy(&def[index++], in[ i[0]++ ], sel  , 1);
2761             in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2762             split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
/**
 * Append to "sorted" the pictures from src ordered by POC relative to
 * "limit": dir=0 picks POCs below limit in descending order, dir=1 picks
 * POCs above limit in ascending order (the two halves of a B-list).
 * Selection-sort style: each pass finds the best remaining POC.
 * @return number of pictures appended.
 */
2769 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2774         best_poc= dir ? INT_MIN : INT_MAX;
2776         for(i=0; i<len; i++){
2777             const int poc= src[i]->poc;
2778             if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2780                 sorted[out_i]= src[i];
          /* sentinel unchanged -> no candidate left on this side, done */
2783         if(best_poc == (dir ? INT_MIN : INT_MAX))
2785         limit= sorted[out_i++]->poc - dir;
/**
 * fills the default_ref_list.
 * For B slices, both lists are built from the short-term refs sorted by
 * POC around the current picture's POC (L0: past then future, L1: future
 * then past) followed by the long-term refs; if the two lists come out
 * identical with more than one entry, the first two entries of L1 are
 * swapped as the spec requires. For P slices, only list 0 is built.
 * Fix(review): the L0/L1 comparison loop now checks i<lens[0] BEFORE
 * dereferencing the list entries; the original dereferenced index
 * lens[0] (one past the end) when the lists were fully identical.
 * NOTE(review): several lines (declarations, loop/brace lines) are
 * elided in this extraction.
 */
2793 static int fill_default_ref_list(H264Context *h){
2794     MpegEncContext * const s = &h->s;
2797     if(h->slice_type_nos==FF_B_TYPE){
2798         Picture *sorted[32];
          /* current POC: the coded field's POC when decoding fields */
2803             cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2805             cur_poc= s->current_picture_ptr->poc;
2807         for(list= 0; list<2; list++){
2808             len= add_sorted(sorted    , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2809             len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2811             len= build_def_list(h->default_ref_list[list]    , sorted     , len, 0, s->picture_structure);
2812             len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2815             if(len < h->ref_count[list])
2816                 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
          /* identical lists: swap L1's first two entries (H.264 8.2.4.2.3);
           * bound check first to avoid reading one past the end */
2820         if(lens[0] == lens[1] && lens[1] > 1){
2821             for(i=0; i<lens[0] && h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0]; i++);
2823                 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2826         len = build_def_list(h->default_ref_list[0]    , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2827         len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16                , 1, s->picture_structure);
2829         if(len < h->ref_count[0])
2830             memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2833     for (i=0; i<h->ref_count[0]; i++) {
2834         tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2836     if(h->slice_type_nos==FF_B_TYPE){
2837         for (i=0; i<h->ref_count[1]; i++) {
2838             tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2845 static void print_short_term(H264Context *h);
2846 static void print_long_term(H264Context *h);
/**
 * Extract structure information about the picture described by pic_num in
 * the current decoding context (frame or field). Note that pic_num is
 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
 * @param pic_num picture number for which to extract structure information
 * @param structure one of PICT_XXX describing structure of picture
 *                      with pic_num
 * @return frame number (short term) or long term index of picture
 *         described by pic_num
 * NOTE(review): the parity test and the return expression are elided
 * in this extraction; visible code only shows the opposite-field flip.
 */
2858 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2859     MpegEncContext * const s = &h->s;
2861     *structure = s->picture_structure;
          /* odd/even pic_num selects same/opposite field of the current parity */
2865         *structure ^= PICT_FRAME;
/**
 * Parse the ref_pic_list_reordering syntax (H.264 7.3.3.1) and apply it:
 * starts from the default lists, then for each reordering command moves
 * the named short-term (idc 0/1, relative pic num) or long-term (idc 2)
 * picture to the current index, shifting the rest down. idc 3 ends the
 * loop. Afterwards missing entries are patched with the current picture.
 * @return 0 on success, -1 on bitstream errors.
 * NOTE(review): numerous lines (declarations, brace/else lines, the
 * search-loop exits) are elided in this extraction.
 */
2872 static int decode_ref_pic_list_reordering(H264Context *h){
2873     MpegEncContext * const s = &h->s;
2874     int list, index, pic_structure;
2876     print_short_term(h);
2879     for(list=0; list<h->list_count; list++){
2880         memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
          /* ref_pic_list_reordering_flag_l0/l1 */
2882         if(get_bits1(&s->gb)){
2883             int pred= h->curr_pic_num;
2885             for(index=0; ; index++){
2886                 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2887                 unsigned int pic_id;
2889                 Picture *ref = NULL;
2891                 if(reordering_of_pic_nums_idc==3)
2894                 if(index >= h->ref_count[list]){
2895                     av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2899                 if(reordering_of_pic_nums_idc<3){
2900                     if(reordering_of_pic_nums_idc<2){
2901                         const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2904                         if(abs_diff_pic_num > h->max_pic_num){
2905                             av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
                      /* idc 0 subtracts, idc 1 adds; wrap modulo max_pic_num */
2909                         if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2910                         else                                pred+= abs_diff_pic_num;
2911                         pred &= h->max_pic_num - 1;
2913                         frame_num = pic_num_extract(h, pred, &pic_structure);
                      /* newest-first search of the short-term list */
2915                         for(i= h->short_ref_count-1; i>=0; i--){
2916                             ref = h->short_ref[i];
2917                             assert(ref->reference);
2918                             assert(!ref->long_ref);
2920                                 ref->frame_num == frame_num &&
2921                                 (ref->reference & pic_structure)
2929                         pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2931                         long_idx= pic_num_extract(h, pic_id, &pic_structure);
2934                             av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2937                         ref = h->long_ref[long_idx];
2938                         assert(!(ref && !ref->reference));
2939                         if(ref && (ref->reference & pic_structure)){
2940                             ref->pic_id= pic_id;
2941                             assert(ref->long_ref);
2949                         av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2950                         memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
                      /* find where (if anywhere) this ref already sits, then
                       * shift entries down and insert it at the current index */
2952                         for(i=index; i+1<h->ref_count[list]; i++){
2953                             if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2956                         for(; i > index; i--){
2957                             h->ref_list[list][i]= h->ref_list[list][i-1];
2959                         h->ref_list[list][index]= *ref;
2961                             pic_as_field(&h->ref_list[list][index], pic_structure);
2965                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
      /* patch holes; FIXME comment below is the original author's */
2971     for(list=0; list<h->list_count; list++){
2972         for(index= 0; index < h->ref_count[list]; index++){
2973             if(!h->ref_list[list][index].data[0]){
2974                 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2975                 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
2980     if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
2981         direct_dist_scale_factor(h);
2982     direct_ref_list_init(h);
/**
 * For MBAFF decoding, derive per-field reference entries: for each frame
 * in ref_list[list][i], entries [16+2*i] (top field) and [16+2*i+1]
 * (bottom field) are created with doubled linesizes, and the weighted-
 * prediction tables are duplicated accordingly.
 * NOTE(review): the inner plane loop headers are elided here.
 */
2986 static void fill_mbaff_ref_list(H264Context *h){
2988     for(list=0; list<2; list++){ //FIXME try list_count
2989         for(i=0; i<h->ref_count[list]; i++){
2990             Picture *frame = &h->ref_list[list][i];
2991             Picture *field = &h->ref_list[list][16+2*i];
2994                 field[0].linesize[j] <<= 1;
2995             field[0].reference = PICT_TOP_FIELD;
2996             field[1] = field[0];
              /* bottom field starts one (original) line into each plane */
2998                 field[1].data[j] += frame->linesize[j];
2999             field[1].reference = PICT_BOTTOM_FIELD;
3001             h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
3002             h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
3004                 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
3005                 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
3009     for(j=0; j<h->ref_count[1]; j++){
3010         for(i=0; i<h->ref_count[0]; i++)
3011             h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
3012         memcpy(h->implicit_weight[16+2*j],   h->implicit_weight[j], sizeof(*h->implicit_weight));
3013         memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
3017 static int pred_weight_table(H264Context *h){
/* Parse the explicit pred_weight_table() slice-header syntax: per-list,
 * per-reference luma/chroma weights and offsets.  Defaults are
 * 1<<log2_denom weight and 0 offset when the per-ref flag is absent.
 * NOTE(review): extraction gaps — some declarations, else branches and
 * closing braces are not visible; code kept verbatim. */
3018 MpegEncContext * const s = &h->s;
3020 int luma_def, chroma_def;
3023 h->use_weight_chroma= 0;
3024 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3025 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
/* default weight is the denominator itself, i.e. a no-op scale */
3026 luma_def = 1<<h->luma_log2_weight_denom;
3027 chroma_def = 1<<h->chroma_log2_weight_denom;
3029 for(list=0; list<2; list++){
3030 for(i=0; i<h->ref_count[list]; i++){
3031 int luma_weight_flag, chroma_weight_flag;
3033 luma_weight_flag= get_bits1(&s->gb);
3034 if(luma_weight_flag){
3035 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3036 h->luma_offset[list][i]= get_se_golomb(&s->gb);
/* only a non-default pair actually enables weighting */
3037 if( h->luma_weight[list][i] != luma_def
3038 || h->luma_offset[list][i] != 0)
3041 h->luma_weight[list][i]= luma_def;
3042 h->luma_offset[list][i]= 0;
3046 chroma_weight_flag= get_bits1(&s->gb);
3047 if(chroma_weight_flag){
3050 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3051 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3052 if( h->chroma_weight[list][i][j] != chroma_def
3053 || h->chroma_offset[list][i][j] != 0)
3054 h->use_weight_chroma= 1;
3059 h->chroma_weight[list][i][j]= chroma_def;
3060 h->chroma_offset[list][i][j]= 0;
/* list 1 only exists for B slices */
3065 if(h->slice_type_nos != FF_B_TYPE) break;
3067 h->use_weight= h->use_weight || h->use_weight_chroma;
3071 static void implicit_weight_table(H264Context *h){
/* Derive implicit B-slice bi-prediction weights from POC distances
 * (weighted_bipred_idc == 2).  Weights outside the legal range fall back
 * to the equal-weight value 32 (of 64).  NOTE(review): extraction gaps —
 * declarations of ref0/ref1 and some branches are not visible. */
3072 MpegEncContext * const s = &h->s;
3074 int cur_poc = s->current_picture_ptr->poc;
/* single ref each side, current POC exactly midway: equal weighting */
3076 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3077 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3079 h->use_weight_chroma= 0;
3084 h->use_weight_chroma= 2;
3085 h->luma_log2_weight_denom= 5;
3086 h->chroma_log2_weight_denom= 5;
3088 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3089 int poc0 = h->ref_list[0][ref0].poc;
3090 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3091 int poc1 = h->ref_list[1][ref1].poc;
/* td/tb/tx/DistScaleFactor per H.264 8.4.2.3.2 temporal scaling */
3092 int td = av_clip(poc1 - poc0, -128, 127);
3094 int tb = av_clip(cur_poc - poc0, -128, 127);
3095 int tx = (16384 + (FFABS(td) >> 1)) / td;
3096 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3097 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3098 h->implicit_weight[ref0][ref1] = 32;
3100 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3102 h->implicit_weight[ref0][ref1] = 32;
3108 * Mark a picture as no longer needed for reference. The refmask
3109 * argument allows unreferencing of individual fields or the whole frame.
3110 * If the picture becomes entirely unreferenced, but is being held for
3111 * display purposes, it is marked as such.
3112 * @param refmask mask of fields to unreference; the mask is bitwise
3113 * anded with the reference marking of pic
3114 * @return non-zero if pic becomes entirely unreferenced (except possibly
3115 * for display purposes) zero if one of the fields remains in
3118 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
/* keep the field bits that survive the mask; non-zero means still referenced */
3120 if (pic->reference &= refmask) {
/* fully unreferenced but queued for output: keep alive as DELAYED_PIC_REF */
3123 for(i = 0; h->delayed_pic[i]; i++)
3124 if(pic == h->delayed_pic[i]){
3125 pic->reference=DELAYED_PIC_REF;
3133 * instantaneous decoder refresh: drop all long- and short-term
 * references and reset frame_num bookkeeping, per IDR semantics.
3135 static void idr(H264Context *h){
3138 for(i=0; i<16; i++){
3139 remove_long(h, i, 0);
3141 assert(h->long_ref_count==0);
3143 for(i=0; i<h->short_ref_count; i++){
3144 unreference_pic(h, h->short_ref[i], 0);
3145 h->short_ref[i]= NULL;
3147 h->short_ref_count=0;
3148 h->prev_frame_num= 0;
3149 h->prev_frame_num_offset= 0;
3154 /* forget old pics after a seek */
3155 static void flush_dpb(AVCodecContext *avctx){
3156 H264Context *h= avctx->priv_data;
/* drop every delayed (output-pending) picture */
3158 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3159 if(h->delayed_pic[i])
3160 h->delayed_pic[i]->reference= 0;
3161 h->delayed_pic[i]= NULL;
/* reset output-POC tracking so reordering restarts cleanly */
3163 h->outputed_poc= INT_MIN;
3165 if(h->s.current_picture_ptr)
3166 h->s.current_picture_ptr->reference= 0;
3167 h->s.first_field= 0;
3168 ff_mpeg_flush(avctx);
3172 * Find a Picture in the short term reference list by frame number.
3173 * @param frame_num frame number to search for
3174 * @param idx the index into h->short_ref where returned picture is found
3175 * undefined if no picture found.
3176 * @return pointer to the found picture, or NULL if no pic with the provided
3177 * frame number is found
3179 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3180 MpegEncContext * const s = &h->s;
/* linear scan; short_ref_count is small (<=16) so this is fine */
3183 for(i=0; i<h->short_ref_count; i++){
3184 Picture *pic= h->short_ref[i];
3185 if(s->avctx->debug&FF_DEBUG_MMCO)
3186 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3187 if(pic->frame_num == frame_num) {
3196 * Remove a picture from the short term reference list by its index in
3197 * that list. This does no checking on the provided index; it is assumed
3198 * to be valid. Other list entries are shifted down.
3199 * @param i index into h->short_ref of picture to remove.
3201 static void remove_short_at_index(H264Context *h, int i){
3202 assert(i >= 0 && i < h->short_ref_count);
3203 h->short_ref[i]= NULL;
/* compact the tail of the list over the removed slot */
3204 if (--h->short_ref_count)
3205 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3210 * Remove (unreference, per ref_mask) a short-term reference by frame_num.
 * The entry is dropped from the list only if it becomes fully unreferenced.
 * @return the removed picture or NULL if an error occurs
3212 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3213 MpegEncContext * const s = &h->s;
3217 if(s->avctx->debug&FF_DEBUG_MMCO)
3218 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3220 pic = find_short(h, frame_num, &i);
3222 if(unreference_pic(h, pic, ref_mask))
3223 remove_short_at_index(h, i);
3230 * Remove a picture from the long term reference list by its index in
 * that list (unreferencing per ref_mask; the slot is cleared only when the
 * picture becomes fully unreferenced).
3232 * @return the removed picture or NULL if an error occurs
3234 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3237 pic= h->long_ref[i];
3239 if(unreference_pic(h, pic, ref_mask)){
3240 assert(h->long_ref[i]->long_ref == 1);
3241 h->long_ref[i]->long_ref= 0;
3242 h->long_ref[i]= NULL;
3243 h->long_ref_count--;
3251 * print short term list (debug only; gated on FF_DEBUG_MMCO)
3253 static void print_short_term(H264Context *h) {
3255 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3256 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3257 for(i=0; i<h->short_ref_count; i++){
3258 Picture *pic= h->short_ref[i];
3259 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3265 * print long term list (debug only; gated on FF_DEBUG_MMCO)
3267 static void print_long_term(H264Context *h) {
3269 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3270 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
/* long_ref is a fixed 16-slot array; empty slots are skipped (check
 * presumably on a missing line of this extraction) */
3271 for(i = 0; i < 16; i++){
3272 Picture *pic= h->long_ref[i];
3274 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3281 * Executes the reference picture marking (memory management control operations),
 * i.e. applies the decoded MMCO list to the short/long reference lists
 * (H.264 clause 8.2.5).  NOTE(review): extraction gaps — some case labels,
 * breaks and closing braces are missing from view; code kept verbatim.
3283 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3284 MpegEncContext * const s = &h->s;
3286 int current_ref_assigned=0;
3289 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3290 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3292 for(i=0; i<mmco_count; i++){
3293 int structure, frame_num;
3294 if(s->avctx->debug&FF_DEBUG_MMCO)
3295 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
/* opcodes that name a short-term picture: resolve it up front */
3297 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3298 || mmco[i].opcode == MMCO_SHORT2LONG){
3299 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3300 pic = find_short(h, frame_num, &j);
3302 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3303 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3304 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3309 switch(mmco[i].opcode){
3310 case MMCO_SHORT2UNUSED:
3311 if(s->avctx->debug&FF_DEBUG_MMCO)
3312 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3313 remove_short(h, frame_num, structure ^ PICT_FRAME);
3315 case MMCO_SHORT2LONG:
/* evict whatever occupied the target long-term slot, then move pic there */
3316 if (h->long_ref[mmco[i].long_arg] != pic)
3317 remove_long(h, mmco[i].long_arg, 0);
3319 remove_short_at_index(h, j);
3320 h->long_ref[ mmco[i].long_arg ]= pic;
3321 if (h->long_ref[ mmco[i].long_arg ]){
3322 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3323 h->long_ref_count++;
3326 case MMCO_LONG2UNUSED:
3327 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3328 pic = h->long_ref[j];
3330 remove_long(h, j, structure ^ PICT_FRAME);
3331 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3332 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3335 // Comment below left from previous code as it is an interesting note.
3336 /* First field in pair is in short term list or
3337 * at a different long term index.
3338 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3339 * Report the problem and keep the pair where it is,
3340 * and mark this field valid.
/* MMCO_LONG (presumably — case label is on a missing line): mark the
 * current picture long-term at long_arg */
3343 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3344 remove_long(h, mmco[i].long_arg, 0);
3346 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3347 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3348 h->long_ref_count++;
3351 s->current_picture_ptr->reference |= s->picture_structure;
3352 current_ref_assigned=1;
3354 case MMCO_SET_MAX_LONG:
3355 assert(mmco[i].long_arg <= 16);
3356 // just remove the long term which index is greater than new max
3357 for(j = mmco[i].long_arg; j<16; j++){
3358 remove_long(h, j, 0);
/* MMCO_RESET (presumably — label on a missing line): empty the whole DPB
 * reference state and restart POC/frame_num at zero */
3362 while(h->short_ref_count){
3363 remove_short(h, h->short_ref[0]->frame_num, 0);
3365 for(j = 0; j < 16; j++) {
3366 remove_long(h, j, 0);
3368 s->current_picture_ptr->poc=
3369 s->current_picture_ptr->field_poc[0]=
3370 s->current_picture_ptr->field_poc[1]=
3374 s->current_picture_ptr->frame_num= 0;
/* no MMCO marked the current picture: default sliding-window behaviour */
3380 if (!current_ref_assigned) {
3381 /* Second field of complementary field pair; the first field of
3382 * which is already referenced. If short referenced, it
3383 * should be first entry in short_ref. If not, it must exist
3384 * in long_ref; trying to put it on the short list here is an
3385 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3387 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3388 /* Just mark the second field valid */
3389 s->current_picture_ptr->reference = PICT_FRAME;
3390 } else if (s->current_picture_ptr->long_ref) {
3391 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3392 "assignment for second field "
3393 "in complementary field pair "
3394 "(first field is long term)\n");
3396 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3398 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
/* prepend the current picture to the short-term list */
3401 if(h->short_ref_count)
3402 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3404 h->short_ref[0]= s->current_picture_ptr;
3405 h->short_ref_count++;
3406 s->current_picture_ptr->reference |= s->picture_structure;
/* clamp total reference count to the SPS limit on corrupt streams */
3410 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3412 /* We have too many reference frames, probably due to corrupted
3413 * stream. Need to discard one frame. Prevents overrun of the
3414 * short_ref and long_ref buffers.
3416 av_log(h->s.avctx, AV_LOG_ERROR,
3417 "number of reference frames exceeds max (probably "
3418 "corrupt input), discarding one\n");
3420 if (h->long_ref_count && !h->short_ref_count) {
3421 for (i = 0; i < 16; ++i)
3426 remove_long(h, i, 0);
3428 pic = h->short_ref[h->short_ref_count - 1];
3429 remove_short(h, pic->frame_num, 0);
3433 print_short_term(h);
/* Parse dec_ref_pic_marking() from the slice header into h->mmco[]
 * (H.264 clause 7.4.3.3).  For IDR slices a synthetic MMCO_LONG is set up;
 * otherwise either the explicit adaptive MMCO list is read, or a
 * sliding-window MMCO_SHORT2UNUSED is synthesized when the buffer is full.
 * NOTE(review): extraction gaps — some branches/braces not visible. */
3438 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3439 MpegEncContext * const s = &h->s;
3443 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
/* no_output_of_prior_pics_flag; "-1" maps flag to broken_link semantics */
3444 s->broken_link= get_bits1(gb) -1;
3446 h->mmco[0].opcode= MMCO_LONG;
3447 h->mmco[0].long_arg= 0;
3451 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3452 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3453 MMCOOpcode opcode= get_ue_golomb(gb);
3455 h->mmco[i].opcode= opcode;
3456 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
/* difference_of_pic_nums_minus1 -> absolute short pic num */
3457 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3458 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3459 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3463 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3464 unsigned int long_arg= get_ue_golomb(gb);
3465 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3466 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3469 h->mmco[i].long_arg= long_arg;
3472 if(opcode > (unsigned)MMCO_LONG){
3473 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3476 if(opcode == MMCO_END)
/* sliding-window mode: implicitly drop the oldest short-term ref when full */
3481 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3483 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3484 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3485 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3486 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3488 if (FIELD_PICTURE) {
/* field pictures: drop both fields (pic nums 2n and 2n+1) */
3489 h->mmco[0].short_pic_num *= 2;
3490 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3491 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/* Compute the picture order count for the current picture for all three
 * SPS poc_type modes (H.264 clause 8.2.1) and store field/frame POCs on
 * the current Picture.  NOTE(review): extraction gaps — field_poc
 * declaration, some assignments and braces are on missing lines. */
3501 static int init_poc(H264Context *h){
3502 MpegEncContext * const s = &h->s;
3503 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3505 Picture *cur = s->current_picture_ptr;
/* FrameNumOffset: grows by max_frame_num each time frame_num wraps */
3507 h->frame_num_offset= h->prev_frame_num_offset;
3508 if(h->frame_num < h->prev_frame_num)
3509 h->frame_num_offset += max_frame_num;
3511 if(h->sps.poc_type==0){
3512 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
/* poc_msb wrap detection from the lsb jump direction (8.2.1.1) */
3514 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3515 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3516 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3517 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3519 h->poc_msb = h->prev_poc_msb;
3520 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3522 field_poc[1] = h->poc_msb + h->poc_lsb;
3523 if(s->picture_structure == PICT_FRAME)
3524 field_poc[1] += h->delta_poc_bottom;
3525 }else if(h->sps.poc_type==1){
3526 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3529 if(h->sps.poc_cycle_length != 0)
3530 abs_frame_num = h->frame_num_offset + h->frame_num;
3534 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3537 expected_delta_per_poc_cycle = 0;
3538 for(i=0; i < h->sps.poc_cycle_length; i++)
3539 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3541 if(abs_frame_num > 0){
3542 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3543 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3545 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3546 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3547 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3551 if(h->nal_ref_idc == 0)
3552 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3554 field_poc[0] = expectedpoc + h->delta_poc[0];
3555 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3557 if(s->picture_structure == PICT_FRAME)
3558 field_poc[1] += h->delta_poc[1];
/* poc_type == 2: POC derived directly from frame_num */
3560 int poc= 2*(h->frame_num_offset + h->frame_num);
/* only store the POC of the field(s) actually present in this picture */
3569 if(s->picture_structure != PICT_BOTTOM_FIELD)
3570 s->current_picture_ptr->field_poc[0]= field_poc[0];
3571 if(s->picture_structure != PICT_TOP_FIELD)
3572 s->current_picture_ptr->field_poc[1]= field_poc[1];
3573 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3580 * initialize scan tables: copy the reference zigzag/field scan orders,
 * or permute them when the DSP context uses a transposed IDCT layout.
3582 static void init_scan_tables(H264Context *h){
3583 MpegEncContext * const s = &h->s;
/* plain C idct: coefficients are in canonical order, copy as-is */
3585 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3586 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3587 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
/* otherwise transpose the 4x4 scan positions (swap row/column bits) */
3589 for(i=0; i<16; i++){
3590 #define T(x) (x>>2) | ((x<<2) & 0xF)
3591 h->zigzag_scan[i] = T(zigzag_scan[i]);
3592 h-> field_scan[i] = T( field_scan[i]);
3596 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3597 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3598 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3599 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3600 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
/* transposed 8x8 variant: swap the 3-bit row/column halves */
3602 for(i=0; i<64; i++){
3603 #define T(x) (x>>3) | ((x&7)<<3)
3604 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3605 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3606 h->field_scan8x8[i] = T(field_scan8x8[i]);
3607 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
/* lossless (transform_bypass) blocks always use the untransposed tables */
3611 if(h->sps.transform_bypass){ //FIXME same ugly
3612 h->zigzag_scan_q0 = zigzag_scan;
3613 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3614 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3615 h->field_scan_q0 = field_scan;
3616 h->field_scan8x8_q0 = field_scan8x8;
3617 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3619 h->zigzag_scan_q0 = h->zigzag_scan;
3620 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3621 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3622 h->field_scan_q0 = h->field_scan;
3623 h->field_scan8x8_q0 = h->field_scan8x8;
3624 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3629 * Replicates H264 "master" context to thread contexts so slice-parallel
 * decoding threads share picture pointers, reference lists and dequant
 * tables.  Shallow copies only — no ownership is transferred.
3631 static void clone_slice(H264Context *dst, H264Context *src)
3633 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3634 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3635 dst->s.current_picture = src->s.current_picture;
3636 dst->s.linesize = src->s.linesize;
3637 dst->s.uvlinesize = src->s.uvlinesize;
3638 dst->s.first_field = src->s.first_field;
/* POC / frame_num prediction state must match the master exactly */
3640 dst->prev_poc_msb = src->prev_poc_msb;
3641 dst->prev_poc_lsb = src->prev_poc_lsb;
3642 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3643 dst->prev_frame_num = src->prev_frame_num;
3644 dst->short_ref_count = src->short_ref_count;
3646 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3647 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3648 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3649 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3651 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3652 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3656 * decodes a slice header.
3657 * This will also call MPV_common_init() and frame_start() as needed.
 *
3659 * @param h h264context
3660 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
 *
3662 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
 *
 * NOTE(review): this extraction is missing many original lines throughout
 * (else branches, braces, several statements); the visible code is kept
 * byte-identical and the interleaved comments below mark the main phases.
3664 static int decode_slice_header(H264Context *h, H264Context *h0){
3665 MpegEncContext * const s = &h->s;
3666 MpegEncContext * const s0 = &h0->s;
3667 unsigned int first_mb_in_slice;
3668 unsigned int pps_id;
3669 int num_ref_idx_active_override_flag;
3670 static const uint8_t slice_type_map[5]= {FF_P_TYPE, FF_B_TYPE, FF_I_TYPE, FF_SP_TYPE, FF_SI_TYPE};
3671 unsigned int slice_type, tmp, i, j;
3672 int default_ref_list_done = 0;
3673 int last_pic_structure;
/* non-reference pictures are droppable; pick faster qpel for FAST mode */
3675 s->dropable= h->nal_ref_idc == 0;
3677 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3678 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3679 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3681 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3682 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3685 first_mb_in_slice= get_ue_golomb(&s->gb);
/* CHUNKS mode: first MB of a picture resets the slice counter */
3687 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3688 h0->current_slice = 0;
3689 if (!s0->first_field)
3690 s->current_picture_ptr= NULL;
/* --- slice_type ------------------------------------------------------ */
3693 slice_type= get_ue_golomb(&s->gb);
3695 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3700 h->slice_type_fixed=1;
3702 h->slice_type_fixed=0;
3704 slice_type= slice_type_map[ slice_type ];
3705 if (slice_type == FF_I_TYPE
3706 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3707 default_ref_list_done = 1;
3709 h->slice_type= slice_type;
3710 h->slice_type_nos= slice_type & 3;
3712 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3713 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3714 av_log(h->s.avctx, AV_LOG_ERROR,
3715 "B picture before any references, skipping\n");
/* --- PPS / SPS activation ------------------------------------------- */
3719 pps_id= get_ue_golomb(&s->gb);
3720 if(pps_id>=MAX_PPS_COUNT){
3721 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3724 if(!h0->pps_buffers[pps_id]) {
3725 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3728 h->pps= *h0->pps_buffers[pps_id];
3730 if(!h0->sps_buffers[h->pps.sps_id]) {
3731 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3734 h->sps = *h0->sps_buffers[h->pps.sps_id];
3736 if(h == h0 && h->dequant_coeff_pps != pps_id){
3737 h->dequant_coeff_pps = pps_id;
3738 init_dequant_tables(h);
/* --- geometry from the SPS ------------------------------------------ */
3741 s->mb_width= h->sps.mb_width;
3742 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3744 h->b_stride= s->mb_width*4;
3745 h->b8_stride= s->mb_width*2;
3747 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3748 if(h->sps.frame_mbs_only_flag)
3749 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3751 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3753 if (s->context_initialized
3754 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3756 return -1; // width / height changed during parallelized decoding
/* --- (re)initialize the MPEG context and per-thread contexts -------- */
3760 if (!s->context_initialized) {
3762 return -1; // we cant (re-)initialize context during parallel decoding
3763 if (MPV_common_init(s) < 0)
3767 init_scan_tables(h);
3770 for(i = 1; i < s->avctx->thread_count; i++) {
3772 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
/* copy MpegEncContext part, zero the H264-specific remainder */
3773 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3774 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3777 init_scan_tables(c);
3781 for(i = 0; i < s->avctx->thread_count; i++)
3782 if(context_init(h->thread_context[i]) < 0)
3785 s->avctx->width = s->width;
3786 s->avctx->height = s->height;
3787 s->avctx->sample_aspect_ratio= h->sps.sar;
3788 if(!s->avctx->sample_aspect_ratio.den)
3789 s->avctx->sample_aspect_ratio.den = 1;
3791 if(h->sps.timing_info_present_flag){
3792 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
/* work around old x264 builds writing halved time_scale */
3793 if(h->x264_build > 0 && h->x264_build < 44)
3794 s->avctx->time_base.den *= 2;
3795 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3796 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
/* --- frame_num / picture structure ---------------------------------- */
3800 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3803 h->mb_aff_frame = 0;
3804 last_pic_structure = s0->picture_structure;
3805 if(h->sps.frame_mbs_only_flag){
3806 s->picture_structure= PICT_FRAME;
3808 if(get_bits1(&s->gb)) { //field_pic_flag
3809 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3811 s->picture_structure= PICT_FRAME;
3812 h->mb_aff_frame = h->sps.mb_aff;
3816 if(h0->current_slice == 0){
/* conceal frame_num gaps by generating dummy reference frames */
3817 while(h->frame_num != h->prev_frame_num &&
3818 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3819 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3821 h->prev_frame_num++;
3822 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3823 s->current_picture_ptr->frame_num= h->prev_frame_num;
3824 execute_ref_pic_marking(h, NULL, 0);
3827 /* See if we have a decoded first field looking for a pair... */
3828 if (s0->first_field) {
3829 assert(s0->current_picture_ptr);
3830 assert(s0->current_picture_ptr->data[0]);
3831 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3833 /* figure out if we have a complementary field pair */
3834 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3836 * Previous field is unmatched. Don't display it, but let it
3837 * remain for reference if marked as such.
3839 s0->current_picture_ptr = NULL;
3840 s0->first_field = FIELD_PICTURE;
3843 if (h->nal_ref_idc &&
3844 s0->current_picture_ptr->reference &&
3845 s0->current_picture_ptr->frame_num != h->frame_num) {
3847 * This and previous field were reference, but had
3848 * different frame_nums. Consider this field first in
3849 * pair. Throw away previous field except for reference
 * purposes.
3852 s0->first_field = 1;
3853 s0->current_picture_ptr = NULL;
3856 /* Second field in complementary pair */
3857 s0->first_field = 0;
3862 /* Frame or first field in a potentially complementary pair */
3863 assert(!s0->current_picture_ptr);
3864 s0->first_field = FIELD_PICTURE;
3867 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3868 s0->first_field = 0;
3875 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
/* --- slice position ------------------------------------------------- */
3877 assert(s->mb_num == s->mb_width * s->mb_height);
3878 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3879 first_mb_in_slice >= s->mb_num){
3880 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3883 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3884 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3885 if (s->picture_structure == PICT_BOTTOM_FIELD)
3886 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3887 assert(s->mb_y < s->mb_height);
/* pic_num range: doubled (+1 for bottom) for field pictures */
3889 if(s->picture_structure==PICT_FRAME){
3890 h->curr_pic_num= h->frame_num;
3891 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3893 h->curr_pic_num= 2*h->frame_num + 1;
3894 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3897 if(h->nal_unit_type == NAL_IDR_SLICE){
3898 get_ue_golomb(&s->gb); /* idr_pic_id */
/* --- POC syntax ------------------------------------------------------ */
3901 if(h->sps.poc_type==0){
3902 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3904 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3905 h->delta_poc_bottom= get_se_golomb(&s->gb);
3909 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3910 h->delta_poc[0]= get_se_golomb(&s->gb);
3912 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3913 h->delta_poc[1]= get_se_golomb(&s->gb);
3918 if(h->pps.redundant_pic_cnt_present){
3919 h->redundant_pic_count= get_ue_golomb(&s->gb);
/* --- reference counts and lists ------------------------------------- */
3922 //set defaults, might be overridden a few lines later
3923 h->ref_count[0]= h->pps.ref_count[0];
3924 h->ref_count[1]= h->pps.ref_count[1];
3926 if(h->slice_type_nos != FF_I_TYPE){
3927 if(h->slice_type_nos == FF_B_TYPE){
3928 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3930 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3932 if(num_ref_idx_active_override_flag){
3933 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3934 if(h->slice_type_nos==FF_B_TYPE)
3935 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
3937 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3938 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3939 h->ref_count[0]= h->ref_count[1]= 1;
3943 if(h->slice_type_nos == FF_B_TYPE)
3950 if(!default_ref_list_done){
3951 fill_default_ref_list(h);
3954 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
/* --- weighted prediction -------------------------------------------- */
3957 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3958 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3959 pred_weight_table(h);
3960 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3961 implicit_weight_table(h);
3966 decode_ref_pic_marking(h0, &s->gb);
3969 fill_mbaff_ref_list(h);
/* --- cabac_init_idc / QP / deblocking ------------------------------- */
3971 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3972 tmp = get_ue_golomb(&s->gb);
3974 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3977 h->cabac_init_idc= tmp;
3980 h->last_qscale_diff = 0;
3981 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3983 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3987 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3988 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3989 //FIXME qscale / qp ... stuff
3990 if(h->slice_type == FF_SP_TYPE){
3991 get_bits1(&s->gb); /* sp_for_switch_flag */
3993 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3994 get_se_golomb(&s->gb); /* slice_qs_delta */
3997 h->deblocking_filter = 1;
3998 h->slice_alpha_c0_offset = 0;
3999 h->slice_beta_offset = 0;
4000 if( h->pps.deblocking_filter_parameters_present ) {
4001 tmp= get_ue_golomb(&s->gb);
4003 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4006 h->deblocking_filter= tmp;
/* bitstream idc 0/1 -> internal 1/0 (1 = filter enabled) */
4007 if(h->deblocking_filter < 2)
4008 h->deblocking_filter^= 1; // 1<->0
4010 if( h->deblocking_filter ) {
4011 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4012 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4016 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4017 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
4018 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
4019 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4020 h->deblocking_filter= 0;
/* cross-slice deblocking cannot be parallelized; fall back or cheat */
4022 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4023 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4024 /* Cheat slightly for speed:
4025 Do not bother to deblock across slices. */
4026 h->deblocking_filter = 2;
4028 h0->max_contexts = 1;
4029 if(!h0->single_decode_warning) {
4030 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4031 h0->single_decode_warning = 1;
4034 return 1; // deblocking switched inside frame
/* NOTE(review): the '?' bit count below was already present in the
 * original (inside a disabled/unfinished FMO path) */
4039 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4040 slice_group_change_cycle= get_bits(&s->gb, ?);
4043 h0->last_slice_type = slice_type;
4044 h->slice_num = ++h0->current_slice;
/* cache ref_list -> frame mapping for this slice (offset by 2 so that
 * negative ref indices used by the deblocker stay in-bounds) */
4047 int *ref2frm= h->ref2frm[h->slice_num&15][j];
4051 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4052 +(h->ref_list[j][i].reference&3);
4055 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4056 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4058 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4059 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4061 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4063 av_get_pict_type_char(h->slice_type),
4064 pps_id, h->frame_num,
4065 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4066 h->ref_count[0], h->ref_count[1],
4068 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4070 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4071 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/**
 * Reads a unary-coded level_prefix from the bitstream: the number of
 * leading zero bits before the terminating 1 bit (H.264 CAVLC).
 * NOTE(review): the declarations of buf/log and the trailing
 * `return log-1;` are not visible in this extract.
 */
4081 static inline int get_level_prefix(GetBitContext *gb){
4085 OPEN_READER(re, gb);
4086 UPDATE_CACHE(re, gb);
/* buf now holds the next 32 bits of the stream, MSB-aligned */
4087 buf=GET_CACHE(re, gb);
/* log = index (from MSB, 1-based) of the first set bit; prefix = log-1 */
4089 log= 32 - av_log2(buf);
/* TRACE-only debug dump of the consumed prefix bits */
4091 print_bin(buf>>(32-log), log);
4092 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
/* consume the zero run plus the terminating 1 bit */
4095 LAST_SKIP_BITS(re, gb, log);
4096 CLOSE_READER(re, gb);
/**
 * Checks whether the 8x8 transform may be used for the current macroblock:
 * it is disallowed if any 8x8 partition is split further, or is DIRECT
 * without direct_8x8_inference. (The surrounding loop and return
 * statements are not visible in this extract.)
 */
4101 static inline int get_dct8x8_allowed(H264Context *h){
4104 if(!IS_SUB_8X8(h->sub_mb_type[i])
4105 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4112 * decodes a residual block.
4113 * @param n block index
4114 * @param scantable scantable
4115 * @param max_coeff number of coefficients in the block
4116 * @return <0 if an error occurred
4118 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4119 MpegEncContext * const s = &h->s;
/* maps the predicted nC (0..16) to one of the 4 coeff_token VLC tables */
4120 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4122 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4124 //FIXME put trailing_onex into the context
/* --- coeff_token: encodes (total_coeff, trailing_ones) jointly.
 * Chroma DC uses its own VLC; luma blocks select a VLC from the
 * predicted non-zero count of neighbouring blocks. */
4126 if(n == CHROMA_DC_BLOCK_INDEX){
4127 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4128 total_coeff= coeff_token>>2;
4130 if(n == LUMA_DC_BLOCK_INDEX){
4131 total_coeff= pred_non_zero_count(h, 0);
4132 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4133 total_coeff= coeff_token>>2;
4135 total_coeff= pred_non_zero_count(h, n);
4136 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4137 total_coeff= coeff_token>>2;
4138 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4142 //FIXME set last_non_zero?
/* bitstream sanity check: a corrupt stream may signal more coefficients
 * than the block can hold */
4146 if(total_coeff > (unsigned)max_coeff) {
4147 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4151 trailing_ones= coeff_token&3;
4152 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4153 assert(total_coeff<=16);
/* --- level parsing: trailing ones are sign-only (1 bit each) */
4155 for(i=0; i<trailing_ones; i++){
4156 level[i]= 1 - 2*get_bits1(gb);
4160 int level_code, mask;
/* first non-trailing level starts with suffix_length 0, or 1 for
 * blocks with many coefficients and few trailing ones */
4161 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4162 int prefix= get_level_prefix(gb);
4164 //first coefficient has suffix_length equal to 0 or 1
4165 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4167 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4169 level_code= (prefix<<suffix_length); //part
4170 }else if(prefix==14){
4172 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4174 level_code= prefix + get_bits(gb, 4); //part
/* prefix >= 15: escape coding with a (prefix-3)-bit suffix */
4176 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4177 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4179 level_code += (1<<(prefix-3))-4096;
/* if fewer than 3 trailing ones, level magnitude 1 is already taken */
4182 if(trailing_ones < 3) level_code += 2;
/* map the unsigned level_code to a signed level (even -> +, odd -> -) */
4187 mask= -(level_code&1);
4188 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4191 //remaining coefficients have suffix_length > 0
4192 for(;i<total_coeff;i++) {
/* thresholds at which suffix_length is incremented */
4193 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4194 prefix = get_level_prefix(gb);
4196 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4198 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4200 level_code += (1<<(prefix-3))-4096;
4202 mask= -(level_code&1);
4203 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4204 if(level_code > suffix_limit[suffix_length])
/* --- total_zeros: number of zeros interleaved before the last level.
 * If the block is already full there can be none. */
4209 if(total_coeff == max_coeff)
4212 if(n == CHROMA_DC_BLOCK_INDEX)
4213 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4215 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
/* --- scatter the levels back to front through the scan order;
 * two copies of the loop: without dequant (qmul==NULL) and with. */
4218 coeff_num = zeros_left + total_coeff - 1;
4219 j = scantable[coeff_num];
4221 block[j] = level[0];
4222 for(i=1;i<total_coeff;i++) {
4225 else if(zeros_left < 7){
4226 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4228 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4230 zeros_left -= run_before;
4231 coeff_num -= 1 + run_before;
4232 j= scantable[ coeff_num ];
/* dequantized variant: scale each level by qmul[j] with rounding */
4237 block[j] = (level[0] * qmul[j] + 32)>>6;
4238 for(i=1;i<total_coeff;i++) {
4241 else if(zeros_left < 7){
4242 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4244 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4246 zeros_left -= run_before;
4247 coeff_num -= 1 + run_before;
4248 j= scantable[ coeff_num ];
4250 block[j]= (level[i] * qmul[j] + 32)>>6;
/* run_before values consumed more zeros than total_zeros declared */
4255 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/**
 * Predicts mb_field_decoding_flag for a skipped MBAFF pair from the
 * left neighbour, falling back to the top neighbour, when the flag is
 * not present in the bitstream.
 */
4262 static void predict_field_decoding_flag(H264Context *h){
4263 MpegEncContext * const s = &h->s;
4264 const int mb_xy= h->mb_xy;
/* neighbour mb_type is only valid if it belongs to the same slice */
4265 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4266 ? s->current_picture.mb_type[mb_xy-1]
4267 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4268 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
/* inherit field/frame coding from the chosen neighbour */
4270 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4274 * decodes a P_SKIP or B_SKIP macroblock
4276 static void decode_mb_skip(H264Context *h){
4277 MpegEncContext * const s = &h->s;
4278 const int mb_xy= h->mb_xy;
/* a skipped macroblock has no residual at all */
4281 memset(h->non_zero_count[mb_xy], 0, 16);
4282 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4285 mb_type|= MB_TYPE_INTERLACED;
/* B_SKIP: motion is inferred by direct prediction */
4287 if( h->slice_type_nos == FF_B_TYPE )
4289 // just for fill_caches. pred_direct_motion will set the real mb_type
4290 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4292 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4293 pred_direct_motion(h, &mb_type);
4294 mb_type|= MB_TYPE_SKIP;
/* P_SKIP: ref 0, motion vector from the standard P-skip predictor */
4299 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4301 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4302 pred_pskip_motion(h, &mx, &my);
4303 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4304 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
/* commit per-macroblock state for later deblocking / neighbours */
4307 write_back_motion(h, mb_type);
4308 s->current_picture.mb_type[mb_xy]= mb_type;
4309 s->current_picture.qscale_table[mb_xy]= s->qscale;
4310 h->slice_table[ mb_xy ]= h->slice_num;
4311 h->prev_mb_skipped= 1;
4315 * decodes a macroblock
4316 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4318 static int decode_mb_cavlc(H264Context *h){
4319 MpegEncContext * const s = &h->s;
4321 int partition_count;
4322 unsigned int mb_type, cbp;
4323 int dct8x8_allowed= h->pps.transform_8x8_mode;
4325 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4327 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4329 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4330 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
/* --- mb_skip_run handling (P/B slices only) --- */
4332 if(h->slice_type_nos != FF_I_TYPE){
4333 if(s->mb_skip_run==-1)
4334 s->mb_skip_run= get_ue_golomb(&s->gb);
4336 if (s->mb_skip_run--) {
/* MBAFF: flag is read on the last skipped MB of a pair, otherwise
 * predicted from the neighbours */
4337 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4338 if(s->mb_skip_run==0)
4339 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4341 predict_field_decoding_flag(h);
4348 if( (s->mb_y&1) == 0 )
4349 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4351 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4353 h->prev_mb_skipped= 0;
/* --- mb_type: value spaces differ per slice type; intra types in
 * P/B slices are offset and fall through to decode_intra_mb --- */
4355 mb_type= get_ue_golomb(&s->gb);
4356 if(h->slice_type_nos == FF_B_TYPE){
4358 partition_count= b_mb_type_info[mb_type].partition_count;
4359 mb_type= b_mb_type_info[mb_type].type;
4362 goto decode_intra_mb;
4364 }else if(h->slice_type_nos == FF_P_TYPE){
4366 partition_count= p_mb_type_info[mb_type].partition_count;
4367 mb_type= p_mb_type_info[mb_type].type;
4370 goto decode_intra_mb;
4373 assert(h->slice_type_nos == FF_I_TYPE);
4374 if(h->slice_type == FF_SI_TYPE && mb_type)
4378 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
/* intra mb_type jointly encodes cbp and the 16x16 prediction mode */
4382 cbp= i_mb_type_info[mb_type].cbp;
4383 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4384 mb_type= i_mb_type_info[mb_type].type;
4388 mb_type |= MB_TYPE_INTERLACED;
4390 h->slice_table[ mb_xy ]= h->slice_num;
/* --- I_PCM: raw samples, no prediction or transform --- */
4392 if(IS_INTRA_PCM(mb_type)){
4395 // We assume these blocks are very rare so we do not optimize it.
4396 align_get_bits(&s->gb);
4398 // The pixels are stored in the same order as levels in h->mb array.
4399 for(x=0; x < (CHROMA ? 384 : 256); x++){
4400 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4403 // In deblocking, the quantizer is 0
4404 s->current_picture.qscale_table[mb_xy]= 0;
4405 // All coeffs are present
4406 memset(h->non_zero_count[mb_xy], 16, 16);
4408 s->current_picture.mb_type[mb_xy]= mb_type;
/* MBAFF field MB: refs address fields, so double the counts while
 * decoding this MB (undone at the end of the function) */
4413 h->ref_count[0] <<= 1;
4414 h->ref_count[1] <<= 1;
4417 fill_caches(h, mb_type, 0);
/* --- prediction-mode parsing --- */
4420 if(IS_INTRA(mb_type)){
4422 // init_top_left_availability(h);
4423 if(IS_INTRA4x4(mb_type)){
4426 if(dct8x8_allowed && get_bits1(&s->gb)){
4427 mb_type |= MB_TYPE_8x8DCT;
4431 // fill_intra4x4_pred_table(h);
4432 for(i=0; i<16; i+=di){
4433 int mode= pred_intra_mode(h, i);
/* prev_intra4x4_pred_mode_flag==0: 3-bit rem_mode replaces the
 * predicted mode, skipping the predicted value itself */
4435 if(!get_bits1(&s->gb)){
4436 const int rem_mode= get_bits(&s->gb, 3);
4437 mode = rem_mode + (rem_mode >= mode);
4441 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4443 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4445 write_back_intra_pred_mode(h);
4446 if( check_intra4x4_pred_mode(h) < 0)
4449 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4450 if(h->intra16x16_pred_mode < 0)
4454 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4457 h->chroma_pred_mode= pred_mode;
/* --- inter, 8x8 partitions (sub_mb_types) --- */
4459 }else if(partition_count==4){
4460 int i, j, sub_partition_count[4], list, ref[2][4];
4462 if(h->slice_type_nos == FF_B_TYPE){
4464 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4465 if(h->sub_mb_type[i] >=13){
4466 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4469 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4470 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4472 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4473 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4474 pred_direct_motion(h, &mb_type);
/* mark interior positions unavailable so the MV predictor does
 * not use direct-filled values */
4475 h->ref_cache[0][scan8[4]] =
4476 h->ref_cache[1][scan8[4]] =
4477 h->ref_cache[0][scan8[12]] =
4478 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4481 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4483 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4484 if(h->sub_mb_type[i] >=4){
4485 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4488 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4489 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices, one per non-direct 8x8 partition per list */
4493 for(list=0; list<h->list_count; list++){
4494 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4496 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4497 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4498 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4500 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4512 dct8x8_allowed = get_dct8x8_allowed(h);
/* motion vector differences, added to the per-partition predictor */
4514 for(list=0; list<h->list_count; list++){
4516 if(IS_DIRECT(h->sub_mb_type[i])) {
4517 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4520 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4521 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4523 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4524 const int sub_mb_type= h->sub_mb_type[i];
4525 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4526 for(j=0; j<sub_partition_count[i]; j++){
4528 const int index= 4*i + block_width*j;
4529 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4530 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4531 mx += get_se_golomb(&s->gb);
4532 my += get_se_golomb(&s->gb);
4533 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate the MV over the cells covered by this sub-partition */
4535 if(IS_SUB_8X8(sub_mb_type)){
4537 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4539 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4540 }else if(IS_SUB_8X4(sub_mb_type)){
4541 mv_cache[ 1 ][0]= mx;
4542 mv_cache[ 1 ][1]= my;
4543 }else if(IS_SUB_4X8(sub_mb_type)){
4544 mv_cache[ 8 ][0]= mx;
4545 mv_cache[ 8 ][1]= my;
4547 mv_cache[ 0 ][0]= mx;
4548 mv_cache[ 0 ][1]= my;
4551 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
/* --- inter, direct 16x16 --- */
4557 }else if(IS_DIRECT(mb_type)){
4558 pred_direct_motion(h, &mb_type);
4559 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- inter, 16x16 / 16x8 / 8x16 partitions --- */
4561 int list, mx, my, i;
4562 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4563 if(IS_16X16(mb_type)){
4564 for(list=0; list<h->list_count; list++){
4566 if(IS_DIR(mb_type, 0, list)){
4567 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4568 if(val >= h->ref_count[list]){
4569 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4573 val= LIST_NOT_USED&0xFF;
4574 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4576 for(list=0; list<h->list_count; list++){
4578 if(IS_DIR(mb_type, 0, list)){
4579 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4580 mx += get_se_golomb(&s->gb);
4581 my += get_se_golomb(&s->gb);
4582 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4584 val= pack16to32(mx,my);
4587 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4590 else if(IS_16X8(mb_type)){
4591 for(list=0; list<h->list_count; list++){
4594 if(IS_DIR(mb_type, i, list)){
4595 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4596 if(val >= h->ref_count[list]){
4597 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4601 val= LIST_NOT_USED&0xFF;
4602 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4605 for(list=0; list<h->list_count; list++){
4608 if(IS_DIR(mb_type, i, list)){
4609 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4610 mx += get_se_golomb(&s->gb);
4611 my += get_se_golomb(&s->gb);
4612 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4614 val= pack16to32(mx,my);
4617 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4621 assert(IS_8X16(mb_type));
4622 for(list=0; list<h->list_count; list++){
4625 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4626 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4627 if(val >= h->ref_count[list]){
4628 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4632 val= LIST_NOT_USED&0xFF;
4633 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4636 for(list=0; list<h->list_count; list++){
4639 if(IS_DIR(mb_type, i, list)){
4640 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4641 mx += get_se_golomb(&s->gb);
4642 my += get_se_golomb(&s->gb);
4643 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4645 val= pack16to32(mx,my);
4648 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4654 if(IS_INTER(mb_type))
4655 write_back_motion(h, mb_type);
/* --- coded_block_pattern (not present for Intra16x16: it is part of
 * the mb_type there) --- */
4657 if(!IS_INTRA16x16(mb_type)){
4658 cbp= get_ue_golomb(&s->gb);
4660 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4665 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4666 else cbp= golomb_to_inter_cbp [cbp];
4668 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4669 else cbp= golomb_to_inter_cbp_gray[cbp];
/* transform_size_8x8_flag for inter MBs with coded luma */
4674 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4675 if(get_bits1(&s->gb)){
4676 mb_type |= MB_TYPE_8x8DCT;
4677 h->cbp_table[mb_xy]= cbp;
4680 s->current_picture.mb_type[mb_xy]= mb_type;
/* --- residual decoding --- */
4682 if(cbp || IS_INTRA16x16(mb_type)){
4683 int i8x8, i4x4, chroma_idx;
4685 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4686 const uint8_t *scan, *scan8x8, *dc_scan;
4688 // fill_non_zero_count_cache(h);
/* choose field or frame scan orders (q0 variants when qscale==0) */
4690 if(IS_INTERLACED(mb_type)){
4691 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4692 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4693 dc_scan= luma_dc_field_scan;
4695 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4696 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4697 dc_scan= luma_dc_zigzag_scan;
4700 dquant= get_se_golomb(&s->gb);
4702 if( dquant > 25 || dquant < -26 ){
4703 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
/* qscale wraps modulo 52 per the spec */
4707 s->qscale += dquant;
4708 if(((unsigned)s->qscale) > 51){
4709 if(s->qscale<0) s->qscale+= 52;
4710 else s->qscale-= 52;
4713 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4714 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
/* Intra16x16: separate DC block plus AC-only 4x4 blocks */
4715 if(IS_INTRA16x16(mb_type)){
4716 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4717 return -1; //FIXME continue if partitioned and other return -1 too
4720 assert((cbp&15) == 0 || (cbp&15) == 15);
4723 for(i8x8=0; i8x8<4; i8x8++){
4724 for(i4x4=0; i4x4<4; i4x4++){
4725 const int index= i4x4 + 4*i8x8;
4726 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4732 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
/* other MB types: per-8x8 luma, either 8x8 transform or 4 4x4 blocks */
4735 for(i8x8=0; i8x8<4; i8x8++){
4736 if(cbp & (1<<i8x8)){
4737 if(IS_8x8DCT(mb_type)){
4738 DCTELEM *buf = &h->mb[64*i8x8];
4740 for(i4x4=0; i4x4<4; i4x4++){
4741 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4742 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4745 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4746 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4748 for(i4x4=0; i4x4<4; i4x4++){
4749 const int index= i4x4 + 4*i8x8;
4751 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4757 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4758 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* chroma: DC blocks (cbp bit 4/5), then AC blocks */
4764 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4765 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4771 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4772 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4773 for(i4x4=0; i4x4<4; i4x4++){
4774 const int index= 16 + 4*chroma_idx + i4x4;
4775 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4781 uint8_t * const nnz= &h->non_zero_count_cache[0];
4782 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4783 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* no residual at all: clear the whole non-zero-count cache */
4786 uint8_t * const nnz= &h->non_zero_count_cache[0];
4787 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4788 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4789 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4791 s->current_picture.qscale_table[mb_xy]= s->qscale;
4792 write_back_non_zero_count(h);
/* undo the MBAFF ref_count doubling done above */
4795 h->ref_count[0] >>= 1;
4796 h->ref_count[1] >>= 1;
/**
 * Decodes mb_field_decoding_flag (MBAFF) with CABAC.
 * Context 70..72 is selected by how many available neighbouring MB
 * pairs (left, top) are field-coded.
 */
4802 static int decode_cabac_field_decoding_flag(H264Context *h) {
4803 MpegEncContext * const s = &h->s;
4804 const int mb_x = s->mb_x;
/* address of the top MB of the current pair */
4805 const int mb_y = s->mb_y & ~1;
4806 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4807 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4809 unsigned int ctx = 0;
/* neighbours count only if they belong to the same slice */
4811 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4814 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4818 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * Decodes an intra mb_type with CABAC.
 * @param ctx_base first cabac_state index of the intra mb_type contexts
 *                 (differs between I, P and B slices)
 * @param intra_slice 1 when called for an I slice (neighbour-dependent
 *                 first context and shifted sub-contexts), 0 otherwise
 * @return 0 for I4x4, 25 for I_PCM, or 1..24 encoding the Intra16x16
 *         variant (pred mode, cbp_chroma, cbp_luma)
 */
4821 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4822 uint8_t *state= &h->cabac_state[ctx_base];
4826 MpegEncContext * const s = &h->s;
4827 const int mba_xy = h->left_mb_xy[0];
4828 const int mbb_xy = h->top_mb_xy;
/* I slices: context chosen by how many neighbours are not I4x4 */
4830 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4832 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4834 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4835 return 0; /* I4x4 */
4838 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4839 return 0; /* I4x4 */
4842 if( get_cabac_terminate( &h->cabac ) )
4843 return 25; /* PCM */
/* Intra16x16: accumulate the type from cbp and pred-mode bins */
4845 mb_type = 1; /* I16x16 */
4846 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4847 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4848 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4849 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4850 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/**
 * Decodes mb_type with CABAC, dispatching on the slice type.
 * For P/B slices, intra types are returned with an offset so the caller
 * can map them through the same tables as CAVLC.
 */
4854 static int decode_cabac_mb_type( H264Context *h ) {
4855 MpegEncContext * const s = &h->s;
4857 if( h->slice_type_nos == FF_I_TYPE ) {
4858 return decode_cabac_intra_mb_type(h, 3, 1);
4859 } else if( h->slice_type_nos == FF_P_TYPE ) {
/* first bin: inter (0) vs intra (1) */
4860 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
4862 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
4863 /* P_L0_D16x16, P_8x8 */
4864 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
4866 /* P_L0_D8x16, P_L0_D16x8 */
4867 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
/* intra in a P slice: types offset by 5 */
4870 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
4872 } else if( h->slice_type_nos == FF_B_TYPE ) {
4873 const int mba_xy = h->left_mb_xy[0];
4874 const int mbb_xy = h->top_mb_xy;
/* context from the number of non-direct neighbours in this slice */
4878 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4880 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4883 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4884 return 0; /* B_Direct_16x16 */
4886 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4887 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
/* 4-bit suffix selects among the remaining B types */
4890 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4891 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4892 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4893 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4895 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4896 else if( bits == 13 ) {
/* intra in a B slice: types offset by 23 */
4897 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4898 } else if( bits == 14 )
4899 return 11; /* B_L1_L0_8x16 */
4900 else if( bits == 15 )
4901 return 22; /* B_8x8 */
/* bits 8..12: one more bin refines the bi-predictive types */
4903 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4904 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
4906 /* TODO SI/SP frames? */
/**
 * Decodes mb_skip_flag with CABAC.
 * The context (11..13 for P, offset by 13 more for B) is selected by
 * how many available neighbours (left, top) are NOT skipped.
 */
4911 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4912 MpegEncContext * const s = &h->s;
4916 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
/* MBAFF: neighbour addresses depend on the field/frame coding of
 * the current and neighbouring MB pairs */
4917 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
4920 && h->slice_table[mba_xy] == h->slice_num
4921 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4922 mba_xy += s->mb_stride;
4924 mbb_xy = mb_xy - s->mb_stride;
4926 && h->slice_table[mbb_xy] == h->slice_num
4927 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4928 mbb_xy -= s->mb_stride;
4930 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4932 int mb_xy = h->mb_xy;
/* non-MBAFF: the top neighbour is one row (or two, in a field
 * picture) above */
4934 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4937 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4939 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4942 if( h->slice_type_nos == FF_B_TYPE )
4944 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/**
 * Decodes one intra4x4 prediction mode with CABAC.
 * A first bin (context 68) signals "use the predicted mode"; otherwise
 * a 3-bit rem_mode (context 69) is read and remapped to skip the
 * predicted value.
 */
4947 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4950 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
/* rem_intra4x4_pred_mode: three bypass-order bins, LSB first */
4953 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4954 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4955 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
/* skip over the predicted mode so all 9 modes stay reachable */
4957 if( mode >= pred_mode )
/**
 * Decodes intra_chroma_pred_mode (0..3) with CABAC.
 * Context 64..66 for the first bin is selected by how many neighbours
 * use a non-zero chroma prediction mode; further bins use context 67.
 */
4963 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4964 const int mba_xy = h->left_mb_xy[0];
4965 const int mbb_xy = h->top_mb_xy;
4969 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4970 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
4973 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
4976 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
/* truncated-unary suffix for modes 1..3 */
4979 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4981 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4987 static int decode_cabac_mb_cbp_luma( H264Context *h) {
4988 int cbp_b, cbp_a, ctx, cbp = 0;
4990 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
4991 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
4993 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
4994 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
4995 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
4996 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
4997 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
4998 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
4999 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5000 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
5003 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5007 cbp_a = (h->left_cbp>>4)&0x03;
5008 cbp_b = (h-> top_cbp>>4)&0x03;
5011 if( cbp_a > 0 ) ctx++;
5012 if( cbp_b > 0 ) ctx += 2;
5013 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5017 if( cbp_a == 2 ) ctx++;
5018 if( cbp_b == 2 ) ctx += 2;
5019 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/**
 * Decodes mb_qp_delta with CABAC.
 * The unary value is read with context-coded bins, then mapped to a
 * signed delta (odd -> positive, even -> negative).
 */
5021 static int decode_cabac_mb_dqp( H264Context *h) {
/* context selection: first bin depends on whether the previous MB
 * had a non-zero qp delta */
5025 if( h->last_qscale_diff != 0 )
5028 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5034 if(val > 102) //prevent infinite loop
/* even count -> negative delta */
5041 return -(val + 1)/2;
5043 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5044 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5046 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5048 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
5052 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5054 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5055 return 0; /* B_Direct_8x8 */
5056 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5057 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5059 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5060 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5061 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5064 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5065 type += get_cabac( &h->cabac, &h->cabac_state[39] );
5069 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5070 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/**
 * Decodes ref_idx for block n of the given list with CABAC (unary
 * binarization, contexts 54+).
 * @return the reference index, clamped to 0 on overflow
 */
5073 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
/* refs of the left and top neighbouring blocks (context selection) */
5074 int refa = h->ref_cache[list][scan8[n] - 1];
5075 int refb = h->ref_cache[list][scan8[n] - 8];
/* in B slices, direct-predicted neighbours do not count as >0 refs */
5079 if( h->slice_type_nos == FF_B_TYPE) {
5080 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5082 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
/* unary decoding of the index */
5091 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5097 if(ref >= 32 /*h->ref_list[list]*/){
5098 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5099 return 0; //FIXME we should return -1 and check the return everywhere
/**
 * Decodes one motion vector difference component with CABAC
 * (UEG3 binarization: unary prefix, exp-golomb suffix, sign bit).
 * @param l 0 for the horizontal component, 1 for vertical
 */
5105 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
/* context from the sum of the neighbours' absolute mvd */
5106 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5107 abs( h->mvd_cache[list][scan8[n] - 8][l] );
/* separate context sets for x (40..) and y (47..) components */
5108 int ctxbase = (l == 0) ? 40 : 47;
5113 else if( amvd > 32 )
5118 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
/* unary prefix, at most 9 context-coded bins */
5123 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
/* exp-golomb suffix in bypass mode */
5131 while( get_cabac_bypass( &h->cabac ) ) {
5135 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5140 if( get_cabac_bypass( &h->cabac ) )
/* bypass-coded sign applied to the magnitude */
5144 return get_cabac_bypass_sign( &h->cabac, -mvd );
/**
 * Computes the coded_block_flag CABAC context for a residual block.
 * The context depends on whether the corresponding left (nza) and top
 * (nzb) neighbouring blocks have non-zero coefficients, with a separate
 * context group (offset 4*cat) per block category.
 */
5147 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
/* DC blocks: the neighbour flag is stored as bit 8 of the saved cbp */
5153 nza = h->left_cbp&0x100;
5154 nzb = h-> top_cbp&0x100;
/* chroma DC: per-component flag in bits 6..7 of the saved cbp */
5156 nza = (h->left_cbp>>(6+idx))&0x01;
5157 nzb = (h-> top_cbp>>(6+idx))&0x01;
/* chroma AC: neighbour non-zero counts from the cache */
5161 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5162 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
/* luma AC / luma 4x4 */
5164 assert(cat == 1 || cat == 2);
5165 nza = h->non_zero_count_cache[scan8[idx] - 1];
5166 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5176 return ctx + 4 * cat;
/* Maps each of the 63 scan positions of an 8x8 block to the context
 * offset used for the last_significant_coeff_flag (H.264 table 9-43).
 * Declared as an ASM-visible constant so the x86 CABAC routines can
 * address it directly. */
5179 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5180 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5181 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5182 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5183 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5186 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
5187 static const int significant_coeff_flag_offset[2][6] = {
5188 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5189 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5191 static const int last_coeff_flag_offset[2][6] = {
5192 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5193 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5195 static const int coeff_abs_level_m1_offset[6] = {
5196 227+0, 227+10, 227+20, 227+30, 227+39, 426
5198 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5199 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5200 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5201 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5202 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5203 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5204 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5205 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5206 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5208 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5209 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5210 * map node ctx => cabac ctx for level=1 */
5211 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5212 /* map node ctx => cabac ctx for level>1 */
5213 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5214 static const uint8_t coeff_abs_level_transition[2][8] = {
5215 /* update node ctx after decoding a level=1 */
5216 { 1, 2, 3, 3, 4, 5, 6, 7 },
5217 /* update node ctx after decoding a level>1 */
5218 { 4, 4, 4, 4, 5, 6, 7, 7 }
// --- Interior of decode_cabac_residual_internal() (signature elided in
// this listing). Decodes one block's residual via CABAC: coded-block
// flag, significance map, then coefficient levels and signs.
// NOTE(review): many intervening lines are elided; comments describe only
// what the visible lines establish.
5224 int coeff_count = 0;
5227 uint8_t *significant_coeff_ctx_base;
5228 uint8_t *last_coeff_ctx_base;
5229 uint8_t *abs_level_m1_ctx_base;
// Work on a local copy "cc" of the CABAC engine state (presumably so the
// hot loop keeps it in registers — TODO confirm); it is copied back to
// h->cabac before every return. When CABAC_ON_STACK is not defined, CC
// aliases h->cabac directly.
5232 #define CABAC_ON_STACK
5234 #ifdef CABAC_ON_STACK
5237 cc.range = h->cabac.range;
5238 cc.low = h->cabac.low;
5239 cc.bytestream= h->cabac.bytestream;
5241 #define CC &h->cabac
5245 /* cat: 0-> DC 16x16 n = 0
5246 * 1-> AC 16x16 n = luma4x4idx
5247 * 2-> Luma4x4 n = luma4x4idx
5248 * 3-> DC Chroma n = iCbCr
5249 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5250 * 5-> Luma8x8 n = 4 * luma8x8idx
5253 /* read coded block flag */
// cat 5 (8x8 luma) has no coded_block_flag of its own; for all other
// categories a zero flag means no coefficients: clear the nnz cache
// entry, restore the CABAC state and return early.
5254 if( is_dc || cat != 5 ) {
5255 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
5258 h->non_zero_count_cache[scan8[16+n]] = 0;
5260 h->non_zero_count_cache[scan8[n]] = 0;
5263 #ifdef CABAC_ON_STACK
5264 h->cabac.range = cc.range ;
5265 h->cabac.low = cc.low ;
5266 h->cabac.bytestream= cc.bytestream;
// Context bases for significance / last-coefficient / level decoding,
// chosen per block category and frame/field coding (MB_FIELD).
5272 significant_coeff_ctx_base = h->cabac_state
5273 + significant_coeff_flag_offset[MB_FIELD][cat];
5274 last_coeff_ctx_base = h->cabac_state
5275 + last_coeff_flag_offset[MB_FIELD][cat];
5276 abs_level_m1_ctx_base = h->cabac_state
5277 + coeff_abs_level_m1_offset[cat];
5279 if( !is_dc && cat == 5 ) {
// Significance map: for each scan position decode significant_coeff_flag,
// and for significant positions decode last_significant_coeff_flag; the
// last scan position is implicitly significant if reached.
5280 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5281 for(last= 0; last < coefs; last++) { \
5282 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5283 if( get_cabac( CC, sig_ctx )) { \
5284 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5285 index[coeff_count++] = last; \
5286 if( get_cabac( CC, last_ctx ) ) { \
5292 if( last == max_coeff -1 ) {\
5293 index[coeff_count++] = last;\
5295 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
// x86 asm fast paths for the significance scan; the C macro above is the
// portable fallback.
5296 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5297 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5299 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5301 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5303 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5306 assert(coeff_count > 0);
// Record non-zero-count info: DC categories set cbp_table bits; AC/luma
// categories fill the nnz cache used by the loop filter and neighbours.
5310 h->cbp_table[h->mb_xy] |= 0x100;
5312 h->cbp_table[h->mb_xy] |= 0x40 << n;
5315 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5317 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5319 assert( cat == 1 || cat == 2 );
5320 h->non_zero_count_cache[scan8[n]] = coeff_count;
// Level/sign decoding in reverse scan order; node_ctx walks the
// coeff_abs_level_transition state machine defined above.
5325 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5327 int j= scantable[index[--coeff_count]];
// level == 1: sign via bypass bin; DC blocks skip dequantization (qmul
// applied later), AC blocks dequantize with qmul[j] and round.
5329 if( get_cabac( CC, ctx ) == 0 ) {
5330 node_ctx = coeff_abs_level_transition[0][node_ctx];
5332 block[j] = get_cabac_bypass_sign( CC, -1);
5334 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
// level > 1: unary prefix up to 14 with context ctx, then a bypass-coded
// escape suffix for magnitudes >= 15.
5338 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5339 node_ctx = coeff_abs_level_transition[1][node_ctx];
5341 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5345 if( coeff_abs >= 15 ) {
5347 while( get_cabac_bypass( CC ) ) {
5353 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5359 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5361 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5364 } while( coeff_count );
// Copy the stack-held CABAC state back into the context before returning.
5365 #ifdef CABAC_ON_STACK
5366 h->cabac.range = cc.range ;
5367 h->cabac.low = cc.low ;
5368 h->cabac.bytestream= cc.bytestream;
5373 #ifndef CONFIG_SMALL
// Thin wrappers over decode_cabac_residual_internal(). When CONFIG_SMALL
// is not set, two specializations are emitted with is_dc forced to a
// constant (1 for DC, 0 for non-DC) so the compiler can fold the is_dc
// branches; the dispatcher picks per category (cat 0 = 16x16 luma DC,
// cat 3 = chroma DC, per the category table documented above).
// NOTE(review): closing braces and the #ifdef/#else/#endif lines of the
// CONFIG_SMALL split are elided in this listing.
5374 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5375 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
5378 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5379 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
// Dispatcher: the CONFIG_SMALL build calls the internal helper directly,
// otherwise it routes to the DC / non-DC specializations above.
5383 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5385 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5387 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5388 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
// Computes the macroblock indices of the top and left neighbours of the
// current MB (h->top_mb_xy, h->left_mb_xy[0]) in raster order, then
// corrects them for MBAFF: when the current/neighbour MB-pairs differ in
// frame/field coding the neighbour index must point into the right half
// of the pair. NOTE(review): the surrounding if/else structure of the
// MBAFF branch is partially elided in this listing — the conditions on
// lines 5406/5407 belong to an elided ternary/if.
5392 static inline void compute_mb_neighbors(H264Context *h)
5394 MpegEncContext * const s = &h->s;
5395 const int mb_xy = h->mb_xy;
// Default (non-MBAFF) neighbours: directly above and to the left.
5396 h->top_mb_xy = mb_xy - s->mb_stride;
5397 h->left_mb_xy[0] = mb_xy - 1;
// MBAFF: pair_xy is the top MB of the current MB-pair; frame/field flags
// of the current, top and left pairs decide the corrections below.
5399 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5400 const int top_pair_xy = pair_xy - s->mb_stride;
5401 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5402 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5403 const int curr_mb_frame_flag = !MB_FIELD;
5404 const int bottom = (s->mb_y & 1);
5406 ? !curr_mb_frame_flag // bottom macroblock
5407 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5409 h->top_mb_xy -= s->mb_stride;
// Left neighbour: if left pair's coding mode differs, use the top MB of
// the left pair.
5411 if (left_mb_frame_flag != curr_mb_frame_flag) {
5412 h->left_mb_xy[0] = pair_xy - 1;
// Field pictures: rows are pair-interleaved, so "above" is two rows up.
5414 } else if (FIELD_PICTURE) {
5415 h->top_mb_xy -= s->mb_stride;
// Decodes one macroblock from the CABAC bitstream: skip flags, MBAFF
// field flag, mb_type, prediction info (intra modes or motion vectors /
// reference indices), CBP, qscale delta and residual coefficients.
// NOTE(review): this listing is heavily elided (braces, else-branches and
// several statements are missing); comments describe only the visible
// lines and should be re-verified against the full source.
5421 * decodes a macroblock
5422 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5424 static int decode_mb_cabac(H264Context *h) {
5425 MpegEncContext * const s = &h->s;
5427 int mb_type, partition_count, cbp = 0;
5428 int dct8x8_allowed= h->pps.transform_8x8_mode;
5430 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5432 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5434 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
// --- Skip handling (P/B slices only; I slices have no skip flag).
5435 if( h->slice_type_nos != FF_I_TYPE ) {
5437 /* a skipped mb needs the aff flag from the following mb */
5438 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5439 predict_field_decoding_flag(h);
5440 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5441 skip = h->next_mb_skipped;
5443 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5444 /* read skip flags */
5446 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5447 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5448 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5449 if(h->next_mb_skipped)
5450 predict_field_decoding_flag(h);
5452 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
// Skipped MB: reset per-MB tables (rest of skip handling elided).
5457 h->cbp_table[mb_xy] = 0;
5458 h->chroma_pred_mode_table[mb_xy] = 0;
5459 h->last_qscale_diff = 0;
// --- MBAFF field decoding flag for non-skipped MBs.
5466 if( (s->mb_y&1) == 0 )
5468 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5470 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5472 h->prev_mb_skipped = 0;
// --- mb_type decoding; mapped through per-slice-type info tables.
5474 compute_mb_neighbors(h);
5475 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5476 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
5480 if( h->slice_type_nos == FF_B_TYPE ) {
5482 partition_count= b_mb_type_info[mb_type].partition_count;
5483 mb_type= b_mb_type_info[mb_type].type;
5486 goto decode_intra_mb;
5488 } else if( h->slice_type_nos == FF_P_TYPE ) {
5490 partition_count= p_mb_type_info[mb_type].partition_count;
5491 mb_type= p_mb_type_info[mb_type].type;
5494 goto decode_intra_mb;
5497 if(h->slice_type == FF_SI_TYPE && mb_type)
5499 assert(h->slice_type_nos == FF_I_TYPE);
// decode_intra_mb target (label elided): intra MB info from table.
5501 partition_count = 0;
5502 cbp= i_mb_type_info[mb_type].cbp;
5503 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5504 mb_type= i_mb_type_info[mb_type].type;
5507 mb_type |= MB_TYPE_INTERLACED;
5509 h->slice_table[ mb_xy ]= h->slice_num;
// --- I_PCM: raw pixel data follows; re-anchor the CABAC decoder past it.
5511 if(IS_INTRA_PCM(mb_type)) {
5514 // We assume these blocks are very rare so we do not optimize it.
5515 // FIXME The two following lines get the bitstream position in the cabac
5516 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5517 ptr= h->cabac.bytestream;
5518 if(h->cabac.low&0x1) ptr--;
5520 if(h->cabac.low&0x1FF) ptr--;
5523 // The pixels are stored in the same order as levels in h->mb array.
5524 memcpy(h->mb, ptr, 256); ptr+=256;
5526 memcpy(h->mb+128, ptr, 128); ptr+=128;
5529 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5531 // All blocks are present
5532 h->cbp_table[mb_xy] = 0x1ef;
5533 h->chroma_pred_mode_table[mb_xy] = 0;
5534 // In deblocking, the quantizer is 0
5535 s->current_picture.qscale_table[mb_xy]= 0;
5536 // All coeffs are present
5537 memset(h->non_zero_count[mb_xy], 16, 16);
5538 s->current_picture.mb_type[mb_xy]= mb_type;
5539 h->last_qscale_diff = 0;
// MBAFF field MBs address twice as many refs; doubled here, halved again
// at the end of the function (lines 5896/5897).
5544 h->ref_count[0] <<= 1;
5545 h->ref_count[1] <<= 1;
5548 fill_caches(h, mb_type, 0);
// --- Intra prediction mode decoding (4x4 or 16x16 luma + chroma).
5550 if( IS_INTRA( mb_type ) ) {
5552 if( IS_INTRA4x4( mb_type ) ) {
5553 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5554 mb_type |= MB_TYPE_8x8DCT;
// 8x8 transform: one mode per 8x8 block, replicated to its four 4x4s.
5555 for( i = 0; i < 16; i+=4 ) {
5556 int pred = pred_intra_mode( h, i );
5557 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5558 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5561 for( i = 0; i < 16; i++ ) {
5562 int pred = pred_intra_mode( h, i );
5563 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5565 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5568 write_back_intra_pred_mode(h);
5569 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5571 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5572 if( h->intra16x16_pred_mode < 0 ) return -1;
5575 h->chroma_pred_mode_table[mb_xy] =
5576 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5578 pred_mode= check_intra_pred_mode( h, pred_mode );
5579 if( pred_mode < 0 ) return -1;
5580 h->chroma_pred_mode= pred_mode;
// --- 8x8 sub-partitioned inter MB: sub types, refs, then MVs per block.
5582 } else if( partition_count == 4 ) {
5583 int i, j, sub_partition_count[4], list, ref[2][4];
5585 if( h->slice_type_nos == FF_B_TYPE ) {
5586 for( i = 0; i < 4; i++ ) {
5587 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5588 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5589 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5591 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5592 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5593 pred_direct_motion(h, &mb_type);
5594 h->ref_cache[0][scan8[4]] =
5595 h->ref_cache[1][scan8[4]] =
5596 h->ref_cache[0][scan8[12]] =
5597 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5598 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5599 for( i = 0; i < 4; i++ )
5600 if( IS_DIRECT(h->sub_mb_type[i]) )
5601 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5605 for( i = 0; i < 4; i++ ) {
5606 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5607 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5608 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Reference indices per 8x8 block (only coded when >1 ref available).
5612 for( list = 0; list < h->list_count; list++ ) {
5613 for( i = 0; i < 4; i++ ) {
5614 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5615 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5616 if( h->ref_count[list] > 1 )
5617 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5623 h->ref_cache[list][ scan8[4*i]+1 ]=
5624 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5629 dct8x8_allowed = get_dct8x8_allowed(h);
// Motion vectors: predict, add decoded mvd, store into mv/mvd caches
// with a replication pattern matching the sub-block shape.
5631 for(list=0; list<h->list_count; list++){
5633 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5634 if(IS_DIRECT(h->sub_mb_type[i])){
5635 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5639 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5640 const int sub_mb_type= h->sub_mb_type[i];
5641 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5642 for(j=0; j<sub_partition_count[i]; j++){
5645 const int index= 4*i + block_width*j;
5646 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5647 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5648 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5650 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5651 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5652 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5654 if(IS_SUB_8X8(sub_mb_type)){
5656 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5658 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5661 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5663 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5664 }else if(IS_SUB_8X4(sub_mb_type)){
5665 mv_cache[ 1 ][0]= mx;
5666 mv_cache[ 1 ][1]= my;
5668 mvd_cache[ 1 ][0]= mx - mpx;
5669 mvd_cache[ 1 ][1]= my - mpy;
5670 }else if(IS_SUB_4X8(sub_mb_type)){
5671 mv_cache[ 8 ][0]= mx;
5672 mv_cache[ 8 ][1]= my;
5674 mvd_cache[ 8 ][0]= mx - mpx;
5675 mvd_cache[ 8 ][1]= my - mpy;
5677 mv_cache[ 0 ][0]= mx;
5678 mv_cache[ 0 ][1]= my;
5680 mvd_cache[ 0 ][0]= mx - mpx;
5681 mvd_cache[ 0 ][1]= my - mpy;
// Unused list for this 8x8 block: zero the 2x2 mv/mvd cache entries.
5684 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5685 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5686 p[0] = p[1] = p[8] = p[9] = 0;
5687 pd[0]= pd[1]= pd[8]= pd[9]= 0;
// --- B-direct MB: motion derived from co-located MB, no mvd coded.
5691 } else if( IS_DIRECT(mb_type) ) {
5692 pred_direct_motion(h, &mb_type);
5693 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5694 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5695 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
// --- 16x16 / 16x8 / 8x16 inter MBs: refs then MVs per partition.
5697 int list, mx, my, i, mpx, mpy;
5698 if(IS_16X16(mb_type)){
5699 for(list=0; list<h->list_count; list++){
5700 if(IS_DIR(mb_type, 0, list)){
5701 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5702 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5704 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5706 for(list=0; list<h->list_count; list++){
5707 if(IS_DIR(mb_type, 0, list)){
5708 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5710 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5711 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5712 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5714 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5715 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5717 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5720 else if(IS_16X8(mb_type)){
5721 for(list=0; list<h->list_count; list++){
5723 if(IS_DIR(mb_type, i, list)){
5724 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5725 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5727 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5730 for(list=0; list<h->list_count; list++){
5732 if(IS_DIR(mb_type, i, list)){
5733 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5734 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5735 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5736 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5738 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5739 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5741 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5742 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5747 assert(IS_8X16(mb_type));
5748 for(list=0; list<h->list_count; list++){
5750 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5751 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5752 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5754 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5757 for(list=0; list<h->list_count; list++){
5759 if(IS_DIR(mb_type, i, list)){
5760 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5761 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5762 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5764 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5765 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5766 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5768 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5769 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5776 if( IS_INTER( mb_type ) ) {
5777 h->chroma_pred_mode_table[mb_xy] = 0;
5778 write_back_motion( h, mb_type );
// --- CBP (intra16x16 carries its cbp in the mb_type table instead).
5781 if( !IS_INTRA16x16( mb_type ) ) {
5782 cbp = decode_cabac_mb_cbp_luma( h );
5784 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5787 h->cbp_table[mb_xy] = h->cbp = cbp;
5789 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5790 if( decode_cabac_mb_transform_size( h ) )
5791 mb_type |= MB_TYPE_8x8DCT;
5793 s->current_picture.mb_type[mb_xy]= mb_type;
// --- Residual decoding: pick scan tables, decode dqp, then coefficients.
5795 if( cbp || IS_INTRA16x16( mb_type ) ) {
5796 const uint8_t *scan, *scan8x8, *dc_scan;
5797 const uint32_t *qmul;
5800 if(IS_INTERLACED(mb_type)){
5801 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5802 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5803 dc_scan= luma_dc_field_scan;
5805 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5806 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5807 dc_scan= luma_dc_zigzag_scan;
5810 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5811 if( dqp == INT_MIN ){
5812 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
// qscale wraps modulo 52 per the spec's mb_qp_delta semantics.
5816 if(((unsigned)s->qscale) > 51){
5817 if(s->qscale<0) s->qscale+= 52;
5818 else s->qscale-= 52;
5820 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5821 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
// Intra16x16: separate DC block (cat 0) + 15-coeff AC blocks (cat 1).
5823 if( IS_INTRA16x16( mb_type ) ) {
5825 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5826 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5829 qmul = h->dequant4_coeff[0][s->qscale];
5830 for( i = 0; i < 16; i++ ) {
5831 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5832 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5835 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
// Other MBs: per-8x8 luma residual, either one 8x8 block (cat 5) or
// four 4x4 blocks (cat 2), gated by the luma cbp bits.
5839 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5840 if( cbp & (1<<i8x8) ) {
5841 if( IS_8x8DCT(mb_type) ) {
5842 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5843 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5845 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5846 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5847 const int index = 4*i8x8 + i4x4;
5848 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5850 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5851 //STOP_TIMER("decode_residual")
5855 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5856 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// Chroma DC (cat 3) and AC (cat 4) residuals, gated by cbp bits 4/5.
5863 for( c = 0; c < 2; c++ ) {
5864 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5865 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5871 for( c = 0; c < 2; c++ ) {
5872 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5873 for( i = 0; i < 4; i++ ) {
5874 const int index = 16 + 4 * c + i;
5875 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5876 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
// No chroma AC coded: clear the chroma nnz cache entries.
5880 uint8_t * const nnz= &h->non_zero_count_cache[0];
5881 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5882 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
// No residual at all: clear the whole nnz cache and reset last dqp.
5885 uint8_t * const nnz= &h->non_zero_count_cache[0];
5886 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5887 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5888 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5889 h->last_qscale_diff = 0;
5892 s->current_picture.qscale_table[mb_xy]= s->qscale;
5893 write_back_non_zero_count(h);
// Undo the MBAFF ref-count doubling from lines 5544/5545.
5896 h->ref_count[0] >>= 1;
5897 h->ref_count[1] >>= 1;
// Deblocks one vertical luma edge (16 pixels tall). For bS < 4 the work
// is delegated to the DSP loop filter with per-4-pixel tc0 values; the
// visible fallback path implements the spec's strong (bS == 4) filter.
// NOTE(review): the if/else structure separating the two paths is
// partially elided in this listing.
5904 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5906 const int index_a = qp + h->slice_alpha_c0_offset;
5907 const int alpha = (alpha_table+52)[index_a];
5908 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
// tc[i] = -1 marks "no filtering" for that 4-pixel group in the DSP call.
5913 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
5914 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5916 /* 16px edge length, because bS=4 is triggered by being at
5917 * the edge of an intra MB, so all 4 bS are the same */
5918 for( d = 0; d < 16; d++ ) {
5919 const int p0 = pix[-1];
5920 const int p1 = pix[-2];
5921 const int p2 = pix[-3];
5923 const int q0 = pix[0];
5924 const int q1 = pix[1];
5925 const int q2 = pix[2];
// Edge-activity test: only filter across real block edges, not detail.
5927 if( FFABS( p0 - q0 ) < alpha &&
5928 FFABS( p1 - p0 ) < beta &&
5929 FFABS( q1 - q0 ) < beta ) {
5931 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
// Strong filtering of the p side (3 pixels) when p2 is smooth enough.
5932 if( FFABS( p2 - p0 ) < beta)
5934 const int p3 = pix[-4];
5936 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5937 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5938 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5941 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
// Mirror-image strong filtering of the q side.
5943 if( FFABS( q2 - q0 ) < beta)
5945 const int q3 = pix[3];
5947 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5948 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5949 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5952 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
// Weak variant: only p0/q0 are modified.
5956 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5957 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5959 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
// Deblocks one vertical chroma edge. bS < 4: DSP normal filter with
// tc0+1 per group (0 = skip); bS == 4 (intra edge): DSP intra strong
// filter. NOTE(review): the if/else line between the two DSP calls is
// elided in this listing.
5965 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5967 const int index_a = qp + h->slice_alpha_c0_offset;
5968 const int alpha = (alpha_table+52)[index_a];
5969 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5974 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
5975 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5977 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
// Deblocks the vertical luma edge of an MBAFF macroblock row-by-row:
// each of the 16 rows may belong to a different field, so bS (8 entries)
// and qp (2 entries) are selected per row rather than per 4-pixel group.
// NOTE(review): several lines (bS_index shift, tc computation, closing
// braces) are elided in this listing.
5981 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5983 for( i = 0; i < 16; i++, pix += stride) {
5989 int bS_index = (i >> 1);
5992 bS_index |= (i & 1);
5995 if( bS[bS_index] == 0 ) {
// Per-row qp selection: field MBs split top/bottom (i>>3), frame MBs
// alternate per row (i&1).
5999 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6000 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6001 alpha = (alpha_table+52)[index_a];
6002 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
// Normal (bS < 4) filtering: clip the delta applied to p0/q0 by tc.
6004 if( bS[bS_index] < 4 ) {
6005 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6006 const int p0 = pix[-1];
6007 const int p1 = pix[-2];
6008 const int p2 = pix[-3];
6009 const int q0 = pix[0];
6010 const int q1 = pix[1];
6011 const int q2 = pix[2];
6013 if( FFABS( p0 - q0 ) < alpha &&
6014 FFABS( p1 - p0 ) < beta &&
6015 FFABS( q1 - q0 ) < beta ) {
// p1/q1 are additionally adjusted when their second neighbour is smooth.
6019 if( FFABS( p2 - p0 ) < beta ) {
6020 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6023 if( FFABS( q2 - q0 ) < beta ) {
6024 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6028 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6029 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6030 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6031 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
// Strong (bS == 4) filtering: same structure as filter_mb_edgev above.
6034 const int p0 = pix[-1];
6035 const int p1 = pix[-2];
6036 const int p2 = pix[-3];
6038 const int q0 = pix[0];
6039 const int q1 = pix[1];
6040 const int q2 = pix[2];
6042 if( FFABS( p0 - q0 ) < alpha &&
6043 FFABS( p1 - p0 ) < beta &&
6044 FFABS( q1 - q0 ) < beta ) {
6046 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6047 if( FFABS( p2 - p0 ) < beta)
6049 const int p3 = pix[-4];
6051 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6052 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6053 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6056 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6058 if( FFABS( q2 - q0 ) < beta)
6060 const int q3 = pix[3];
6062 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6063 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6064 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6067 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6071 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6072 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6074 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
// Chroma counterpart of filter_mb_mbaff_edgev(): 8 rows, per-row bS/qp
// selection, only p0/q0 (plus clip) are modified. NOTE(review): the
// bS_index computation and some closing braces are elided in this
// listing.
6079 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6081 for( i = 0; i < 8; i++, pix += stride) {
6089 if( bS[bS_index] == 0 ) {
6093 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6094 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6095 alpha = (alpha_table+52)[index_a];
6096 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
// Normal filtering: chroma uses tc0 + 1 as its clip bound.
6098 if( bS[bS_index] < 4 ) {
6099 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6100 const int p0 = pix[-1];
6101 const int p1 = pix[-2];
6102 const int q0 = pix[0];
6103 const int q1 = pix[1];
6105 if( FFABS( p0 - q0 ) < alpha &&
6106 FFABS( p1 - p0 ) < beta &&
6107 FFABS( q1 - q0 ) < beta ) {
6108 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6110 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6111 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6112 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
// Strong (bS == 4) chroma filtering: simple 3-tap replacement of p0/q0.
6115 const int p0 = pix[-1];
6116 const int p1 = pix[-2];
6117 const int q0 = pix[0];
6118 const int q1 = pix[1];
6120 if( FFABS( p0 - q0 ) < alpha &&
6121 FFABS( p1 - p0 ) < beta &&
6122 FFABS( q1 - q0 ) < beta ) {
6124 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6125 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6126 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
// Horizontal-edge counterpart of filter_mb_edgev(): pixel neighbours are
// addressed at multiples of the stride (pix_next) instead of +-1.
// bS < 4 delegates to the DSP vertical luma filter; the visible fallback
// is the strong (bS == 4) path. NOTE(review): the if/else structure is
// partially elided in this listing.
6132 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6134 const int index_a = qp + h->slice_alpha_c0_offset;
6135 const int alpha = (alpha_table+52)[index_a];
6136 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6137 const int pix_next = stride;
6142 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6143 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6145 /* 16px edge length, see filter_mb_edgev */
6146 for( d = 0; d < 16; d++ ) {
6147 const int p0 = pix[-1*pix_next];
6148 const int p1 = pix[-2*pix_next];
6149 const int p2 = pix[-3*pix_next];
6150 const int q0 = pix[0];
6151 const int q1 = pix[1*pix_next];
6152 const int q2 = pix[2*pix_next];
6154 if( FFABS( p0 - q0 ) < alpha &&
6155 FFABS( p1 - p0 ) < beta &&
6156 FFABS( q1 - q0 ) < beta ) {
6158 const int p3 = pix[-4*pix_next];
6159 const int q3 = pix[ 3*pix_next];
// Strong filtering, same weights as the vertical case.
6161 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6162 if( FFABS( p2 - p0 ) < beta) {
6164 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6165 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6166 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6169 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6171 if( FFABS( q2 - q0 ) < beta) {
6173 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6174 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6175 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6178 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6182 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6183 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6185 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
// Horizontal chroma edge: mirror of filter_mb_edgecv() using the DSP
// vertical chroma filters. NOTE(review): the if/else line between the
// two DSP calls is elided in this listing.
6192 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6194 const int index_a = qp + h->slice_alpha_c0_offset;
6195 const int alpha = (alpha_table+52)[index_a];
6196 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6201 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6202 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6204 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
// Fast-path deblocking for one macroblock. Falls back to the generic
// filter_mb() for cases it cannot handle (picture-border MBs, missing
// DSP strength helper, per-plane chroma qp, slice-boundary rules);
// otherwise computes averaged edge qps, skips the MB entirely when all
// are below the activity threshold, and filters intra MBs with constant
// bS or computes per-edge bS via the DSP helper. NOTE(review): the tail
// of this function (after line 6303) is elided in this listing.
6208 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6209 MpegEncContext * const s = &h->s;
// Bottom-field pictures start on row 1, so "first row" differs.
6210 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6212 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6216 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6218 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6219 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6220 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6223 assert(!FRAME_MBAFF);
// Edge qps are the average of the two adjacent MBs' qps (luma + chroma).
6225 mb_type = s->current_picture.mb_type[mb_xy];
6226 qp = s->current_picture.qscale_table[mb_xy];
6227 qp0 = s->current_picture.qscale_table[mb_xy-1];
6228 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6229 qpc = get_chroma_qp( h, 0, qp );
6230 qpc0 = get_chroma_qp( h, 0, qp0 );
6231 qpc1 = get_chroma_qp( h, 0, qp1 );
6232 qp0 = (qp + qp0 + 1) >> 1;
6233 qp1 = (qp + qp1 + 1) >> 1;
6234 qpc0 = (qpc + qpc0 + 1) >> 1;
6235 qpc1 = (qpc + qpc1 + 1) >> 1;
// Below this threshold the filter is a no-op for every edge — skip MB.
6236 qp_thresh = 15 - h->slice_alpha_c0_offset;
6237 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6238 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
// Intra MBs: bS is constant (4 on MB edges, 3 inside; 3 on the top MB
// edge in field pictures), so filter directly without strength calc.
6241 if( IS_INTRA(mb_type) ) {
6242 int16_t bS4[4] = {4,4,4,4};
6243 int16_t bS3[4] = {3,3,3,3};
6244 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
// 8x8 transform: only edges 0 and 2 exist for luma.
6245 if( IS_8x8DCT(mb_type) ) {
6246 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6247 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6248 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6249 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6251 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6252 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6253 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6254 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6255 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6256 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6257 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6258 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6260 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6261 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6262 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6263 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6264 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6265 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6266 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6267 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
// Inter MBs: compute per-edge bS (viewed as uint64 per edge via bSv),
// then override the MB-boundary edges with 4 (or 3 for field top) when
// the neighbour is intra.
6270 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6271 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6273 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6275 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6277 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6278 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6279 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6280 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6282 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6283 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6284 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6285 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
6287 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6288 bSv[0][0] = 0x0004000400040004ULL;
6289 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6290 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
// Filters one edge (dir 0 = vertical, 1 = horizontal) if any bS != 0;
// edge 0 uses the averaged cross-MB qp, inner edges the MB's own qp.
6292 #define FILTER(hv,dir,edge)\
6293 if(bSv[dir][edge]) {\
6294 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6296 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6297 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6303 } else if( IS_8x8DCT(mb_type) ) {
/* Spec-exact deblocking filter for one macroblock (the approximate fast path
 * is filter_mb_fast, declared near the top of the file).  Derives boundary
 * strengths (bS) for the vertical (dir==0) and horizontal (dir==1) edges of
 * the MB at (mb_x,mb_y) and applies the per-edge luma/chroma filters to
 * img_y/img_cb/img_cr with the given strides.
 * NOTE(review): this chunk is an extraction with lines missing (closing
 * braces, some declarations); comments describe only the visible code. */
6322 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6323 MpegEncContext * const s = &h->s;
6324 const int mb_xy= mb_x + mb_y*s->mb_stride;
6325 const int mb_type = s->current_picture.mb_type[mb_xy];
/* interlaced MBs use a tighter vertical-MV threshold (2 in field units) */
6326 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6327 int first_vertical_edge_done = 0;
6330 //for sufficiently low qp, filtering wouldn't do anything
6331 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6333 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6334 int qp = s->current_picture.qscale_table[mb_xy];
/* early-out when this MB's QP (averaged with each neighbour's) is below the
 * threshold at which alpha/beta would disable the filter anyway */
6336 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6337 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6342 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6343 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6344 int top_type, left_type[2];
6345 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6346 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6347 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
/* rebuild the non_zero_count cache entries from cbp for 8x8-transform
 * neighbours, since CAVLC stores per-4x4 nnz that the filter cannot use */
6349 if(IS_8x8DCT(top_type)){
6350 h->non_zero_count_cache[4+8*0]=
6351 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6352 h->non_zero_count_cache[6+8*0]=
6353 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6355 if(IS_8x8DCT(left_type[0])){
6356 h->non_zero_count_cache[3+8*1]=
6357 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6359 if(IS_8x8DCT(left_type[1])){
6360 h->non_zero_count_cache[3+8*3]=
6361 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
6364 if(IS_8x8DCT(mb_type)){
6365 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6366 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp_table[mb_xy] & 1;
6368 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6369 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp_table[mb_xy] & 2;
6371 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6372 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp_table[mb_xy] & 4;
6374 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6375 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp_table[mb_xy] & 8;
/* MBAFF special case: the first vertical edge between a frame MB and an
 * interlaced left neighbour pair needs 8 bS values and 2 QPs */
6380 // left mb is in picture
6381 && h->slice_table[mb_xy-1] != 255
6382 // and current and left pair do not have the same interlaced type
6383 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6384 // and left mb is in the same slice if deblocking_filter == 2
6385 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6386 /* First vertical edge is different in MBAFF frames
6387 * There are 8 different bS to compute and 2 different Qp
6389 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6390 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6395 int mb_qp, mbn0_qp, mbn1_qp;
6397 first_vertical_edge_done = 1;
6399 if( IS_INTRA(mb_type) )
6400 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6402 for( i = 0; i < 8; i++ ) {
6403 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6405 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6407 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6408 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6409 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
/* average QP of current MB with each of the two left-pair neighbours,
 * separately for luma (qp), Cb (bqp) and Cr (rqp) */
6416 mb_qp = s->current_picture.qscale_table[mb_xy];
6417 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6418 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6419 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6420 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6421 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6422 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6423 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6424 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6425 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6426 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6427 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6428 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6431 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6432 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6433 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6434 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6435 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6437 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6438 for( dir = 0; dir < 2; dir++ )
6441 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6442 const int mbm_type = s->current_picture.mb_type[mbm_xy];
/* ref2frm maps slice-local reference indices to frame numbers so refs
 * from different slices can be compared */
6443 int (*ref2frm) [48+2] = h->ref2frm[ h->slice_num &15 ];
6444 int (*ref2frmm)[48+2] = h->ref2frm[ h->slice_table[mbm_xy]&15 ];
/* skip edge 0 when the neighbour MB is outside the picture (slice_table
 * sentinel 255) */
6445 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6447 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6448 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6449 // how often to recheck mv-based bS when iterating between edges
6450 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6451 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6452 // how often to recheck mv-based bS when iterating along each edge
6453 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6455 if (first_vertical_edge_done) {
6457 first_vertical_edge_done = 0;
6460 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6463 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6464 && !IS_INTERLACED(mb_type)
6465 && IS_INTERLACED(mbm_type)
6467 // This is a special case in the norm where the filtering must
6468 // be done twice (one each of the field) even if we are in a
6469 // frame macroblock.
6471 static const int nnz_idx[4] = {4,5,6,3};
6472 unsigned int tmp_linesize = 2 * linesize;
6473 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6474 int mbn_xy = mb_xy - 2 * s->mb_stride;
/* filter against each field of the interlaced MB pair above */
6479 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6480 if( IS_INTRA(mb_type) ||
6481 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6482 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6484 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6485 for( i = 0; i < 4; i++ ) {
6486 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6487 mbn_nnz[nnz_idx[i]] != 0 )
6493 // Do not use s->qscale as luma quantizer because it has not the same
6494 // value in IPCM macroblocks.
6495 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6496 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6497 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6498 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6499 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6500 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6501 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6502 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* main per-edge loop: compute bS[0..3] for this edge, then filter */
6509 for( edge = start; edge < edges; edge++ ) {
6510 /* mbn_xy: neighbor macroblock */
6511 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6512 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6513 int (*ref2frmn)[48+2] = edge > 0 ? ref2frm : ref2frmm;
/* 8x8 transform leaves no coefficients on odd 4x4 edges: skip them */
6517 if( (edge&1) && IS_8x8DCT(mb_type) )
6520 if( IS_INTRA(mb_type) ||
6521 IS_INTRA(mbn_type) ) {
6524 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6525 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6534 bS[0] = bS[1] = bS[2] = bS[3] = value;
6539 if( edge & mask_edge ) {
6540 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6543 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6544 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6547 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6548 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6549 int bn_idx= b_idx - (dir ? 8:1);
/* bS=1 if refs differ or any MV component differs by >= 1 luma sample */
6552 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6553 v |= ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[l][h->ref_cache[l][bn_idx]+2] ||
6554 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6555 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
/* B slices: also compare each list against the opposite list */
6558 if(h->slice_type_nos == FF_B_TYPE && v){
6560 for( l = 0; !v && l < 2; l++ ) {
6562 v |= ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[ln][h->ref_cache[ln][bn_idx]+2] ||
6563 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6564 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6568 bS[0] = bS[1] = bS[2] = bS[3] = v;
/* general case: derive bS per 4x4 sub-block along the edge */
6574 for( i = 0; i < 4; i++ ) {
6575 int x = dir == 0 ? edge : i;
6576 int y = dir == 0 ? i : edge;
6577 int b_idx= 8 + 4 + x + 8*y;
6578 int bn_idx= b_idx - (dir ? 8:1);
6580 if( h->non_zero_count_cache[b_idx] != 0 ||
6581 h->non_zero_count_cache[bn_idx] != 0 ) {
6587 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6588 if( ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[l][h->ref_cache[l][bn_idx]+2] ||
6589 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6590 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6596 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6598 for( l = 0; l < 2; l++ ) {
6600 if( ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[ln][h->ref_cache[ln][bn_idx]+2] ||
6601 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6602 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
6611 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6616 // Do not use s->qscale as luma quantizer because it has not the same
6617 // value in IPCM macroblocks.
6618 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6619 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6620 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6621 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
/* chroma edges exist only at even luma-edge positions (4:2:0) */
6623 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6624 if( (edge&1) == 0 ) {
6625 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6626 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6627 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6628 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6631 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6632 if( (edge&1) == 0 ) {
6633 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6634 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6635 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6636 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/* Decode all macroblocks of one slice.
 * Dispatches to the CABAC or CAVLC macroblock decoder depending on
 * pps.cabac, reporting decoded/errored MB ranges to the error-resilience
 * layer via ff_er_add_slice().  Returns 0 on slice end, -1 on error.
 * NOTE(review): this chunk is an extraction with lines missing; the trailing
 * loop using decode_mb() appears to be a dead/alternative code path.
 * Fix applied: line "6801" contained mis-encoded tokens "s->?gb" and
 * "s->gb?." — restored to the form used on the identical check at "6789". */
6643 static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
6644 MpegEncContext * const s = &h->s;
/* in partitioned frames only AC errors/ends are reported */
6645 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6649 if( h->pps.cabac ) {
/* CABAC data is byte-aligned after the slice header */
6653 align_get_bits( &s->gb );
6656 ff_init_cabac_states( &h->cabac);
6657 ff_init_cabac_decoder( &h->cabac,
6658 s->gb.buffer + get_bits_count(&s->gb)/8,
6659 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6660 /* calculate pre-state */
6661 for( i= 0; i < 460; i++ ) {
6663 if( h->slice_type_nos == FF_I_TYPE )
6664 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6666 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6669 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6671 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6676 int ret = decode_mb_cabac(h);
6678 //STOP_TIMER("decode_mb_cabac")
6680 if(ret>=0) hl_decode_mb(h);
6682 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6685 if(ret>=0) ret = decode_mb_cabac(h);
6687 if(ret>=0) hl_decode_mb(h);
6690 eos = get_cabac_terminate( &h->cabac );
/* small overread tolerance: CABAC may read up to 2 bytes past the end */
6692 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6693 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6694 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6698 if( ++s->mb_x >= s->mb_width ) {
6700 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6702 if(FIELD_OR_MBAFF_PICTURE) {
6707 if( eos || s->mb_y >= s->mb_height ) {
6708 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6709 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC path */
6716 int ret = decode_mb_cavlc(h);
6718 if(ret>=0) hl_decode_mb(h);
6720 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6722 ret = decode_mb_cavlc(h);
6724 if(ret>=0) hl_decode_mb(h);
6729 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6730 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6735 if(++s->mb_x >= s->mb_width){
6737 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6739 if(FIELD_OR_MBAFF_PICTURE) {
6742 if(s->mb_y >= s->mb_height){
6743 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
/* slice ends cleanly only if we consumed the bitstream exactly */
6745 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6746 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6750 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6757 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6758 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6759 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6760 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6764 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6773 for(;s->mb_y < s->mb_height; s->mb_y++){
6774 for(;s->mb_x < s->mb_width; s->mb_x++){
6775 int ret= decode_mb(h);
6780 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6781 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6786 if(++s->mb_x >= s->mb_width){
6788 if(++s->mb_y >= s->mb_height){
6789 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6790 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6794 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6801 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6802 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6803 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6807 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6814 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6817 return -1; //not reached
/* Parse an SEI "user data unregistered" payload of 'size' bytes.
 * Reads up to sizeof(user_data)-1 bytes and scans the text (after the 16-byte
 * UUID) for an x264 version banner, storing the build number in
 * h->x264_build so bug workarounds can be keyed on it.
 * NOTE(review): this extraction is missing lines between the read loop and
 * the sscanf — presumably a NUL terminator is written there; verify against
 * the full file, otherwise sscanf could read past the buffer. */
6820 static int decode_unregistered_user_data(H264Context *h, int size){
6821 MpegEncContext * const s = &h->s;
6822 uint8_t user_data[16+256];
6828 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6829 user_data[i]= get_bits(&s->gb, 8);
6833 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6834 if(e==1 && build>=0)
6835 h->x264_build= build;
6837 if(s->avctx->debug & FF_DEBUG_BUGS)
6838 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* skip whatever part of the payload did not fit in the local buffer */
6841 skip_bits(&s->gb, 8);
/* Parse the SEI messages of a NAL unit.
 * Each message is (type, size, payload); type and size use the H.264
 * "ff-coded" scheme where 0xFF bytes accumulate 255 each until a
 * terminating byte.  Only unregistered user data is interpreted; all other
 * payload types are skipped. */
6846 static int decode_sei(H264Context *h){
6847 MpegEncContext * const s = &h->s;
6849 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* accumulate payload type: each 0xFF byte adds 255 */
6854 type+= show_bits(&s->gb, 8);
6855 }while(get_bits(&s->gb, 8) == 255);
/* accumulate payload size the same way */
6859 size+= show_bits(&s->gb, 8);
6860 }while(get_bits(&s->gb, 8) == 255);
6864 if(decode_unregistered_user_data(h, size) < 0)
/* unhandled SEI type: skip the whole payload */
6868 skip_bits(&s->gb, 8*size);
6871 //FIXME check bits here
6872 align_get_bits(&s->gb);
/* Parse (and discard) hrd_parameters() from the VUI (H.264 spec Annex E).
 * All fields are consumed to keep the bitstream position correct; none are
 * stored on the SPS. */
6878 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
6879 MpegEncContext * const s = &h->s;
6881 cpb_count = get_ue_golomb(&s->gb) + 1;
6882 get_bits(&s->gb, 4); /* bit_rate_scale */
6883 get_bits(&s->gb, 4); /* cpb_size_scale */
6884 for(i=0; i<cpb_count; i++){
6885 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6886 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6887 get_bits1(&s->gb); /* cbr_flag */
6889 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
6890 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
6891 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
6892 get_bits(&s->gb, 5); /* time_offset_length */
/* Parse vui_parameters() from an SPS (H.264 spec Annex E).
 * Stores sample aspect ratio, timing info, HRD presence and the bitstream
 * restriction fields (num_reorder_frames) on *sps; fields the decoder does
 * not use are read and discarded to keep bit alignment. */
6895 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6896 MpegEncContext * const s = &h->s;
6897 int aspect_ratio_info_present_flag;
6898 unsigned int aspect_ratio_idc;
6899 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
6901 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6903 if( aspect_ratio_info_present_flag ) {
6904 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR: explicit 16-bit num/den; otherwise use the fixed table */
6905 if( aspect_ratio_idc == EXTENDED_SAR ) {
6906 sps->sar.num= get_bits(&s->gb, 16);
6907 sps->sar.den= get_bits(&s->gb, 16);
6908 }else if(aspect_ratio_idc < sizeof(pixel_aspect)/sizeof(*pixel_aspect)){
6909 sps->sar= pixel_aspect[aspect_ratio_idc];
6911 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6918 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6920 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6921 get_bits1(&s->gb); /* overscan_appropriate_flag */
6924 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6925 get_bits(&s->gb, 3); /* video_format */
6926 get_bits1(&s->gb); /* video_full_range_flag */
6927 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6928 get_bits(&s->gb, 8); /* colour_primaries */
6929 get_bits(&s->gb, 8); /* transfer_characteristics */
6930 get_bits(&s->gb, 8); /* matrix_coefficients */
6934 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6935 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6936 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6939 sps->timing_info_present_flag = get_bits1(&s->gb);
6940 if(sps->timing_info_present_flag){
6941 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6942 sps->time_scale = get_bits_long(&s->gb, 32);
6943 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
6946 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6947 if(nal_hrd_parameters_present_flag)
6948 decode_hrd_parameters(h, sps);
6949 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6950 if(vcl_hrd_parameters_present_flag)
6951 decode_hrd_parameters(h, sps);
6952 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
6953 get_bits1(&s->gb); /* low_delay_hrd_flag */
6954 get_bits1(&s->gb); /* pic_struct_present_flag */
6956 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6957 if(sps->bitstream_restriction_flag){
6958 unsigned int num_reorder_frames;
6959 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6960 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6961 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6962 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6963 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
6964 num_reorder_frames= get_ue_golomb(&s->gb);
6965 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* sanity cap: the DPB can hold at most 16 frames */
6967 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
6968 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
6972 sps->num_reorder_frames= num_reorder_frames;
/* Parse one scaling list of 'size' (16 or 64) entries into 'factors'.
 * If the list is absent from the bitstream, 'fallback_list' is copied; if
 * the first delta yields 0, the JVT default 'jvt_list' is used.  Deltas are
 * signed Exp-Golomb, accumulated mod 256 in zigzag order; a 0 entry repeats
 * the previous value ("last"). */
6978 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
6979 const uint8_t *jvt_list, const uint8_t *fallback_list){
6980 MpegEncContext * const s = &h->s;
6981 int i, last = 8, next = 8;
6982 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
6983 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
6984 memcpy(factors, fallback_list, size*sizeof(uint8_t));
6986 for(i=0;i<size;i++){
6988 next = (last + get_se_golomb(&s->gb)) & 0xff;
6989 if(!i && !next){ /* matrix not written, we use the preset one */
6990 memcpy(factors, jvt_list, size*sizeof(uint8_t));
6993 last = factors[scan[i]] = next ? next : last;
/* Parse the scaling-matrix block of an SPS (is_sps!=0) or PPS (is_sps==0).
 * Fallback rules per the spec: a PPS falls back to the SPS matrices when the
 * SPS carried any (fallback_sps), otherwise to the flat/JVT defaults; each
 * intra/inter chroma list falls back to the previously decoded list. */
6997 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
6998 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
6999 MpegEncContext * const s = &h->s;
7000 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7001 const uint8_t *fallback[4] = {
7002 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7003 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7004 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7005 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7007 if(get_bits1(&s->gb)){
7008 sps->scaling_matrix_present |= is_sps;
7009 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7010 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7011 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7012 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7013 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7014 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
/* 8x8 lists are present only when the 8x8 transform can be used */
7015 if(is_sps || pps->transform_8x8_mode){
7016 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7017 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7019 } else if(fallback_sps) {
7020 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7021 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
7026 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
7029 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
7030 const size_t size, const char *name)
7033 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
7038 vec[id] = av_mallocz(size);
7040 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
/* Parse a sequence parameter set NAL and store it in h->sps_buffers[sps_id].
 * Covers profile/level, chroma format (high profile), POC parameters,
 * reference frame count, picture dimensions, interlacing flags, cropping and
 * optional VUI.  Returns 0 on success, negative on error.
 * NOTE(review): this chunk is an extraction with lines missing (declarations,
 * some braces/returns); comments describe only the visible code.
 * Fix applied at line "7146": the cropping sanity check consulted
 * h->sps.frame_mbs_only_flag — the *currently active* SPS, which may be a
 * different (or zeroed) SPS — instead of the one being parsed.  It now uses
 * sps->frame_mbs_only_flag, read a few lines above at "7122". */
7045 static inline int decode_seq_parameter_set(H264Context *h){
7046 MpegEncContext * const s = &h->s;
7047 int profile_idc, level_idc;
7048 unsigned int sps_id, tmp, mb_width, mb_height;
7052 profile_idc= get_bits(&s->gb, 8);
7053 get_bits1(&s->gb); //constraint_set0_flag
7054 get_bits1(&s->gb); //constraint_set1_flag
7055 get_bits1(&s->gb); //constraint_set2_flag
7056 get_bits1(&s->gb); //constraint_set3_flag
7057 get_bits(&s->gb, 4); // reserved
7058 level_idc= get_bits(&s->gb, 8);
7059 sps_id= get_ue_golomb(&s->gb);
7061 sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
7065 sps->profile_idc= profile_idc;
7066 sps->level_idc= level_idc;
7068 if(sps->profile_idc >= 100){ //high profile
7069 sps->chroma_format_idc= get_ue_golomb(&s->gb);
7070 if(sps->chroma_format_idc == 3)
7071 get_bits1(&s->gb); //residual_color_transform_flag
7072 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7073 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7074 sps->transform_bypass = get_bits1(&s->gb);
7075 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7077 sps->scaling_matrix_present = 0;
7078 sps->chroma_format_idc= 1;
7081 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7082 sps->poc_type= get_ue_golomb(&s->gb);
7084 if(sps->poc_type == 0){ //FIXME #define
7085 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7086 } else if(sps->poc_type == 1){//FIXME #define
7087 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7088 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7089 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7090 tmp= get_ue_golomb(&s->gb);
7092 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7093 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7096 sps->poc_cycle_length= tmp;
7098 for(i=0; i<sps->poc_cycle_length; i++)
7099 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7100 }else if(sps->poc_type != 2){
7101 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7105 tmp= get_ue_golomb(&s->gb);
7106 if(tmp > MAX_PICTURE_COUNT-2 || tmp >= 32){
7107 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7110 sps->ref_frame_count= tmp;
7111 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7112 mb_width= get_ue_golomb(&s->gb) + 1;
7113 mb_height= get_ue_golomb(&s->gb) + 1;
7114 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7115 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7116 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7119 sps->mb_width = mb_width;
7120 sps->mb_height= mb_height;
7122 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7123 if(!sps->frame_mbs_only_flag)
7124 sps->mb_aff= get_bits1(&s->gb);
7128 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7130 #ifndef ALLOW_INTERLACE
7132 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7134 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7135 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7137 sps->crop= get_bits1(&s->gb);
7139 sps->crop_left = get_ue_golomb(&s->gb);
7140 sps->crop_right = get_ue_golomb(&s->gb);
7141 sps->crop_top = get_ue_golomb(&s->gb);
7142 sps->crop_bottom= get_ue_golomb(&s->gb);
7143 if(sps->crop_left || sps->crop_top){
7144 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
/* use the SPS being parsed, not the (possibly different) active h->sps:
 * field coding doubles the vertical crop unit, so allow up to 16 rows */
7146 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7147 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7153 sps->crop_bottom= 0;
7156 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7157 if( sps->vui_parameters_present_flag )
7158 decode_vui_parameters(h, sps);
7160 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7161 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7162 sps_id, sps->profile_idc, sps->level_idc,
7164 sps->ref_frame_count,
7165 sps->mb_width, sps->mb_height,
7166 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7167 sps->direct_8x8_inference_flag ? "8B8" : "",
7168 sps->crop_left, sps->crop_right,
7169 sps->crop_top, sps->crop_bottom,
7170 sps->vui_parameters_present_flag ? "VUI" : "",
7171 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
/* Precompute the luma-QP -> chroma-QP lookup table for chroma plane t
 * (0 = Cb, 1 = Cr), applying the PPS chroma_qp_index_offset 'index' and
 * clipping the shifted QP to the valid 0..51 range before mapping through
 * the spec's chroma_qp table. */
7178 build_qp_table(PPS *pps, int t, int index)
7181 for(i = 0; i < 52; i++)
7182 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/* Parse a picture parameter set NAL ('bit_length' bits long) and store it in
 * h->pps_buffers[pps_id].  FMO (slice_group_count > 1) is parsed far enough
 * to report it as unsupported; the embedded table fragments below quote the
 * spec syntax for the skipped slice-group map types.
 * NOTE(review): extraction gaps — declarations and some braces/returns are
 * not visible in this chunk. */
7185 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7186 MpegEncContext * const s = &h->s;
7187 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7190 pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
/* the referenced SPS must already have been decoded */
7194 tmp= get_ue_golomb(&s->gb);
7195 if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
7196 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7201 pps->cabac= get_bits1(&s->gb);
7202 pps->pic_order_present= get_bits1(&s->gb);
7203 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7204 if(pps->slice_group_count > 1 ){
7205 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7206 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7207 switch(pps->mb_slice_group_map_type){
7210 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7211 | run_length[ i ] |1 |ue(v) |
7216 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7218 | top_left_mb[ i ] |1 |ue(v) |
7219 | bottom_right_mb[ i ] |1 |ue(v) |
7227 | slice_group_change_direction_flag |1 |u(1) |
7228 | slice_group_change_rate_minus1 |1 |ue(v) |
7233 | slice_group_id_cnt_minus1 |1 |ue(v) |
7234 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7236 | slice_group_id[ i ] |1 |u(v) |
7241 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7242 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7243 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7244 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7245 pps->ref_count[0]= pps->ref_count[1]= 1;
7249 pps->weighted_pred= get_bits1(&s->gb);
7250 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7251 pps->init_qp= get_se_golomb(&s->gb) + 26;
7252 pps->init_qs= get_se_golomb(&s->gb) + 26;
7253 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7254 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7255 pps->constrained_intra_pred= get_bits1(&s->gb);
7256 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7258 pps->transform_8x8_mode= 0;
7259 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7260 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7261 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* optional trailing extension: 8x8 transform flag, scaling matrices and the
 * second chroma QP offset, present only if bits remain */
7263 if(get_bits_count(&s->gb) < bit_length){
7264 pps->transform_8x8_mode= get_bits1(&s->gb);
7265 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7266 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7268 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7271 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7272 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7273 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7274 h->pps.chroma_qp_diff= 1;
7276 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7277 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7278 pps_id, pps->sps_id,
7279 pps->cabac ? "CABAC" : "CAVLC",
7280 pps->slice_group_count,
7281 pps->ref_count[0], pps->ref_count[1],
7282 pps->weighted_pred ? "weighted" : "",
7283 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7284 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7285 pps->constrained_intra_pred ? "CONSTR" : "",
7286 pps->redundant_pic_cnt_present ? "REDU" : "",
7287 pps->transform_8x8_mode ? "8x8DCT" : ""
7295 * Call decode_slice() for each context.
7297 * @param h h264 master context
7298 * @param context_count number of contexts to execute
7300 static void execute_decode_slices(H264Context *h, int context_count){
7301 MpegEncContext * const s = &h->s;
7302 AVCodecContext * const avctx= s->avctx;
// Fast path: with a single context, decode directly on the calling thread.
7306 if(context_count == 1) {
7307 decode_slice(avctx, h);
// Multi-context path (else branch; opener elided in this excerpt):
// seed each worker context's error-resilience settings and reset its
// error counter before dispatching.
7309 for(i = 1; i < context_count; i++) {
7310 hx = h->thread_context[i];
7311 hx->s.error_resilience = avctx->error_resilience;
7312 hx->s.error_count = 0;
// Run decode_slice() over all thread contexts via the codec's execute()
// callback (one invocation per context).
7315 avctx->execute(avctx, (void *)decode_slice,
7316 (void **)h->thread_context, NULL, context_count);
7318 /* pull back stuff from slices to master context */
// The last context holds the final decode position/state for the frame.
7319 hx = h->thread_context[context_count - 1];
7320 s->mb_x = hx->s.mb_x;
7321 s->mb_y = hx->s.mb_y;
7322 s->dropable = hx->s.dropable;
7323 s->picture_structure = hx->s.picture_structure;
// Fold every worker's error count into the master context's total.
7324 for(i = 1; i < context_count; i++)
7325 h->s.error_count += h->thread_context[i]->s.error_count;
// Split the input buffer into NAL units and dispatch each one to the
// appropriate parser (slice, DPA/DPB/DPC partitions, SEI, SPS, PPS, ...).
// Returns the number of bytes consumed (via the elided tail of the loop).
7330 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7331 MpegEncContext * const s = &h->s;
7332 AVCodecContext * const avctx= s->avctx;
7334 H264Context *hx; ///< thread context
7335 int context_count = 0;
7337 h->max_contexts = avctx->thread_count;
// Debug hex dump of the start of the buffer (guard elided in excerpt).
7340 for(i=0; i<50; i++){
7341 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
// Without CHUNKS mode each call starts a fresh access unit, so reset the
// slice counter; keep current_picture_ptr while waiting for a second field.
7344 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7345 h->current_slice = 0;
7346 if (!s->first_field)
7347 s->current_picture_ptr= NULL;
// --- main NAL iteration (loop header elided in this excerpt) ---
7359 if(buf_index >= buf_size) break;
// AVC (avcC) framing: NAL size is a big-endian length prefix of
// nal_length_size bytes.
7361 for(i = 0; i < h->nal_length_size; i++)
7362 nalsize = (nalsize << 8) | buf[buf_index++];
// Reject impossible sizes and sizes that overrun the buffer.
7363 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7368 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
// Annex-B framing: scan for the 00 00 01 start-code prefix.
7373 // start code prefix search
7374 for(; buf_index + 3 < buf_size; buf_index++){
7375 // This should always succeed in the first iteration.
7376 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7380 if(buf_index+3 >= buf_size) break;
// Parse this NAL on the next free thread context.
7385 hx = h->thread_context[context_count];
// decode_nal() unescapes emulation-prevention bytes and returns the RBSP.
7387 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7388 if (ptr==NULL || dst_length < 0){
// Strip trailing zero bytes before locating the rbsp_stop_bit.
7391 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7393 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7395 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7396 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
// In AVC framing the declared NAL size must match what was consumed.
7399 if (h->is_avc && (nalsize != consumed)){
7400 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7404 buf_index += consumed;
// Skip non-reference NALs entirely when hurry_up / skip_frame ask for it.
7406 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7407 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
// Dispatch on NAL unit type (case labels partially elided in excerpt).
7412 switch(hx->nal_unit_type){
// IDR slice: reject streams that mix IDR and non-IDR slices in one AU.
7414 if (h->nal_unit_type != NAL_IDR_SLICE) {
7415 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7418 idr(h); //FIXME ensure we don't loose some frames if there is reordering
// Plain (non-partitioned) slice: single bitstream reader for everything.
7420 init_get_bits(&hx->s.gb, ptr, bit_length);
7422 hx->inter_gb_ptr= &hx->s.gb;
7423 hx->s.data_partitioning = 0;
7425 if((err = decode_slice_header(hx, h)))
7428 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
// Decode the slice body only if it survives all skip/hurry filters.
7429 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7430 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7431 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7432 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7433 && avctx->skip_frame < AVDISCARD_ALL)
// Data partition A: carries the slice header; partitions B/C follow.
7437 init_get_bits(&hx->s.gb, ptr, bit_length);
7439 hx->inter_gb_ptr= NULL;
7440 hx->s.data_partitioning = 1;
7442 err = decode_slice_header(hx, h);
// Data partition B: intra coefficients get their own bitstream reader.
7445 init_get_bits(&hx->intra_gb, ptr, bit_length);
7446 hx->intra_gb_ptr= &hx->intra_gb;
// Data partition C: inter coefficients.
7449 init_get_bits(&hx->inter_gb, ptr, bit_length);
7450 hx->inter_gb_ptr= &hx->inter_gb;
// Only decode once partition A arrived (data_partitioning set) and the
// same skip filters pass.
7452 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7453 && s->context_initialized
7455 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7456 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7457 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7458 && avctx->skip_frame < AVDISCARD_ALL)
// SEI (presumably; case label elided): parsed on the master context.
7462 init_get_bits(&s->gb, ptr, bit_length);
// Sequence parameter set.
7466 init_get_bits(&s->gb, ptr, bit_length);
7467 decode_seq_parameter_set(h);
7469 if(s->flags& CODEC_FLAG_LOW_DELAY)
// Advertise reordering delay to the caller unless already set higher.
7472 if(avctx->has_b_frames < 2)
7473 avctx->has_b_frames= !s->low_delay;
// Picture parameter set.
7476 init_get_bits(&s->gb, ptr, bit_length);
7478 decode_picture_parameter_set(h, bit_length);
// NAL types that are deliberately ignored.
7482 case NAL_END_SEQUENCE:
7483 case NAL_END_STREAM:
7484 case NAL_FILLER_DATA:
7486 case NAL_AUXILIARY_SLICE:
7489 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
// Flush accumulated slice contexts when the pool is full.
7492 if(context_count == h->max_contexts) {
7493 execute_decode_slices(h, context_count);
7498 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7500 /* Slice could not be decoded in parallel mode, copy down
7501 * NAL unit stuff to context 0 and restart. Note that
7502 * rbsp_buffer is not transferred, but since we no longer
7503 * run in parallel mode this should not be an issue. */
7504 h->nal_unit_type = hx->nal_unit_type;
7505 h->nal_ref_idc = hx->nal_ref_idc;
// Decode any slices still pending after the NAL loop ends.
7511 execute_decode_slices(h, context_count);
7516 * returns the number of bytes consumed for building the current frame
// Clamp the raw parse position into a sane "bytes consumed" value for the
// caller of decode_frame() (the return statement is elided in this excerpt).
7518 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
// Never report zero progress: a caller advancing by the return value
// could otherwise spin forever on the same input.
7519 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
// Within 10 bytes of the end, just claim the whole buffer was consumed.
7520 if(pos+10>buf_size) pos=buf_size; // oops ;)
// Top-level decode callback: parse one packet's NAL units, then apply
// B-frame reordering and emit at most one picture through *pict/*data_size.
// Returns bytes consumed via get_consumed_bytes().
7525 static int decode_frame(AVCodecContext *avctx,
7526 void *data, int *data_size,
7527 const uint8_t *buf, int buf_size)
7529 H264Context *h = avctx->priv_data;
7530 MpegEncContext *s = &h->s;
7531 AVFrame *pict = data;
7534 s->flags= avctx->flags;
7535 s->flags2= avctx->flags2;
7537 /* end of stream, output what is still in the buffers */
7538 if (buf_size == 0) {
7542 //FIXME factorize this with the output code below
// Flush path: pick the delayed picture with the smallest POC and emit it.
7543 out = h->delayed_pic[0];
7545 for(i=1; h->delayed_pic[i] && h->delayed_pic[i]->poc; i++)
7546 if(h->delayed_pic[i]->poc < out->poc){
7547 out = h->delayed_pic[i];
// Compact the delayed-picture list over the emitted slot.
7551 for(i=out_idx; h->delayed_pic[i]; i++)
7552 h->delayed_pic[i] = h->delayed_pic[i+1];
7555 *data_size = sizeof(AVFrame);
7556 *pict= *(AVFrame*)out;
// One-time parse of the avcC extradata box (MP4/MOV-style streams).
7562 if(h->is_avc && !h->got_avcC) {
7563 int i, cnt, nalsize;
7564 unsigned char *p = avctx->extradata;
// Minimum avcC size: version, profile/level, length-size byte, sps count.
7565 if(avctx->extradata_size < 7) {
7566 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7570 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7573 /* sps and pps in the avcC always have length coded with 2 bytes,
7574 so put a fake nal_length_size = 2 while parsing them */
7575 h->nal_length_size = 2;
7576 // Decode sps from avcC
7577 cnt = *(p+5) & 0x1f; // Number of sps
7579 for (i = 0; i < cnt; i++) {
// +2 to include the 16-bit length prefix itself.
7580 nalsize = AV_RB16(p) + 2;
7581 if(decode_nal_units(h, p, nalsize) < 0) {
7582 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7587 // Decode pps from avcC
7588 cnt = *(p++); // Number of pps
7589 for (i = 0; i < cnt; i++) {
7590 nalsize = AV_RB16(p) + 2;
7591 if(decode_nal_units(h, p, nalsize) != nalsize) {
7592 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7597 // Now store right nal length size, that will be use to parse all other nals
7598 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7599 // Do not reparse avcC
// Annex-B streams may carry SPS/PPS in plain extradata; parse it once.
7603 if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
7604 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7608 buf_index=decode_nal_units(h, buf, buf_size);
// Outside CHUNKS mode a packet must have produced a picture.
7612 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7613 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7614 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
// Picture complete (or CHUNKS mode reached the last MB row): finish it.
7618 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7619 Picture *out = s->current_picture_ptr;
7620 Picture *cur = s->current_picture_ptr;
7621 int i, pics, cross_idr, out_of_order, out_idx;
7625 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7626 s->current_picture_ptr->pict_type= s->pict_type;
// Apply memory-management control operations and roll POC state forward
// for the next picture's POC derivation.
7629 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7630 h->prev_poc_msb= h->poc_msb;
7631 h->prev_poc_lsb= h->poc_lsb;
7633 h->prev_frame_num_offset= h->frame_num_offset;
7634 h->prev_frame_num= h->frame_num;
7637 * FIXME: Error handling code does not seem to support interlaced
7638 * when slices span multiple rows
7639 * The ff_er_add_slice calls don't work right for bottom
7640 * fields; they cause massive erroneous error concealing
7641 * Error marking covers both fields (top and bottom).
7642 * This causes a mismatched s->error_count
7643 * and a bad error table. Further, the error count goes to
7644 * INT_MAX when called for bottom field, because mb_y is
7645 * past end by one (callers fault) and resync_mb_y != 0
7646 * causes problems for the first MB line, too.
// An INT_MAX field POC means only one field of the frame has arrived.
7653 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7654 /* Wait for second field. */
7658 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7659 /* Derive top_field_first from field pocs. */
7660 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7662 //FIXME do something with unavailable reference frames
7664 /* Sort B-frames into display order */
// Grow the advertised reorder delay if the SPS declares a larger one.
7666 if(h->sps.bitstream_restriction_flag
7667 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7668 s->avctx->has_b_frames = h->sps.num_reorder_frames;
// Strict compliance without restriction info: assume the worst-case delay.
7672 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7673 && !h->sps.bitstream_restriction_flag){
7674 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
// Append the finished picture to the delayed-output list.
7679 while(h->delayed_pic[pics]) pics++;
7681 assert(pics <= MAX_DELAYED_PIC_COUNT);
7683 h->delayed_pic[pics++] = cur;
// Pin non-reference pictures so they aren't recycled before output.
7684 if(cur->reference == 0)
7685 cur->reference = DELAYED_PIC_REF;
// Candidate for output = delayed picture with the smallest POC.
7687 out = h->delayed_pic[0];
7689 for(i=1; h->delayed_pic[i] && h->delayed_pic[i]->poc; i++)
7690 if(h->delayed_pic[i]->poc < out->poc){
7691 out = h->delayed_pic[i];
// cross_idr: the list spans an IDR boundary (POC restarted), so POC
// comparison against outputed_poc is not meaningful.
7694 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i];
7696 out_of_order = !cross_idr && out->poc < h->outputed_poc;
7698 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
// Heuristic: bump has_b_frames when output order violations show the
// stream needs more reordering delay than currently assumed.
7700 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7702 ((!cross_idr && out->poc > h->outputed_poc + 2)
7703 || cur->pict_type == FF_B_TYPE)))
7706 s->avctx->has_b_frames++;
// Pop the chosen picture once the delay buffer is full (or on violation).
7709 if(out_of_order || pics > s->avctx->has_b_frames){
7710 out->reference &= ~DELAYED_PIC_REF;
7711 for(i=out_idx; h->delayed_pic[i]; i++)
7712 h->delayed_pic[i] = h->delayed_pic[i+1];
// Only emit in-order pictures to the caller.
7714 if(!out_of_order && pics > s->avctx->has_b_frames){
7715 *data_size = sizeof(AVFrame);
7717 h->outputed_poc = out->poc;
7718 *pict= *(AVFrame*)out;
7720 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7725 assert(pict->data[0] || !*data_size);
7726 ff_print_debug_info(s, pict);
7727 //printf("out %d\n", (int)pict->data[0]);
7730 /* Return the Picture timestamp as the frame number */
7731 /* we subtract 1 because it is added on utils.c */
7732 avctx->frame_number = s->picture_number - 1;
7734 return get_consumed_bytes(s, buf_index, buf_size);
// Fill h->mb_avail[] with neighbour-availability flags for the current
// macroblock: a neighbour counts as available only if it lies inside the
// frame and belongs to the same slice (slice_table match).
7737 static inline void fill_mb_avail(H264Context *h){
7738 MpegEncContext * const s = &h->s;
7739 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// Row above: top-left [0], top [1], top-right [2]. The edge checks
// (s->mb_x, s->mb_x+1 < mb_width) guard against leaving the frame;
// the enclosing first-row guard is elided in this excerpt.
7742 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7743 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7744 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
// Left neighbour [3], plus two entries that are currently constant.
7750 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7751 h->mb_avail[4]= 1; //FIXME move out
7752 h->mb_avail[5]= 0; //FIXME move out
// NOTE(review): self-test fragment (presumably the #ifdef TEST main();
// the function header is elided in this excerpt). Exercises the Golomb
// coder, the 4x4 (I)DCT round trip, the quantizer and the NAL escaping.
7760 #define SIZE (COUNT*40)
7766 // int int_temp[10000];
7768 AVCodecContext avctx;
7770 dsputil_init(&dsp, &avctx);
// --- round-trip test: unsigned Exp-Golomb write then read back ---
7772 init_put_bits(&pb, temp, SIZE);
7773 printf("testing unsigned exp golomb\n");
7774 for(i=0; i<COUNT; i++){
7776 set_ue_golomb(&pb, i);
7777 STOP_TIMER("set_ue_golomb");
7779 flush_put_bits(&pb);
7781 init_get_bits(&gb, temp, 8*SIZE);
7782 for(i=0; i<COUNT; i++){
// Snapshot the next 24 bits for the mismatch diagnostic below.
7785 s= show_bits(&gb, 24);
7788 j= get_ue_golomb(&gb);
7790 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7793 STOP_TIMER("get_ue_golomb");
// --- same round trip for signed Exp-Golomb, centered on zero ---
7797 init_put_bits(&pb, temp, SIZE);
7798 printf("testing signed exp golomb\n");
7799 for(i=0; i<COUNT; i++){
7801 set_se_golomb(&pb, i - COUNT/2);
7802 STOP_TIMER("set_se_golomb");
7804 flush_put_bits(&pb);
7806 init_get_bits(&gb, temp, 8*SIZE);
7807 for(i=0; i<COUNT; i++){
7810 s= show_bits(&gb, 24);
7813 j= get_se_golomb(&gb);
7814 if(j != i - COUNT/2){
7815 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7818 STOP_TIMER("get_se_golomb");
// --- 4x4 DCT/IDCT round trip on random blocks, measuring error ---
7822 printf("testing 4x4 (I)DCT\n");
7825 uint8_t src[16], ref[16];
7826 uint64_t error= 0, max_error=0;
7828 for(i=0; i<COUNT; i++){
7830 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7831 for(j=0; j<16; j++){
7832 ref[j]= random()%255;
7833 src[j]= random()%255;
7836 h264_diff_dct_c(block, src, ref, 4);
// Scale coefficients; the /5 factors compensate the DCT's non-unit
// basis norms on odd rows/columns before the IDCT.
7839 for(j=0; j<16; j++){
7840 // printf("%d ", block[j]);
7841 block[j]= block[j]*4;
7842 if(j&1) block[j]= (block[j]*4 + 2)/5;
7843 if(j&4) block[j]= (block[j]*4 + 2)/5;
7847 s->dsp.h264_idct_add(ref, block, 4);
7848 /* for(j=0; j<16; j++){
7849 printf("%d ", ref[j]);
7853 for(j=0; j<16; j++){
7854 int diff= FFABS(src[j] - ref[j]);
7857 max_error= FFMAX(max_error, diff);
7860 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
// --- quantizer sweep over all 52 QP values on random data ---
7861 printf("testing quantizer\n");
7862 for(qp=0; qp<52; qp++){
7864 src1_block[i]= src2_block[i]= random()%255;
// --- NAL escaping round trip: encode_nal() then decode_nal() must
// reproduce the original bitstream exactly ---
7867 printf("Testing NAL layer\n");
7869 uint8_t bitstream[COUNT];
7870 uint8_t nal[COUNT*2];
7872 memset(&h, 0, sizeof(H264Context));
7874 for(i=0; i<COUNT; i++){
// Random non-zero payload, then force some zero bytes in so the
// emulation-prevention escaping actually triggers.
7882 for(j=0; j<COUNT; j++){
7883 bitstream[j]= (random() % 255) + 1;
7886 for(j=0; j<zeros; j++){
7887 int pos= random() % COUNT;
7888 while(bitstream[pos] == 0){
7897 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7899 printf("encoding failed\n");
7903 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7907 if(out_length != COUNT){
7908 printf("incorrect length %d %d\n", out_length, COUNT);
7912 if(consumed != nal_length){
7913 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7917 if(memcmp(bitstream, out, COUNT)){
7918 printf("mismatch\n");
7924 printf("Testing RBSP\n");
// Codec close callback: free per-decoder allocations (the MpegEncContext
// teardown and return statement are elided in this excerpt).
7932 static av_cold int decode_end(AVCodecContext *avctx)
7934 H264Context *h = avctx->priv_data;
7935 MpegEncContext *s = &h->s;
// Release both NAL unescape buffers (one per parsing context slot).
7937 av_freep(&h->rbsp_buffer[0]);
7938 av_freep(&h->rbsp_buffer[1]);
7939 free_tables(h); //FIXME cleanup init stuff perhaps
7942 // memset(h, 0, sizeof(H264Context));
7948 AVCodec h264_decoder = {
7952 sizeof(H264Context),
7957 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
7959 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),