2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "i386/h264_i386.h"
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
49 #define DELAYED_PIC_REF 4
/* CAVLC code tables, built once at init time (builder not in this excerpt).
 * coeff_token has four variants; total_zeros and run tables are indexed
 * by context as defined by the H.264 spec (clause 9.2). */
51 static VLC coeff_token_vlc[4];
52 static VLC chroma_dc_coeff_token_vlc;
54 static VLC total_zeros_vlc[15];
55 static VLC chroma_dc_total_zeros_vlc[3];
57 static VLC run_vlc[6];
/* Forward declarations: SVQ3 IDCT helpers and the in-loop deblocking
 * filters (full and fast variants), defined later in the file. */
60 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
61 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
62 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
63 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
64 static Picture * remove_long(H264Context *h, int i, int ref_mask);
/* Packs two 16-bit values into one 32-bit word so that pairs (e.g. the two
 * components of a motion vector, or two reference indices) can be stored
 * and compared with a single 32-bit operation.  The endianness switch
 * keeps the in-memory component order consistent on both architectures. */
66 static av_always_inline uint32_t pack16to32(int a, int b){
67 #ifdef WORDS_BIGENDIAN
68 return (b&0xFFFF) + (a<<16);
70 return (a&0xFFFF) + (b<<16);
/* Lookup table for qp % 6, valid for qp in [0, 51]; avoids a runtime
 * division in the dequantization path. */
74 const uint8_t ff_rem6[52]={
75 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
/* Lookup table for qp / 6, valid for qp in [0, 51]; companion to ff_rem6. */
78 const uint8_t ff_div6[52]={
79 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
/* Tables of left-neighbour 4x4 block indices.  fill_caches() selects one
 * of the four rows depending on the MBAFF frame/field pairing of the
 * current and left macroblock pairs. */
82 static const int left_block_options[4][8]={
/**
 * Fills the per-macroblock neighbour caches (intra4x4 prediction modes,
 * non-zero coefficient counts, coded block patterns, motion vectors,
 * reference indices, mvds and direct-mode flags) from the already-decoded
 * neighbouring macroblocks, including the MBAFF frame/field remapping.
 * @param mb_type     type of the current macroblock
 * @param for_deblock nonzero when filling caches for the deblocking filter,
 *                    which uses a looser neighbour-availability rule
 */
89 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
90 MpegEncContext * const s = &h->s;
91 const int mb_xy= h->mb_xy;
92 int topleft_xy, top_xy, topright_xy, left_xy[2];
93 int topleft_type, top_type, topright_type, left_type[2];
95 int topleft_partition= -1;
/* MB above the current one; in field pictures the effective vertical
 * stride is doubled (FIELD_PICTURE shift). */
98 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
100 //FIXME deblocking could skip the intra and nnz parts.
/* Fast path for deblocking a non-MBAFF MB whose top neighbour belongs to
 * the same slice (or the frame has a single slice). */
101 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
104 /* Wow, what a mess, why didn't they simplify the interlacing & intra
105 * stuff, I can't imagine that these complex rules are worth it. */
/* Default (non-MBAFF) neighbour MB indices. */
107 topleft_xy = top_xy - 1;
108 topright_xy= top_xy + 1;
109 left_xy[1] = left_xy[0] = mb_xy-1;
110 left_block = left_block_options[0];
/* MBAFF: each neighbour index depends on whether the current and the
 * neighbouring MB pairs are frame or field coded, and on whether the
 * current MB is the top or the bottom MB of its pair. */
112 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
113 const int top_pair_xy = pair_xy - s->mb_stride;
114 const int topleft_pair_xy = top_pair_xy - 1;
115 const int topright_pair_xy = top_pair_xy + 1;
116 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
117 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
118 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
119 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
120 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
121 const int bottom = (s->mb_y & 1);
122 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
/* Adjust the top/topleft/topright indices one MB row up or down when the
 * field/frame flags of the pairs disagree (conditions partly on lines
 * omitted from this excerpt). */
124 ? !curr_mb_frame_flag // bottom macroblock
125 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
127 top_xy -= s->mb_stride;
130 ? !curr_mb_frame_flag // bottom macroblock
131 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
133 topleft_xy -= s->mb_stride;
134 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
135 topleft_xy += s->mb_stride;
136 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
137 topleft_partition = 0;
140 ? !curr_mb_frame_flag // bottom macroblock
141 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
143 topright_xy -= s->mb_stride;
/* A frame/field mismatch with the left pair forces pair-based left
 * neighbours and a different left_block index table. */
145 if (left_mb_frame_flag != curr_mb_frame_flag) {
146 left_xy[1] = left_xy[0] = pair_xy - 1;
147 if (curr_mb_frame_flag) {
149 left_block = left_block_options[1];
151 left_block= left_block_options[2];
154 left_xy[1] += s->mb_stride;
155 left_block = left_block_options[3];
/* Publish the resolved neighbour MB indices for later consumers. */
160 h->top_mb_xy = top_xy;
161 h->left_mb_xy[0] = left_xy[0];
162 h->left_mb_xy[1] = left_xy[1];
/* Deblocking path: any already-decoded MB is usable (slice_table < 255),
 * regardless of which slice it belongs to. */
166 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
167 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
168 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
/* MBAFF deblocking of an inter MB: reload the current MB's own packed
 * luma nnz bits (stored at [14] by write_back_non_zero_count) and its
 * mvs/refs into the caches. */
170 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
172 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
174 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
175 for(list=0; list<h->list_count; list++){
176 if(USES_LIST(mb_type,list)){
177 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
178 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
179 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
/* Copy four rows of mvs; ref indices are replicated with pack16to32. */
180 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
186 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
187 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
189 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
190 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
/* List unused by this MB: zero mvs, mark refs as LIST_NOT_USED. */
192 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
193 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
/* Normal decode path: neighbours are usable only within the same slice. */
198 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
199 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
200 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
201 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
202 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* Intra: bitmasks of available neighbouring samples, starting from "all
 * available" and clearing bits for each missing neighbour (or, with
 * constrained intra prediction, each inter-coded neighbour). */
205 if(IS_INTRA(mb_type)){
206 h->topleft_samples_available=
207 h->top_samples_available=
208 h->left_samples_available= 0xFFFF;
209 h->topright_samples_available= 0xEEEA;
211 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
212 h->topleft_samples_available= 0xB3FF;
213 h->top_samples_available= 0x33FF;
214 h->topright_samples_available= 0x26EA;
217 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
218 h->topleft_samples_available&= 0xDF5F;
219 h->left_samples_available&= 0x5F5F;
223 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
224 h->topleft_samples_available&= 0x7FFF;
226 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
227 h->topright_samples_available&= 0xFBFF;
/* Cache the 4x4 intra prediction modes of the top and left neighbours;
 * unavailable neighbours receive a substitute value 'pred' (computed on
 * lines omitted from this excerpt). */
229 if(IS_INTRA4x4(mb_type)){
230 if(IS_INTRA4x4(top_type)){
231 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
232 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
233 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
234 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
237 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
242 h->intra4x4_pred_mode_cache[4+8*0]=
243 h->intra4x4_pred_mode_cache[5+8*0]=
244 h->intra4x4_pred_mode_cache[6+8*0]=
245 h->intra4x4_pred_mode_cache[7+8*0]= pred;
248 if(IS_INTRA4x4(left_type[i])){
249 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
250 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
253 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
258 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
259 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
274 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
/* Non-zero coefficient counts of the top/left neighbours.  Unavailable
 * neighbours default to 64, or to 0 for CABAC inter MBs (see the ?:
 * expressions below). */
276 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
277 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
278 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
279 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
281 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
282 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
284 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
285 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
288 h->non_zero_count_cache[4+8*0]=
289 h->non_zero_count_cache[5+8*0]=
290 h->non_zero_count_cache[6+8*0]=
291 h->non_zero_count_cache[7+8*0]=
293 h->non_zero_count_cache[1+8*0]=
294 h->non_zero_count_cache[2+8*0]=
296 h->non_zero_count_cache[1+8*3]=
297 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
301 for (i=0; i<2; i++) {
303 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
304 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
305 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
306 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
308 h->non_zero_count_cache[3+8*1 + 2*8*i]=
309 h->non_zero_count_cache[3+8*2 + 2*8*i]=
310 h->non_zero_count_cache[0+8*1 + 8*i]=
311 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* Cache the coded block patterns of the top and left neighbours. */
318 h->top_cbp = h->cbp_table[top_xy];
319 } else if(IS_INTRA(mb_type)) {
326 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
327 } else if(IS_INTRA(mb_type)) {
333 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
336 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* Inter (or direct) MBs: fill the motion vector and reference index
 * caches from the top, left, topleft and topright neighbours. */
341 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
343 for(list=0; list<h->list_count; list++){
344 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
345 /*if(!h->mv_cache_clean[list]){
346 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
347 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
348 h->mv_cache_clean[list]= 1;
352 h->mv_cache_clean[list]= 0;
/* Top neighbour: bottom row of its mvs and refs. */
354 if(USES_LIST(top_type, list)){
355 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
356 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
357 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
358 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
359 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
360 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
361 h->ref_cache[list][scan8[0] + 0 - 1*8]=
362 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
363 h->ref_cache[list][scan8[0] + 2 - 1*8]=
364 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
366 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
367 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
368 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
369 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
370 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
/* Left neighbours: rightmost column, rows picked via left_block[]. */
374 int cache_idx = scan8[0] - 1 + i*2*8;
375 if(USES_LIST(left_type[i], list)){
376 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
377 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
378 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
379 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
380 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
381 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
383 *(uint32_t*)h->mv_cache [list][cache_idx ]=
384 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
385 h->ref_cache[list][cache_idx ]=
386 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
390 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
/* Topleft neighbour; topleft_partition (MBAFF) selects the middle row
 * instead of the bottom-right partition. */
393 if(USES_LIST(topleft_type, list)){
394 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
395 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
396 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
397 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
399 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
400 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
/* Topright neighbour: bottom-left block of its mv/ref grid. */
403 if(USES_LIST(topright_type, list)){
404 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
405 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
406 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
407 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
409 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
410 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
413 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
/* Reset the padding cells to the right of the block grid (the FIXMEs
 * below note this initialization could live elsewhere). */
416 h->ref_cache[list][scan8[5 ]+1] =
417 h->ref_cache[list][scan8[7 ]+1] =
418 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
419 h->ref_cache[list][scan8[4 ]] =
420 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
421 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
422 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
423 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
424 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
425 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
428 /* XXX beurk, Load mvd */
/* CABAC: cache the neighbours' motion vector differences. */
429 if(USES_LIST(top_type, list)){
430 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
431 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
432 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
433 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
434 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
436 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
437 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
438 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
439 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
441 if(USES_LIST(left_type[0], list)){
442 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
443 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
444 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
446 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
447 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
449 if(USES_LIST(left_type[1], list)){
450 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
451 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
452 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
454 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
455 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
457 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
458 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
459 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
460 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
461 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B slices: cache which neighbouring 8x8 blocks used direct mode. */
463 if(h->slice_type_nos == FF_B_TYPE){
464 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
466 if(IS_DIRECT(top_type)){
467 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
468 }else if(IS_8X8(top_type)){
469 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
470 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
471 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
473 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
476 if(IS_DIRECT(left_type[0]))
477 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
478 else if(IS_8X8(left_type[0]))
479 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
481 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
483 if(IS_DIRECT(left_type[1]))
484 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
485 else if(IS_8X8(left_type[1]))
486 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
488 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* MBAFF frame/field mismatch fix-up: for each neighbour cache cell whose
 * source MB has the opposite coding mode, ref indices are rescaled and
 * vertical mv/mvd components halved or doubled (two MAP_F2F variants). */
494 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
495 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
496 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
497 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
498 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
499 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
500 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
501 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
502 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
503 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
/* Frame MB reading field neighbours: double the ref, halve vertical mvs. */
505 #define MAP_F2F(idx, mb_type)\
506 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
507 h->ref_cache[list][idx] <<= 1;\
508 h->mv_cache[list][idx][1] /= 2;\
509 h->mvd_cache[list][idx][1] /= 2;\
/* Field MB reading frame neighbours: the inverse mapping. */
514 #define MAP_F2F(idx, mb_type)\
515 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
516 h->ref_cache[list][idx] >>= 1;\
517 h->mv_cache[list][idx][1] <<= 1;\
518 h->mvd_cache[list][idx][1] <<= 1;\
/* Number of top/left neighbours using the 8x8 transform. */
528 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/* Writes the bottom row and right column of the intra4x4 prediction mode
 * cache back to the per-MB table, so subsequent MBs can read them as
 * their top/left neighbours in fill_caches(). */
531 static inline void write_back_intra_pred_mode(H264Context *h){
532 const int mb_xy= h->mb_xy;
534 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
535 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
536 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
537 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
538 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
539 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
540 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
544 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Remap tables indexed by the requested mode: entries of -1 appear to mark
 * a mode that needs the missing neighbour (error path with av_log below);
 * other entries give the substitute mode (e.g. DC -> LEFT_DC/TOP_DC). */
546 static inline int check_intra4x4_pred_mode(H264Context *h){
547 MpegEncContext * const s = &h->s;
548 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
549 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
/* Top samples missing: fix up the modes of the top row of 4x4 blocks. */
552 if(!(h->top_samples_available&0x8000)){
554 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
556 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
559 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
/* Left samples missing: same fix-up for the left column of 4x4 blocks. */
564 if(!(h->left_samples_available&0x8000)){
566 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
568 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
571 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
577 } //FIXME cleanup like next
580 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* Whole-MB (16x16 luma / chroma) variant of the check above: validates
 * 'mode' and remaps DC to a variant that uses only available samples. */
582 static inline int check_intra_pred_mode(H264Context *h, int mode){
583 MpegEncContext * const s = &h->s;
584 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
585 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
588 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
/* Remap via top[]/left[] when the corresponding samples are missing;
 * a resulting -1 is the error case (handling on omitted lines). */
592 if(!(h->top_samples_available&0x8000)){
595 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
600 if(!(h->left_samples_available&0x8000)){
603 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
612 * gets the predicted intra4x4 prediction mode.
614 static inline int pred_intra_mode(H264Context *h, int n){
615 const int index8= scan8[n];
616 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
617 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
618 const int min= FFMIN(left, top);
620 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
/* A negative cached mode marks an unavailable neighbour -> predict DC. */
622 if(min<0) return DC_PRED;
/* Stores the bottom row and right column of the non-zero-count cache back
 * to the per-MB table (luma then chroma), plus a packed 16-bit bitmap of
 * all luma nnz flags at [14]/[15] for the deblocking filter. */
626 static inline void write_back_non_zero_count(H264Context *h){
627 const int mb_xy= h->mb_xy;
629 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
630 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
631 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
632 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
633 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
634 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
635 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
637 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
638 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
639 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
641 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
642 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
643 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
646 // store all luma nnzs, for deblocking
649 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
650 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
655 * gets the predicted number of non-zero coefficients.
656 * @param n block index
658 static inline int pred_non_zero_count(H264Context *h, int n){
659 const int index8= scan8[n];
660 const int left= h->non_zero_count_cache[index8 - 1];
661 const int top = h->non_zero_count_cache[index8 - 8];
/* NOTE(review): i is derived from left/top on lines omitted from this
 * excerpt; values >= 64 stem from the "unavailable" sentinel (64) that
 * fill_caches() stores, which suppresses the rounding halving below. */
664 if(i<64) i= (i+1)>>1;
666 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/* Resolves the "C" predictor (diagonal neighbour) for motion vector
 * prediction: normally the top-right block, falling back to the top-left
 * when the top-right is unavailable.  Points *C at the chosen mv and
 * returns its reference index.  The MBAFF special cases fetch the mv
 * directly from the picture and rescale it between frame and field
 * coordinates (SET_DIAG_MV: vertical component *2 or /2, ref >>1 or <<1). */
671 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
672 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
673 MpegEncContext *s = &h->s;
675 /* there is no consistent mapping of mvs to neighboring locations that will
676 * make mbaff happy, so we can't move all this logic to fill_caches */
/* Scratch cache cell scan8[0]-2 holds the rescaled MBAFF result. */
678 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
680 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
681 *C = h->mv_cache[list][scan8[0]-2];
684 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
685 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
686 if(IS_INTERLACED(mb_types[topright_xy])){
687 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
688 const int x4 = X4, y4 = Y4;\
689 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
690 if(!USES_LIST(mb_type,list))\
691 return LIST_NOT_USED;\
692 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
693 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
694 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
695 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
697 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
700 if(topright_ref == PART_NOT_AVAILABLE
701 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
702 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
704 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
705 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
708 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
710 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
711 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* Non-MBAFF path: prefer the cached top-right, else fall back to the
 * top-left neighbour. */
717 if(topright_ref != PART_NOT_AVAILABLE){
718 *C= h->mv_cache[list][ i - 8 + part_width ];
721 tprintf(s->avctx, "topright MV not available\n");
723 *C= h->mv_cache[list][ i - 8 - 1 ];
724 return h->ref_cache[list][ i - 8 - 1 ];
729 * gets the predicted MV.
730 * @param n the block index
731 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
732 * @param mx the x component of the predicted motion vector
733 * @param my the y component of the predicted motion vector
/* Standard H.264 luma mv prediction (spec 8.4.1.3): median of the left
 * (A), top (B) and diagonal (C) neighbour mvs, unless exactly one
 * neighbour shares the target reference, in which case that mv is used. */
735 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
736 const int index8= scan8[n];
737 const int top_ref= h->ref_cache[list][ index8 - 8 ];
738 const int left_ref= h->ref_cache[list][ index8 - 1 ];
739 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
740 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
742 int diagonal_ref, match_count;
744 assert(part_width==1 || part_width==2 || part_width==4);
754 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
/* Count how many neighbours reference the same picture as 'ref'. */
755 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
756 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
757 if(match_count > 1){ //most common
758 *mx= mid_pred(A[0], B[0], C[0]);
759 *my= mid_pred(A[1], B[1], C[1]);
760 }else if(match_count==1){
764 }else if(top_ref==ref){
/* No (or ambiguous) match: median, except when only the left neighbour
 * exists at all, in which case its mv is taken (branch partly omitted). */
772 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
776 *mx= mid_pred(A[0], B[0], C[0]);
777 *my= mid_pred(A[1], B[1], C[1]);
781 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
785 * gets the directionally predicted 16x8 MV.
786 * @param n the block index
787 * @param mx the x component of the predicted motion vector
788 * @param my the y component of the predicted motion vector
/* 16x8 shortcut (spec 8.4.1.3): the top partition may copy the top
 * neighbour's mv and the bottom partition the left neighbour's mv when
 * the references match; otherwise fall back to median pred_motion(). */
790 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
792 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
793 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
795 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
803 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
804 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
806 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
/* No directional match: use the generic median prediction. */
816 pred_motion(h, n, 4, list, ref, mx, my);
820 * gets the directionally predicted 8x16 MV.
821 * @param n the block index
822 * @param mx the x component of the predicted motion vector
823 * @param my the y component of the predicted motion vector
/* 8x16 shortcut (spec 8.4.1.3): the left partition may copy the left
 * neighbour's mv and the right partition the diagonal (C) mv when the
 * references match; otherwise fall back to median pred_motion(). */
825 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
827 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
828 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
830 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
841 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
843 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
845 if(diagonal_ref == ref){
/* No directional match: use the generic median prediction. */
853 pred_motion(h, n, 2, list, ref, mx, my);
/* P-Skip motion prediction (spec 8.4.1.1): the mv is zero when the top or
 * left neighbour is unavailable, or when either references picture 0 with
 * a zero mv; otherwise the normal 16x16 median prediction applies. */
856 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
857 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
858 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
860 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
862 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
863 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
864 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
870 pred_motion(h, 0, 4, 0, 0, mx, my);
/* Computes the temporal-direct distance scale factors (spec 8.4.1.2.3):
 * for each list-0 reference, tb/td are the clipped POC distances of the
 * current and colocated pictures, tx approximates 16384/td, and the
 * factor is clip3(-1024, 1023, (tb*tx + 32) >> 6); 256 when td is 0.
 * The field variants simply duplicate the per-frame factors. */
875 static inline void direct_dist_scale_factor(H264Context * const h){
876 MpegEncContext * const s = &h->s;
877 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
878 const int poc1 = h->ref_list[1][0].poc;
880 for(i=0; i<h->ref_count[0]; i++){
881 int poc0 = h->ref_list[0][i].poc;
882 int td = av_clip(poc1 - poc0, -128, 127);
883 if(td == 0 /* FIXME || pic0 is a long-term ref */){
884 h->dist_scale_factor[i] = 256;
886 int tb = av_clip(poc - poc0, -128, 127);
887 int tx = (16384 + (FFABS(td) >> 1)) / td;
888 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
892 for(i=0; i<h->ref_count[0]; i++){
893 h->dist_scale_factor_field[2*i] =
894 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/* Prepares direct-mode reference data: records the current picture's ref
 * counts and POCs (so future pictures can use it as a colocated ref), and
 * for temporal direct builds map_col_to_list0[], mapping each reference
 * of the colocated picture (ref_list[1][0]) to the matching index in the
 * current list 0 — 0 when the frame is missing.  Field variants double
 * the indices for the two fields of each mapped frame. */
898 static inline void direct_ref_list_init(H264Context * const h){
899 MpegEncContext * const s = &h->s;
900 Picture * const ref1 = &h->ref_list[1][0];
901 Picture * const cur = s->current_picture_ptr;
903 int sidx= s->picture_structure&1;
904 if(cur->pict_type == FF_I_TYPE)
905 cur->ref_count[sidx][0] = 0;
906 if(cur->pict_type != FF_B_TYPE)
907 cur->ref_count[sidx][1] = 0;
908 for(list=0; list<2; list++){
909 cur->ref_count[sidx][list] = h->ref_count[list];
910 for(j=0; j<h->ref_count[list]; j++)
911 cur->ref_poc[sidx][list][j] = h->ref_list[list][j].poc;
/* Spatial direct (and non-B pictures) need no colocated-ref mapping. */
913 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
915 for(list=0; list<2; list++){
916 for(i=0; i<ref1->ref_count[sidx][list]; i++){
917 const int poc = ref1->ref_poc[sidx][list][i];
918 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
919 for(j=0; j<h->ref_count[list]; j++)
920 if(h->ref_list[list][j].poc == poc){
921 h->map_col_to_list0[list][i] = j;
927 for(list=0; list<2; list++){
928 for(i=0; i<ref1->ref_count[sidx][list]; i++){
929 j = h->map_col_to_list0[list][i];
930 h->map_col_to_list0_field[list][2*i] = 2*j;
931 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/**
 * Derives motion vectors and reference indices for a direct-mode B
 * macroblock (spatial or temporal direct prediction), filling the
 * mv_cache/ref_cache and updating *mb_type / h->sub_mb_type.
 * @param mb_type in/out: macroblock type flags, refined to the partition
 *        sizes implied by the colocated macroblock.
 * NOTE(review): this excerpt is gap-ridden (many original lines missing,
 * including else branches, declarations of ref[], mv[][], i8/i4/scale, and
 * closing braces) — the comments below describe only the visible logic.
 */
937 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
938 MpegEncContext * const s = &h->s;
939 const int mb_xy = h->mb_xy;
/* positions of this MB in the 8x8- and 4x4-granularity tables */
940 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
941 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
/* colocated MB type and its list0/list1 motion/reference data */
942 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
943 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
944 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
945 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
946 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
947 const int is_b8x8 = IS_8X8(*mb_type);
948 unsigned int sub_mb_type;
951 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* choose partition granularity from the colocated MB type */
952 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
953 /* FIXME save sub mb types from previous frames (or derive from MVs)
954 * so we know exactly what block size to use */
955 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
956 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
957 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
958 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
959 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
/* NOTE(review): an else branch presumably sits between the lines above and
 * below (original lines 960-961 are not visible in this excerpt) */
961 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
962 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
965 *mb_type |= MB_TYPE_DIRECT2;
967 *mb_type |= MB_TYPE_INTERLACED;
969 tprintf(s->avctx, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
/* ---- spatial direct prediction ---- */
971 if(h->direct_spatial_mv_pred){
976 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
978 /* ref = min(neighbors) */
/* minimum reference index of the left/top/top-right (or top-left)
 * neighbours; unsigned compare makes negative (unavailable) largest */
979 for(list=0; list<2; list++){
980 int refa = h->ref_cache[list][scan8[0] - 1];
981 int refb = h->ref_cache[list][scan8[0] - 8];
982 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
983 if(refc == PART_NOT_AVAILABLE)
984 refc = h->ref_cache[list][scan8[0] - 8 - 1];
985 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
/* no valid neighbour reference in either list: zero MVs */
990 if(ref[0] < 0 && ref[1] < 0){
992 mv[0][0] = mv[0][1] =
993 mv[1][0] = mv[1][1] = 0;
995 for(list=0; list<2; list++){
997 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
999 mv[list][0] = mv[list][1] = 0;
/* drop the unused list when only one list has a valid reference */
1005 *mb_type &= ~MB_TYPE_L1;
1006 sub_mb_type &= ~MB_TYPE_L1;
1007 }else if(ref[0] < 0){
1009 *mb_type &= ~MB_TYPE_L0;
1010 sub_mb_type &= ~MB_TYPE_L0;
/* field/frame mismatch with the colocated MB: re-address the colocated
 * data to the matching field/frame rows */
1013 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1014 int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1015 int mb_types_col[2];
1016 int b8_stride = h->b8_stride;
1017 int b4_stride = h->b_stride;
1019 *mb_type = (*mb_type & ~MB_TYPE_16x16) | MB_TYPE_8x8;
1021 if(IS_INTERLACED(*mb_type)){
1022 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1023 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
/* rewind pointers to the top of the MB pair */
1025 l1ref0 -= 2*b8_stride;
1026 l1ref1 -= 2*b8_stride;
1027 l1mv0 -= 4*b4_stride;
1028 l1mv1 -= 4*b4_stride;
/* pick the colocated field with the closer POC to the current picture */
1033 int cur_poc = s->current_picture_ptr->poc;
1034 int *col_poc = h->ref_list[1]->field_poc;
1035 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1036 int dy = 2*col_parity - (s->mb_y&1);
1038 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy + col_parity*s->mb_stride];
1039 l1ref0 += dy*b8_stride;
1040 l1ref1 += dy*b8_stride;
1041 l1mv0 += 2*dy*b4_stride;
1042 l1mv1 += 2*dy*b4_stride;
/* per-8x8 spatial direct fill: zero MV when the colocated block is a
 * near-static list0/list1 block (|mv| <= 1) */
1046 for(i8=0; i8<4; i8++){
1049 int xy8 = x8+y8*b8_stride;
1050 int xy4 = 3*x8+y8*b4_stride;
1053 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1055 h->sub_mb_type[i8] = sub_mb_type;
1057 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1058 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1059 if(!IS_INTRA(mb_types_col[y8])
1060 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1061 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1063 a= pack16to32(mv[0][0],mv[0][1]);
1065 b= pack16to32(mv[1][0],mv[1][1]);
1067 a= pack16to32(mv[0][0],mv[0][1]);
1068 b= pack16to32(mv[1][0],mv[1][1]);
1070 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1071 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
/* whole-MB 16x16 spatial direct */
1073 }else if(IS_16X16(*mb_type)){
1076 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1077 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1078 if(!IS_INTRA(mb_type_col)
1079 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1080 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
/* workaround for pre-34 x264 builds (different list1 handling) */
1081 && (h->x264_build>33 || !h->x264_build)))){
1083 a= pack16to32(mv[0][0],mv[0][1]);
1085 b= pack16to32(mv[1][0],mv[1][1]);
1087 a= pack16to32(mv[0][0],mv[0][1]);
1088 b= pack16to32(mv[1][0],mv[1][1]);
1090 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1091 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
/* 8x8 spatial direct, progressive colocated MB */
1093 for(i8=0; i8<4; i8++){
1094 const int x8 = i8&1;
1095 const int y8 = i8>>1;
1097 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1099 h->sub_mb_type[i8] = sub_mb_type;
1101 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1102 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1103 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1104 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* zero out MVs where the colocated block is effectively static */
1107 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1108 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1109 && (h->x264_build>33 || !h->x264_build)))){
1110 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1111 if(IS_SUB_8X8(sub_mb_type)){
1112 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1113 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1115 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1117 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1120 for(i4=0; i4<4; i4++){
1121 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1122 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1124 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1126 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
/* ---- temporal direct prediction: scale colocated MVs by POC distance ---- */
1132 }else{ /* direct temporal mv pred */
1133 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1134 const int *dist_scale_factor = h->dist_scale_factor;
/* field macroblocks use the field variants of the maps/scale factors */
1137 if(IS_INTERLACED(*mb_type)){
1138 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1139 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1140 dist_scale_factor = h->dist_scale_factor_field;
/* field/frame mismatch with the colocated MB */
1142 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1143 /* FIXME assumes direct_8x8_inference == 1 */
1144 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1145 int mb_types_col[2];
1148 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1149 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1150 | (*mb_type & MB_TYPE_INTERLACED);
1151 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1153 if(IS_INTERLACED(*mb_type)){
1154 /* frame to field scaling */
1155 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1156 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1158 l1ref0 -= 2*h->b8_stride;
1159 l1ref1 -= 2*h->b8_stride;
1160 l1mv0 -= 4*h->b_stride;
1161 l1mv1 -= 4*h->b_stride;
/* promote to 16x8 when both colocated field MBs are 16x16-or-intra */
1165 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1166 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1168 *mb_type |= MB_TYPE_16x8;
1170 *mb_type |= MB_TYPE_8x8;
1172 /* field to frame scaling */
1173 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1174 * but in MBAFF, top and bottom POC are equal */
1175 int dy = (s->mb_y&1) ? 1 : 2;
1177 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1178 l1ref0 += dy*h->b8_stride;
1179 l1ref1 += dy*h->b8_stride;
1180 l1mv0 += 2*dy*h->b_stride;
1181 l1mv1 += 2*dy*h->b_stride;
1184 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1186 *mb_type |= MB_TYPE_16x16;
1188 *mb_type |= MB_TYPE_8x8;
/* per-8x8 temporal direct with vertical rescaling (y_shift is declared in
 * a line not visible in this excerpt) */
1191 for(i8=0; i8<4; i8++){
1192 const int x8 = i8&1;
1193 const int y8 = i8>>1;
1195 const int16_t (*l1mv)[2]= l1mv0;
1197 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1199 h->sub_mb_type[i8] = sub_mb_type;
/* temporal direct always uses list1 reference 0 */
1201 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1202 if(IS_INTRA(mb_types_col[y8])){
1203 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1204 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1205 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
/* map colocated reference to a current list0 index */
1209 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1211 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1213 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1216 scale = dist_scale_factor[ref0];
1217 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
/* scale the colocated MV: list0 MV = scale*mv_col, list1 MV = l0 - col */
1220 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1221 int my_col = (mv_col[1]<<y_shift)/2;
1222 int mx = (scale * mv_col[0] + 128) >> 8;
1223 int my = (scale * my_col + 128) >> 8;
1224 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1225 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1232 /* one-to-one mv scaling */
/* matched field/frame parity: no vertical rescaling needed */
1234 if(IS_16X16(*mb_type)){
1237 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1238 if(IS_INTRA(mb_type_col)){
1241 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1242 : map_col_to_list0[1][l1ref1[0]];
1243 const int scale = dist_scale_factor[ref0];
1244 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1246 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1247 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1249 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1250 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1252 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1253 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1254 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
/* per-8x8 (and possibly per-4x4) temporal direct */
1256 for(i8=0; i8<4; i8++){
1257 const int x8 = i8&1;
1258 const int y8 = i8>>1;
1260 const int16_t (*l1mv)[2]= l1mv0;
1262 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1264 h->sub_mb_type[i8] = sub_mb_type;
1265 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1266 if(IS_INTRA(mb_type_col)){
1267 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1268 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1269 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1273 ref0 = l1ref0[x8 + y8*h->b8_stride];
1275 ref0 = map_col_to_list0[0][ref0];
1277 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1280 scale = dist_scale_factor[ref0];
1282 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1283 if(IS_SUB_8X8(sub_mb_type)){
1284 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1285 int mx = (scale * mv_col[0] + 128) >> 8;
1286 int my = (scale * mv_col[1] + 128) >> 8;
1287 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1288 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1290 for(i4=0; i4<4; i4++){
1291 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1292 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1293 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1294 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1295 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1296 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/**
 * Copies the per-macroblock motion vectors, reference indices, CABAC mvd
 * values and direct flags from the decode caches back into the
 * picture-wide tables.
 * NOTE(review): some original lines (loop headers over y, else branches,
 * closing braces) are missing from this excerpt.
 */
1303 static inline void write_back_motion(H264Context *h, int mb_type){
1304 MpegEncContext * const s = &h->s;
/* 4x4- and 8x8-granularity positions of this MB in the picture tables */
1305 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1306 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
/* mark list0 unused up-front so the deblocking filter sees valid data */
1309 if(!USES_LIST(mb_type, 0))
1310 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1312 for(list=0; list<h->list_count; list++){
1314 if(!USES_LIST(mb_type, list))
/* copy 4 MVs (two 64-bit stores) per row from the cache */
1318 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1319 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
/* CABAC needs the motion vector differences stored as well */
1321 if( h->pps.cabac ) {
1322 if(IS_SKIP(mb_type))
1323 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1326 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1327 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
/* one reference index per 8x8 block */
1332 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1333 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1334 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1335 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1336 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
/* CABAC B slices also track which 8x8 blocks used direct prediction */
1340 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1341 if(IS_8X8(mb_type)){
1342 uint8_t *direct_table = &h->direct_table[b8_xy];
1343 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1344 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1345 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1351 * Decodes a network abstraction layer unit.
1352 * @param consumed is the number of bytes used as input
1353 * @param length is the length of the array
1354  * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp trailing?
1355 * @returns decoded bytes, might be src+1 if no escapes
/*
 * Parses the NAL header byte and removes 0x000003 emulation-prevention
 * escapes, returning a pointer to the unescaped RBSP payload (src+1 when
 * no escapes are present — not visible in this excerpt's gaps).
 * NOTE(review): several original lines (declarations of i/si/di/dst/bufidx,
 * returns, loop bodies) are missing from this excerpt.
 */
1357 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1362 // src[0]&0x80; //forbidden bit
1363 h->nal_ref_idc= src[0]>>5;
1364 h->nal_unit_type= src[0]&0x1F;
/* debug dump of the raw NAL bytes (presumably inside an #ifdef — the
 * guard is not visible here) */
1368 for(i=0; i<length; i++)
1369 printf("%2X ", src[i]);
/* scan for the first 0x000001/0x000002/0x000003 sequence; stepping by 2
 * is safe because an escape needs two consecutive zero bytes */
1371 for(i=0; i+1<length; i+=2){
1372 if(src[i]) continue;
1373 if(i>0 && src[i-1]==0) i--;
1374 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1376 /* startcode, so we must be past the end */
/* fast path: no escape sequence found, payload can be used in place */
1383 if(i>=length-1){ //no escaped 0
1384 *dst_length= length;
1385 *consumed= length+1; //+1 for the header
1389 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1390 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1391 dst= h->rbsp_buffer[bufidx];
1397 //printf("decoding esc\n");
1400 //remove escapes (very rare 1:2^22)
1401 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1402 if(src[si+2]==3){ //escape
1407 }else //next start code
1411 dst[di++]= src[si++];
1415 *consumed= si + 1;//+1 for the header
1416 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1421  * identifies the exact end of the bitstream
1422  * @return the length of the trailing, or 0 if damaged
/* NOTE(review): the function body (locating the rbsp_stop_one_bit) is
 * largely missing from this excerpt; only the trace call is visible. */
1424 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1428 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1438  * IDCT transforms the 16 dc values and dequantizes them.
1439  * @param qp quantization parameter
1441 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
/* NOTE(review): declarations of i and the loop headers (for i over 4) are
 * in lines missing from this excerpt. */
1444 int temp[16]; //FIXME check if this is a good idea
/* offsets of the 4 luma-DC positions within the 16x16 block layout */
1445 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1446 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1448 //memset(block, 64, 2*256);
/* first butterfly pass: rows -> temp[] */
1451 const int offset= y_offset[i];
1452 const int z0= block[offset+stride*0] + block[offset+stride*4];
1453 const int z1= block[offset+stride*0] - block[offset+stride*4];
1454 const int z2= block[offset+stride*1] - block[offset+stride*5];
1455 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* second butterfly pass: columns of temp[] -> dequantized block */
1464 const int offset= x_offset[i];
1465 const int z0= temp[4*0+i] + temp[4*2+i];
1466 const int z1= temp[4*0+i] - temp[4*2+i];
1467 const int z2= temp[4*1+i] - temp[4*3+i];
1468 const int z3= temp[4*1+i] + temp[4*3+i];
/* dequantize with rounding: (x*qmul + 128) >> 8 */
1470 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1471 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1472 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1473 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1479  * DCT transforms the 16 dc values.
1480  * @param qp quantization parameter ??? FIXME
1482 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1483 // const int qmul= dequant_coeff[qp][0];
/* forward Hadamard over the 16 luma DC coefficients (encoder side);
 * loop headers are in lines missing from this excerpt */
1485 int temp[16]; //FIXME check if this is a good idea
1486 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1487 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* first butterfly pass: rows -> temp[] */
1490 const int offset= y_offset[i];
1491 const int z0= block[offset+stride*0] + block[offset+stride*4];
1492 const int z1= block[offset+stride*0] - block[offset+stride*4];
1493 const int z2= block[offset+stride*1] - block[offset+stride*5];
1494 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* second butterfly pass: columns with a final >>1 normalization */
1503 const int offset= x_offset[i];
1504 const int z0= temp[4*0+i] + temp[4*2+i];
1505 const int z1= temp[4*0+i] - temp[4*2+i];
1506 const int z2= temp[4*1+i] - temp[4*3+i];
1507 const int z3= temp[4*1+i] + temp[4*3+i];
1509 block[stride*0 +offset]= (z0 + z3)>>1;
1510 block[stride*2 +offset]= (z1 + z2)>>1;
1511 block[stride*8 +offset]= (z1 - z2)>>1;
1512 block[stride*10+offset]= (z0 - z3)>>1;
/*
 * 2x2 inverse Hadamard transform + dequantization of the chroma DC
 * coefficients. The intermediate butterfly (computing e and updating
 * a/b/c) sits in lines missing from this excerpt.
 */
1520 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
/* the 2x2 DC values live at the corners of the 16x16 chroma block */
1521 const int stride= 16*2;
1522 const int xStride= 16;
1525 a= block[stride*0 + xStride*0];
1526 b= block[stride*0 + xStride*1];
1527 c= block[stride*1 + xStride*0];
1528 d= block[stride*1 + xStride*1];
/* dequantize the transformed values: (x*qmul) >> 7 */
1535 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1536 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1537 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1538 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/*
 * Forward 2x2 Hadamard transform of the chroma DC coefficients (encoder
 * side). The intermediate butterfly is in lines missing from this excerpt.
 */
1542 static void chroma_dc_dct_c(DCTELEM *block){
1543 const int stride= 16*2;
1544 const int xStride= 16;
1547 a= block[stride*0 + xStride*0];
1548 b= block[stride*0 + xStride*1];
1549 c= block[stride*1 + xStride*0];
1550 d= block[stride*1 + xStride*1];
1557 block[stride*0 + xStride*0]= (a+c);
1558 block[stride*0 + xStride*1]= (e+b);
1559 block[stride*1 + xStride*0]= (a-c);
1560 block[stride*1 + xStride*1]= (e-b);
1565  * gets the chroma qp.
/* @param t selects between the Cb/Cr chroma QP offset tables
 * @param qscale the luma QP to map */
1567 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1568 return h->pps.chroma_qp_table[t][qscale];
1571 //FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
1572 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
/*
 * Quantizes a block of coefficients with dead-zone bias (encoder side).
 * @param intra selects the larger intra bias (1/3 vs 1/6 of a step)
 * @param separate_dc if set, the DC coefficient is quantized with a
 *        different shift (the two DC branches below)
 * @return index of the last nonzero coefficient
 * NOTE(review): several lines (last_non_zero init, negative-level else
 * branches, loop header over i, closing braces) are missing here.
 */
1573 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1575 const int * const quant_table= quant_coeff[qscale];
1576 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
/* values within [-threshold1, threshold1] quantize to zero (dead zone) */
1577 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1578 const unsigned int threshold2= (threshold1<<1);
/* separate DC path, variant with QUANT_SHIFT-2 (presumably the chroma/
 * special case — the selecting condition is in a missing line) */
1584 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1585 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1586 const unsigned int dc_threshold2= (dc_threshold1<<1);
1588 int level= block[0]*quant_coeff[qscale+18][0];
1589 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1591 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1594 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1597 // last_non_zero = i;
/* separate DC path, variant with QUANT_SHIFT+1 */
1602 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1603 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1604 const unsigned int dc_threshold2= (dc_threshold1<<1);
1606 int level= block[0]*quant_table[0];
1607 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1609 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1612 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1615 // last_non_zero = i;
/* AC coefficients in scan order */
1628 const int j= scantable[i];
1629 int level= block[j]*quant_table[j];
1631 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1632 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
/* single unsigned compare covers both positive and negative levels */
1633 if(((unsigned)(level+threshold1))>threshold2){
1635 level= (bias + level)>>QUANT_SHIFT;
1638 level= (bias - level)>>QUANT_SHIFT;
1647 return last_non_zero;
/*
 * Performs motion compensation for one partition in one direction (one
 * reference list): fetches the luma/chroma prediction from pic at the
 * cached MV, using edge emulation when the MV points outside the picture.
 * @param square  nonzero when one qpix_op call covers the partition
 * @param delta   byte offset to the second half for non-square partitions
 * NOTE(review): gaps in this excerpt hide the emu flag declaration, the
 * early return, and the conditionals guarding the emulation calls.
 */
1650 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1651 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1652 int src_x_offset, int src_y_offset,
1653 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1654 MpegEncContext * const s = &h->s;
/* MV in quarter-pel units, offset to this partition's position */
1655 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1656 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
/* fractional part selects one of 16 qpel interpolation functions */
1657 const int luma_xy= (mx&3) + ((my&3)<<2);
1658 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1659 uint8_t * src_cb, * src_cr;
1660 int extra_width= h->emu_edge_width;
1661 int extra_height= h->emu_edge_height;
1663 const int full_mx= mx>>2;
1664 const int full_my= my>>2;
1665 const int pic_width = 16*s->mb_width;
1666 const int pic_height = 16*s->mb_height >> MB_FIELD;
1668 if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
/* the interpolation filter reads 3 extra pixels when sub-pel */
1671 if(mx&7) extra_width -= 3;
1672 if(my&7) extra_height -= 3;
/* MV points (partly) outside the padded picture: build an emulated edge */
1674 if( full_mx < 0-extra_width
1675 || full_my < 0-extra_height
1676 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1677 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1678 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1679 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1683 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
/* non-square partitions need a second call at offset delta */
1685 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1688 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1691 // chroma offset when predicting from a field of opposite parity
1692 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1693 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1695 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1696 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
/* same edge emulation for each chroma plane (guard condition not visible) */
1699 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1700 src_cb= s->edge_emu_buffer;
1702 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1705 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1706 src_cr= s->edge_emu_buffer;
1708 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/*
 * Unweighted motion compensation for one partition: predicts from list0
 * and/or list1, averaging (avg ops) when both lists are used.
 * NOTE(review): the guard conditions around the two mc_dir_part calls and
 * the switch from put to avg ops are partially hidden by excerpt gaps.
 */
1711 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1712 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1713 int x_offset, int y_offset,
1714 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1715 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1716 int list0, int list1){
1717 MpegEncContext * const s = &h->s;
/* start with "put"; after the first prediction these become "avg" so the
 * second list's prediction is averaged in */
1718 qpel_mc_func *qpix_op= qpix_put;
1719 h264_chroma_mc_func chroma_op= chroma_put;
/* advance destinations to this partition's position within the MB */
1721 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1722 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1723 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1724 x_offset += 8*s->mb_x;
1725 y_offset += 8*(s->mb_y >> MB_FIELD);
1728 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1729 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1730 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1731 qpix_op, chroma_op);
/* second (list1) prediction averages into the first */
1734 chroma_op= chroma_avg;
1738 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1739 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1740 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1741 qpix_op, chroma_op);
/*
 * Weighted motion compensation for one partition. Bi-directional case:
 * predicts both lists into separate buffers and blends with implicit
 * (use_weight==2) or explicit weights. Uni-directional case: predicts one
 * list and applies explicit weight/offset in place.
 * NOTE(review): the if/else selecting bi- vs uni-prediction and some
 * closing braces fall in lines missing from this excerpt.
 */
1745 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1746 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1747 int x_offset, int y_offset,
1748 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1749 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1750 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1751 int list0, int list1){
1752 MpegEncContext * const s = &h->s;
/* advance destinations to this partition's position within the MB */
1754 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1755 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1756 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1757 x_offset += 8*s->mb_x;
1758 y_offset += 8*(s->mb_y >> MB_FIELD);
1761 /* don't optimize for luma-only case, since B-frames usually
1762 * use implicit weights => chroma too. */
/* scratchpad holds the list1 prediction until the blend */
1763 uint8_t *tmp_cb = s->obmc_scratchpad;
1764 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1765 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1766 int refn0 = h->ref_cache[0][ scan8[n] ];
1767 int refn1 = h->ref_cache[1][ scan8[n] ];
1769 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1770 dest_y, dest_cb, dest_cr,
1771 x_offset, y_offset, qpix_put, chroma_put);
1772 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1773 tmp_y, tmp_cb, tmp_cr,
1774 x_offset, y_offset, qpix_put, chroma_put);
/* implicit weighting: weights per reference pair, summing to 64 */
1776 if(h->use_weight == 2){
1777 int weight0 = h->implicit_weight[refn0][refn1];
1778 int weight1 = 64 - weight0;
1779 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1780 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1781 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
/* explicit bi-directional weighting from the slice header tables */
1783 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1784 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1785 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1786 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1787 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1788 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1789 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1790 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1791 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* uni-directional: predict then weight in place */
1794 int list = list1 ? 1 : 0;
1795 int refn = h->ref_cache[list][ scan8[n] ];
1796 Picture *ref= &h->ref_list[list][refn];
1797 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1798 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1799 qpix_put, chroma_put);
1801 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1802 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1803 if(h->use_weight_chroma){
1804 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1805 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1806 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1807 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/*
 * Dispatches one partition's motion compensation to the weighted path
 * (explicit weighting, or implicit bi-prediction with non-trivial weight)
 * or the standard unweighted/averaging path.
 */
1812 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1813 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1814 int x_offset, int y_offset,
1815 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1816 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1817 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1818 int list0, int list1){
/* implicit weight 32/32 is a plain average — no need for the slow path */
1819 if((h->use_weight==2 && list0 && list1
1820 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1821 || h->use_weight==1)
1822 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1823 x_offset, y_offset, qpix_put, chroma_put,
1824 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
/* NOTE(review): the else keyword sits on a line missing from this excerpt */
1826 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1827 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1830 static inline void prefetch_motion(H264Context *h, int list){
1831 /* fetch pixels for estimated mv 4 macroblocks ahead
1832 * optimized for 64byte cache lines */
1833 MpegEncContext * const s = &h->s;
1834 const int refn = h->ref_cache[list][scan8[0]];
/* NOTE(review): the refn >= 0 guard around the body is presumably in the
 * missing line 1835 — verify against the full file */
1836 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1837 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1838 uint8_t **src= h->ref_list[list][refn].data;
1839 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1840 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* Cb and Cr are prefetched together via their plane-pointer distance */
1841 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1842 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/*
 * Top-level inter prediction for one macroblock: walks the partition tree
 * (16x16 / 16x8 / 8x16 / 8x8 with sub-partitions) and calls mc_part with
 * the appropriately sized qpel/chroma/weight function tables.
 * NOTE(review): the loop headers over i/j inside the 8x8 branch and some
 * closing braces are in lines missing from this excerpt.
 */
1846 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1847 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1848 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1849 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1850 MpegEncContext * const s = &h->s;
1851 const int mb_xy= h->mb_xy;
1852 const int mb_type= s->current_picture.mb_type[mb_xy];
1854 assert(IS_INTER(mb_type));
/* prefetch list0 before, list1 after the actual compensation */
1856 prefetch_motion(h, 0);
1858 if(IS_16X16(mb_type)){
1859 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1860 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1861 &weight_op[0], &weight_avg[0],
1862 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1863 }else if(IS_16X8(mb_type)){
1864 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1865 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1866 &weight_op[1], &weight_avg[1],
1867 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1868 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1869 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1870 &weight_op[1], &weight_avg[1],
1871 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1872 }else if(IS_8X16(mb_type)){
/* delta = 8 rows: second half of the 8x16 partition */
1873 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1874 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1875 &weight_op[2], &weight_avg[2],
1876 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1877 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1878 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1879 &weight_op[2], &weight_avg[2],
1880 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
/* 8x8 with per-8x8 sub-partitioning */
1884 assert(IS_8X8(mb_type));
1887 const int sub_mb_type= h->sub_mb_type[i];
1889 int x_offset= (i&1)<<2;
1890 int y_offset= (i&2)<<1;
1892 if(IS_SUB_8X8(sub_mb_type)){
1893 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1894 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1895 &weight_op[3], &weight_avg[3],
1896 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1897 }else if(IS_SUB_8X4(sub_mb_type)){
1898 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1899 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1900 &weight_op[4], &weight_avg[4],
1901 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1902 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1903 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1904 &weight_op[4], &weight_avg[4],
1905 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1906 }else if(IS_SUB_4X8(sub_mb_type)){
1907 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1908 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1909 &weight_op[5], &weight_avg[5],
1910 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1911 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1912 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1913 &weight_op[5], &weight_avg[5],
1914 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1917 assert(IS_SUB_4X4(sub_mb_type));
1919 int sub_x_offset= x_offset + 2*(j&1);
1920 int sub_y_offset= y_offset + (j&2);
1921 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1922 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1923 &weight_op[6], &weight_avg[6],
1924 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1930 prefetch_motion(h, 1);
/*
 * One-time initialization of the CAVLC tables (coeff_token, total_zeros,
 * run_before, and their chroma-DC variants) from the static length/bits
 * arrays. Guarded by the "done" flag so it runs only on first call.
 */
1933 static av_cold void decode_init_vlc(void){
1934 static int done = 0;
1940 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1941 &chroma_dc_coeff_token_len [0], 1, 1,
1942 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
/* four coeff_token tables, selected by the nC context */
1945 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1946 &coeff_token_len [i][0], 1, 1,
1947 &coeff_token_bits[i][0], 1, 1, 1);
1951 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1952 &chroma_dc_total_zeros_len [i][0], 1, 1,
1953 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
1955 for(i=0; i<15; i++){
1956 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
1957 &total_zeros_len [i][0], 1, 1,
1958 &total_zeros_bits[i][0], 1, 1, 1);
1962 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
1963 &run_len [i][0], 1, 1,
1964 &run_bits[i][0], 1, 1, 1);
/* runs >= 7 share one larger table */
1966 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1967 &run_len [6][0], 1, 1,
1968 &run_bits[6][0], 1, 1, 1);
/**
 * Free all per-context tables allocated by alloc_tables()/context_init(),
 * plus the cached SPS/PPS buffers and per-thread scratch buffers.
 * av_freep() NULLs each pointer, so double-free is safe on repeated calls.
 */
1972 static void free_tables(H264Context *h){
1975 av_freep(&h->intra4x4_pred_mode);
1976 av_freep(&h->chroma_pred_mode_table);
1977 av_freep(&h->cbp_table);
1978 av_freep(&h->mvd_table[0]);
1979 av_freep(&h->mvd_table[1]);
1980 av_freep(&h->direct_table);
1981 av_freep(&h->non_zero_count);
1982 av_freep(&h->slice_table_base);
/* slice_table points into slice_table_base; clear the alias too */
1983 h->slice_table= NULL;
1985 av_freep(&h->mb2b_xy);
1986 av_freep(&h->mb2b8_xy);
/* release every cached parameter set */
1988 for(i = 0; i < MAX_SPS_COUNT; i++)
1989 av_freep(h->sps_buffers + i);
1991 for(i = 0; i < MAX_PPS_COUNT; i++)
1992 av_freep(h->pps_buffers + i);
/* per-thread-context buffers (see clone_tables/context_init) */
1994 for(i = 0; i < h->s.avctx->thread_count; i++) {
1995 hx = h->thread_context[i];
1997 av_freep(&hx->top_borders[1]);
1998 av_freep(&hx->top_borders[0]);
1999 av_freep(&hx->s.obmc_scratchpad);
/**
 * Precompute the 8x8 dequantization tables for all 52 QP values,
 * one table per distinct 8x8 scaling matrix (intra / inter).
 * If both PPS scaling matrices are identical, the second table aliases
 * the first to save the recomputation.
 */
2003 static void init_dequant8_coeff_table(H264Context *h){
/* transposed layout needed when the platform IDCT is not the C reference */
2005 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2006 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2007 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2009 for(i=0; i<2; i++ ){
2010 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2011 h->dequant8_coeff[1] = h->dequant8_buffer[0];
/* qp = 6*shift + idx; per-coefficient value = base * scaling << shift */
2015 for(q=0; q<52; q++){
2016 int shift = ff_div6[q];
2017 int idx = ff_rem6[q];
2019 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2020 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2021 h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Precompute the 4x4 dequantization tables for all 52 QP values,
 * one per scaling matrix (6 of them: intra/inter x Y/Cb/Cr).
 * Tables whose scaling matrices match an earlier one alias that buffer.
 */
2026 static void init_dequant4_coeff_table(H264Context *h){
/* transposed layout needed when the platform IDCT is not the C reference */
2028 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2029 for(i=0; i<6; i++ ){
2030 h->dequant4_coeff[i] = h->dequant4_buffer[i];
/* reuse an identical earlier matrix's table instead of recomputing */
2032 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2033 h->dequant4_coeff[i] = h->dequant4_buffer[j];
/* qp = 6*shift + idx; +2 keeps extra precision for the 4x4 transform */
2040 for(q=0; q<52; q++){
2041 int shift = ff_div6[q] + 2;
2042 int idx = ff_rem6[q];
2044 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2045 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2046 h->pps.scaling_matrix4[i][x]) << shift;
/**
 * (Re)build all dequant tables for the current PPS/SPS: always the 4x4
 * tables, the 8x8 ones only when 8x8 transforms are enabled.
 * With lossless transform bypass, QP 0 entries are forced to the neutral
 * value 1<<6 so dequantization becomes an identity.
 */
2051 static void init_dequant_tables(H264Context *h){
2053 init_dequant4_coeff_table(h);
2054 if(h->pps.transform_8x8_mode)
2055 init_dequant8_coeff_table(h);
2056 if(h->sps.transform_bypass){
2059 h->dequant4_coeff[i][0][x] = 1<<6;
2060 if(h->pps.transform_8x8_mode)
2063 h->dequant8_coeff[i][0][x] = 1<<6;
/* Allocate all per-picture-size decoder tables; */
2070 * needs width/height
/**
 * @return 0 on success, negative on allocation failure
 * (CHECKED_ALLOCZ jumps to a cleanup path on failure — not visible here).
 */
2072 static int alloc_tables(H264Context *h){
2073 MpegEncContext * const s = &h->s;
/* +1 row of padding above the picture for out-of-frame neighbor accesses */
2074 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2077 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2079 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2080 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2081 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2083 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2084 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2085 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2086 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* -1 marks "no slice"; slice_table is offset so index 0 is inside padding */
2088 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
2089 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
/* macroblock-index -> motion-vector-block-index lookup tables */
2091 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2092 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2093 for(y=0; y<s->mb_height; y++){
2094 for(x=0; x<s->mb_width; x++){
2095 const int mb_xy= x + y*s->mb_stride;
2096 const int b_xy = 4*x + 4*y*h->b_stride;
2097 const int b8_xy= 2*x + 2*y*h->b8_stride;
2099 h->mb2b_xy [mb_xy]= b_xy;
2100 h->mb2b8_xy[mb_xy]= b8_xy;
/* allocated lazily in frame_start() once linesize is known */
2104 s->obmc_scratchpad = NULL;
2106 if(!h->dequant4_coeff[0])
2107 init_dequant_tables(h);
/* Share the size-dependent tables between slice-thread contexts; */
2116 * Mimic alloc_tables(), but for every context thread.
/**
 * Only pointers are copied — dst does NOT own these tables, so free_tables()
 * must be called only on the owning (master) context for them.
 */
2118 static void clone_tables(H264Context *dst, H264Context *src){
2119 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2120 dst->non_zero_count = src->non_zero_count;
2121 dst->slice_table = src->slice_table;
2122 dst->cbp_table = src->cbp_table;
2123 dst->mb2b_xy = src->mb2b_xy;
2124 dst->mb2b8_xy = src->mb2b8_xy;
2125 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2126 dst->mvd_table[0] = src->mvd_table[0];
2127 dst->mvd_table[1] = src->mvd_table[1];
2128 dst->direct_table = src->direct_table;
/* per-thread scratchpad is allocated lazily in frame_start() */
2130 dst->s.obmc_scratchpad = NULL;
2131 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
/* Per-thread allocations (one 16+8+8 bytes-per-MB border row per parity); */
2136 * Allocate buffers which are not shared amongst multiple threads.
/** @return 0 on success, -1 on allocation failure */
2138 static int context_init(H264Context *h){
2139 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2140 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
/* CHECKED_ALLOCZ jumps here on failure */
2144 return -1; // free_tables will clean up for us
/**
 * Initialization shared by the H.264 decoder and the SVQ3 decoder:
 * dimensions, prediction function pointers, and flat default scaling
 * matrices (all 16 == no scaling).
 */
2147 static av_cold void common_init(H264Context *h){
2148 MpegEncContext * const s = &h->s;
2150 s->width = s->avctx->width;
2151 s->height = s->avctx->height;
2152 s->codec_id= s->avctx->codec->id;
2154 ff_h264_pred_init(&h->hpc, s->codec_id);
/* -1 = "no PPS seen yet"; forces dequant table init on first PPS */
2156 h->dequant_coeff_pps= -1;
2157 s->unrestricted_mv=1;
2158 s->decode=1; //FIXME
/* default (flat) scaling matrices until SPS/PPS override them */
2160 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2161 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/**
 * AVCodec.init for the H.264 decoder: set up the MpegEncContext defaults,
 * pick the output pixel format, and detect AVCC-style ("mp4") extradata.
 * NOTE(review): the is_avc/nal_length_size parsing lines are missing from
 * this extract.
 */
2164 static av_cold int decode_init(AVCodecContext *avctx){
2165 H264Context *h= avctx->priv_data;
2166 MpegEncContext * const s = &h->s;
2168 MPV_decode_defaults(s);
2173 s->out_format = FMT_H264;
2174 s->workaround_bugs= avctx->workaround_bugs;
2177 // s->decode_mb= ff_h263_decode_mb;
2178 s->quarter_sample = 1;
/* SVQ3 uses full-range (JPEG) YUV; H.264 proper uses MPEG-range */
2181 if(avctx->codec_id == CODEC_ID_SVQ3)
2182 avctx->pix_fmt= PIX_FMT_YUVJ420P;
2184 avctx->pix_fmt= PIX_FMT_YUV420P;
/* leading byte 1 in extradata => AVCC (length-prefixed NALs), not Annex B */
2188 if(avctx->extradata_size > 0 && avctx->extradata &&
2189 *(char *)avctx->extradata == 1){
2196 h->thread_context[0] = h;
/* INT_MIN = no picture output yet */
2197 h->outputed_poc = INT_MIN;
/**
 * Per-frame setup: acquire a picture from MPV, start error resilience,
 * precompute block pixel offsets, and lazily allocate scratch buffers.
 * @return presumably 0 on success / negative on MPV_frame_start failure
 *         (the error-return line is missing from this extract — verify).
 */
2201 static int frame_start(H264Context *h){
2202 MpegEncContext * const s = &h->s;
2205 if(MPV_frame_start(s, s->avctx) < 0)
2207 ff_er_frame_start(s);
2209 * MPV_frame_start uses pict_type to derive key_frame.
2210 * This is incorrect for H.264; IDR markings must be used.
2211 * Zero here; IDR markings per slice in frame or fields are ORed in later.
2212 * See decode_nal_units().
2214 s->current_picture_ptr->key_frame= 0;
2216 assert(s->linesize && s->uvlinesize);
/* luma block offsets; [24+i] variants are the MBAFF (doubled-stride) set */
2218 for(i=0; i<16; i++){
2219 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2220 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
/* chroma block offsets (Cb at +16, Cr at +20, MBAFF copies at +24) */
2223 h->block_offset[16+i]=
2224 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2225 h->block_offset[24+16+i]=
2226 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2229 /* can't be in alloc_tables because linesize isn't known there.
2230 * FIXME: redo bipred weight to not require extra buffer? */
2231 for(i = 0; i < s->avctx->thread_count; i++)
2232 if(!h->thread_context[i]->s.obmc_scratchpad)
2233 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2235 /* some macroblocks will be accessed before they're available */
2236 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2237 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2239 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2241 // We mark the current picture as non-reference after allocating it, so
2242 // that if we break out due to an error it can be released automatically
2243 // in the next MPV_frame_start().
2244 // SVQ3 as well as most other codecs have only last/next/current and thus
2245 // get released even with set reference, besides SVQ3 and others do not
2246 // mark frames as reference later "naturally".
2247 if(s->codec_id != CODEC_ID_SVQ3)
2248 s->current_picture_ptr->reference= 0;
/* INT_MAX = POC not yet known for either field */
2250 s->current_picture_ptr->field_poc[0]=
2251 s->current_picture_ptr->field_poc[1]= INT_MAX;
2252 assert(s->current_picture_ptr->long_ref==0);
/**
 * Save the bottom row of this macroblock into top_borders[] (and the right
 * column into left_border[]) so the deblocking filter of the MB below/right
 * can still read pre-filter pixels after this MB has been filtered.
 * @param simple nonzero in the fast path; skips gray-only handling
 */
2257 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2258 MpegEncContext * const s = &h->s;
/* step back one row so index i=1..16 addresses rows 0..15 of the MB */
2262 src_cb -= uvlinesize;
2263 src_cr -= uvlinesize;
2265 // There are two lines saved, the line above the the top macroblock of a pair,
2266 // and the line above the bottom macroblock
/* left_border[0] keeps the old top-right corner pixel */
2267 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2268 for(i=1; i<17; i++){
2269 h->left_border[i]= src_y[15+i* linesize];
/* save the MB's last luma row (16 bytes, as two 64-bit stores) */
2272 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2273 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
/* chroma only when not decoding gray-only */
2275 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2276 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
2277 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
2279 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
2280 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
2282 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2283 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Swap (or copy, depending on the XCHG macro and the xchg flag) the saved
 * border pixels with the picture edge rows/columns around the current MB.
 * Used to present pre-deblock neighbor samples to intra prediction, then
 * restore them afterwards (called once with xchg=1, once with xchg=0).
 */
2287 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2288 MpegEncContext * const s = &h->s;
/* deblocking_filter==2: only within the same slice */
2295 if(h->deblocking_filter == 2) {
2297 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2298 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2300 deblock_left = (s->mb_x > 0);
2301 deblock_top = (s->mb_y > 0);
/* move to the top-left neighbor pixel (one row and one column back) */
2304 src_y -= linesize + 1;
2305 src_cb -= uvlinesize + 1;
2306 src_cr -= uvlinesize + 1;
2308 #define XCHG(a,b,t,xchg)\
/* left column: skip row 0 when the top border is not to be exchanged */
2315 for(i = !deblock_top; i<17; i++){
2316 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2321 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2322 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
/* top-right neighbor of the MB to the right, needed for its prediction */
2323 if(s->mb_x+1 < s->mb_width){
2324 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2328 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2330 for(i = !deblock_top; i<9; i++){
2331 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
2332 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
2336 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2337 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * MBAFF variant of backup_mb_border(): saves the bottom two rows of a
 * macroblock PAIR (top_borders[0] and [1]) and a 34-entry left column,
 * so the next pair's deblocking/prediction sees pre-filter pixels.
 */
2342 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
2343 MpegEncContext * const s = &h->s;
/* step back two rows so i=2..33 addresses the pair's 32 luma rows */
2346 src_y -= 2 * linesize;
2347 src_cb -= 2 * uvlinesize;
2348 src_cr -= 2 * uvlinesize;
2350 // There are two lines saved, the line above the the top macroblock of a pair,
2351 // and the line above the bottom macroblock
2352 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2353 h->left_border[1]= h->top_borders[1][s->mb_x][15];
2354 for(i=2; i<34; i++){
2355 h->left_border[i]= src_y[15+i* linesize];
/* last two luma rows of the pair -> the two per-parity border lines */
2358 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
2359 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
2360 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
2361 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
2363 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2364 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
2365 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
2366 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
2367 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
2368 for(i=2; i<18; i++){
2369 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
2370 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
2372 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
2373 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
2374 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
2375 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/**
 * MBAFF variant of xchg_mb_border(): swap/restore the saved pair borders
 * (both parities) with the picture edge pixels around a macroblock pair.
 */
2379 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
2380 MpegEncContext * const s = &h->s;
/* mb_y > 1: the pair above must exist (pairs span two MB rows) */
2383 int deblock_left = (s->mb_x > 0);
2384 int deblock_top = (s->mb_y > 1);
2386 tprintf(s->avctx, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
/* move to the top-left neighbor of the pair */
2388 src_y -= 2 * linesize + 1;
2389 src_cb -= 2 * uvlinesize + 1;
2390 src_cr -= 2 * uvlinesize + 1;
2392 #define XCHG(a,b,t,xchg)\
/* left column, 34 luma entries; skip the first two rows if no top pair */
2399 for(i = (!deblock_top)<<1; i<34; i++){
2400 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
2405 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2406 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2407 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
2408 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
/* top-right neighbor of the next pair */
2409 if(s->mb_x+1 < s->mb_width){
2410 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2411 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
2415 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2417 for(i = (!deblock_top) << 1; i<18; i++){
2418 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
2419 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
2423 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2424 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2425 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
2426 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/**
 * Reconstruct one macroblock: intra prediction or motion compensation,
 * residual IDCT+add for luma and chroma, then deblocking.
 * @param simple compile-time flag: 1 = fast path (progressive H.264, no
 *        PCM/gray/encoder special cases), 0 = full path incl. MBAFF & SVQ3.
 * Being av_always_inline with constant `simple`, dead branches are removed,
 * producing the two entry points hl_decode_mb_simple/_complex.
 * NOTE(review): numerous lines (else branches, closing braces, `continue`s)
 * are missing from this extract; leading numbers are extraction artifacts.
 */
2431 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2432 MpegEncContext * const s = &h->s;
2433 const int mb_x= s->mb_x;
2434 const int mb_y= s->mb_y;
2435 const int mb_xy= h->mb_xy;
2436 const int mb_type= s->current_picture.mb_type[mb_xy];
2437 uint8_t *dest_y, *dest_cb, *dest_cr;
2438 int linesize, uvlinesize /*dct_offset*/;
2440 int *block_offset = &h->block_offset[0];
2441 const unsigned int bottom = mb_y & 1;
2442 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2443 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2444 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
/* destination pixel pointers for this MB in the current picture */
2446 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2447 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2448 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2450 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2451 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* field MB in an MBAFF frame: double strides, use the field block offsets */
2453 if (!simple && MB_FIELD) {
2454 linesize = h->mb_linesize = s->linesize * 2;
2455 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2456 block_offset = &h->block_offset[24];
2457 if(mb_y&1){ //FIXME move out of this function?
2458 dest_y -= s->linesize*15;
2459 dest_cb-= s->uvlinesize*7;
2460 dest_cr-= s->uvlinesize*7;
/* remap ref indices so field parity is encoded in the cached value */
2464 for(list=0; list<h->list_count; list++){
2465 if(!USES_LIST(mb_type, list))
2467 if(IS_16X16(mb_type)){
2468 int8_t *ref = &h->ref_cache[list][scan8[0]];
2469 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2471 for(i=0; i<16; i+=4){
2472 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
2473 int ref = h->ref_cache[list][scan8[i]];
2475 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
/* frame MB: normal strides */
2481 linesize = h->mb_linesize = s->linesize;
2482 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2483 // dct_offset = s->linesize * 16;
/* choose luma IDCT routines for this MB's transform size / bypass mode */
2486 if(transform_bypass){
2488 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2489 }else if(IS_8x8DCT(mb_type)){
2490 idct_dc_add = s->dsp.h264_idct8_dc_add;
2491 idct_add = s->dsp.h264_idct8_add;
2493 idct_dc_add = s->dsp.h264_idct_dc_add;
2494 idct_add = s->dsp.h264_idct_add;
/* MBAFF intra: expose unfiltered neighbor pixels of the pair above */
2497 if(!simple && FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
2498 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
2499 int mbt_y = mb_y&~1;
2500 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
2501 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2502 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2503 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
/* I_PCM: raw samples were stored in h->mb; copy them out directly */
2506 if (!simple && IS_INTRA_PCM(mb_type)) {
2507 for (i=0; i<16; i++) {
2508 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
2510 for (i=0; i<8; i++) {
2511 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2512 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
/* ---- intra prediction path ---- */
2515 if(IS_INTRA(mb_type)){
2516 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2517 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2519 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2520 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2521 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2524 if(IS_INTRA4x4(mb_type)){
2525 if(simple || !s->encoding){
/* 8x8 intra: predict + add residual per 8x8 block */
2526 if(IS_8x8DCT(mb_type)){
2527 for(i=0; i<16; i+=4){
2528 uint8_t * const ptr= dest_y + block_offset[i];
2529 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2530 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2531 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2532 (h->topright_samples_available<<i)&0x4000, linesize);
/* nnz==1 with only the DC coeff set: cheaper DC-only add */
2534 if(nnz == 1 && h->mb[i*16])
2535 idct_dc_add(ptr, h->mb + i*16, linesize);
2537 idct_add(ptr, h->mb + i*16, linesize);
/* 4x4 intra: predict + add residual per 4x4 block */
2541 for(i=0; i<16; i++){
2542 uint8_t * const ptr= dest_y + block_offset[i];
2544 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
/* these modes read top-right samples; synthesize them if unavailable */
2547 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2548 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2549 assert(mb_y || linesize <= block_offset[i]);
2550 if(!topright_avail){
2551 tr= ptr[3 - linesize]*0x01010101;
2552 topright= (uint8_t*) &tr;
2554 topright= ptr + 4 - linesize;
2558 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2559 nnz = h->non_zero_count_cache[ scan8[i] ];
2562 if(nnz == 1 && h->mb[i*16])
2563 idct_dc_add(ptr, h->mb + i*16, linesize);
2565 idct_add(ptr, h->mb + i*16, linesize);
2567 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
/* 16x16 intra: whole-MB prediction + separate luma DC transform */
2572 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2574 if(!transform_bypass)
2575 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2577 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2579 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2580 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* ---- inter path: motion compensation ---- */
2582 hl_motion(h, dest_y, dest_cb, dest_cr,
2583 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2584 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2585 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* luma residual for non-I4x4 MBs (I4x4 added residual above) */
2589 if(!IS_INTRA4x4(mb_type)){
2591 if(IS_INTRA16x16(mb_type)){
2592 for(i=0; i<16; i++){
2593 if(h->non_zero_count_cache[ scan8[i] ])
2594 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2595 else if(h->mb[i*16])
2596 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
/* step 4 blocks at a time for 8x8 DCT, 1 for 4x4 */
2599 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2600 for(i=0; i<16; i+=di){
2601 int nnz = h->non_zero_count_cache[ scan8[i] ];
2603 if(nnz==1 && h->mb[i*16])
2604 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2606 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
/* SVQ3 luma residual */
2611 for(i=0; i<16; i++){
2612 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2613 uint8_t * const ptr= dest_y + block_offset[i];
2614 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* chroma residual (skipped entirely in gray-only decoding) */
2620 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2621 uint8_t *dest[2] = {dest_cb, dest_cr};
2622 if(transform_bypass){
2623 idct_add = idct_dc_add = s->dsp.add_pixels4;
2625 idct_add = s->dsp.h264_idct_add;
2626 idct_dc_add = s->dsp.h264_idct_dc_add;
/* chroma DC 2x2 transform, separate QP/dequant per plane */
2627 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2628 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
/* blocks 16..19 = Cb, 20..23 = Cr; (i&4)>>2 selects the plane */
2631 for(i=16; i<16+8; i++){
2632 if(h->non_zero_count_cache[ scan8[i] ])
2633 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2634 else if(h->mb[i*16])
2635 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
/* SVQ3 chroma residual */
2638 for(i=16; i<16+8; i++){
2639 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2640 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2641 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
/* ---- deblocking ---- */
2647 if(h->deblocking_filter) {
/* MBAFF: filter the whole pair once the bottom MB is reconstructed */
2648 if (!simple && FRAME_MBAFF) {
2649 //FIXME try deblocking one mb at a time?
2650 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
2651 const int mb_y = s->mb_y - 1;
2652 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
2653 const int mb_xy= mb_x + mb_y*s->mb_stride;
2654 const int mb_type_top = s->current_picture.mb_type[mb_xy];
2655 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
2656 if (!bottom) return;
2657 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2658 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2659 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
/* undo the intra border swap done earlier, or just back up the borders */
2661 if(IS_INTRA(mb_type_top | mb_type_bottom))
2662 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
2664 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
/* temporarily rewind to the top MB of the pair, filter, then restore */
2667 s->mb_y--; h->mb_xy -= s->mb_stride;
2668 tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
2669 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
2670 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2671 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2672 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
2674 s->mb_y++; h->mb_xy += s->mb_stride;
2675 tprintf(h->s.avctx, "call mbaff filter_mb\n");
2676 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
2677 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2678 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2679 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
/* progressive: back up borders, then fast per-MB filtering */
2681 tprintf(h->s.avctx, "call filter_mb\n");
2682 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2683 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2684 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2685 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2686 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
/* Fast-path wrapper: simple=1 prunes PCM/MBAFF/gray/SVQ3 branches at compile time. */
2692 * Process a macroblock; this case avoids checks for expensive uncommon cases.
2694 static void hl_decode_mb_simple(H264Context *h){
2695 hl_decode_mb_internal(h, 1);
/* Full-path wrapper: simple=0 keeps all special-case handling; noinline keeps code size down. */
2699 * Process a macroblock; this handles edge cases, such as interlacing.
2701 static void av_noinline hl_decode_mb_complex(H264Context *h){
2702 hl_decode_mb_internal(h, 0);
/**
 * Dispatch macroblock reconstruction to the simple or complex path,
 * based on features of the current MB and build-time configuration.
 */
2705 static void hl_decode_mb(H264Context *h){
2706 MpegEncContext * const s = &h->s;
2707 const int mb_xy= h->mb_xy;
2708 const int mb_type= s->current_picture.mb_type[mb_xy];
/* anything interlaced, PCM, non-H264, gray-only, encoding, or size-optimized
 * builds must take the complex path */
2709 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 ||
2710 (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding) || ENABLE_SMALL;
/* encoder-only builds may skip reconstruction entirely */
2712 if(ENABLE_H264_ENCODER && !s->decode)
2716 hl_decode_mb_complex(h);
2717 else hl_decode_mb_simple(h);
/**
 * Convert a frame Picture in place into a single-field view:
 * offset the data pointers to the requested field's lines, double the
 * linesizes, and set reference/poc to the field's values.
 * @param parity PICT_TOP_FIELD or PICT_BOTTOM_FIELD
 */
2720 static void pic_as_field(Picture *pic, const int parity){
2722 for (i = 0; i < 4; ++i) {
/* bottom field starts one line down */
2723 if (parity == PICT_BOTTOM_FIELD)
2724 pic->data[i] += pic->linesize[i];
2725 pic->reference = parity;
2726 pic->linesize[i] *= 2;
2728 pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/**
 * Copy src into dest if src is usable as a reference with the given parity;
 * for field parities, also convert dest into a field view and bump pic_id.
 * @return 1 if the copy was made (parity matched), else presumably 0
 *         (the non-match return line is missing from this extract).
 */
2731 static int split_field_copy(Picture *dest, Picture *src,
2732 int parity, int id_add){
2733 int match = !!(src->reference & parity);
2737 if(parity != PICT_FRAME){
2738 pic_as_field(dest, parity);
/* field pic_ids are 2*frame_num (+1 for the opposite parity) */
2740 dest->pic_id += id_add;
/**
 * Build part of a default reference list by interleaving same-parity and
 * opposite-parity fields from `in` (H.264 field-reference-list rule).
 * @param is_long nonzero when `in` holds long-term refs (pic_id = index,
 *        otherwise pic_id = frame_num)
 * @param sel picture structure selecting the "same" parity (sel^3 = other)
 * @return presumably the number of entries written (index; the return line
 *         is missing from this extract — verify)
 */
2747 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
/* i[0] walks same-parity candidates, i[1] opposite-parity */
2751 while(i[0]<len || i[1]<len){
2752 while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2754 while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2757 in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2758 split_field_copy(&def[index++], in[ i[0]++ ], sel , 1);
2761 in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2762 split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
/**
 * Append pointers from src to sorted, ordered by POC relative to `limit`:
 * dir=0 picks POCs above limit in ascending order, dir=1 picks POCs below
 * limit in descending order (selection-sort, one pass per output element).
 * @return presumably the count of entries appended (out_i; the return line
 *         is missing from this extract — verify)
 */
2769 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
2774 best_poc= dir ? INT_MIN : INT_MAX;
2776 for(i=0; i<len; i++){
2777 const int poc= src[i]->poc;
/* XOR with dir flips both comparisons for the descending direction */
2778 if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2780 sorted[out_i]= src[i];
/* sentinel unchanged => no candidate left on this side of limit */
2783 if(best_poc == (dir ? INT_MIN : INT_MAX))
2785 limit= sorted[out_i++]->poc - dir;
/* Build default reference picture lists per H.264 8.2.4.2; */
2791 * fills the default_ref_list.
2793 static int fill_default_ref_list(H264Context *h){
2794 MpegEncContext * const s = &h->s;
/* B slices: L0 sorted by POC before current, L1 by POC after */
2797 if(h->slice_type_nos==FF_B_TYPE){
2798 Picture *sorted[32];
2803 cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2805 cur_poc= s->current_picture_ptr->poc;
2807 for(list= 0; list<2; list++){
/* 1^list / 0^list swap the before/after ordering between L0 and L1 */
2808 len= add_sorted(sorted , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2809 len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2811 len= build_def_list(h->default_ref_list[list] , sorted , len, 0, s->picture_structure);
2812 len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2815 if(len < h->ref_count[list])
2816 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
/* spec: if L0 and L1 would be identical, swap L1's first two entries */
2820 if(lens[0] == lens[1] && lens[1] > 1){
2821 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2823 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
/* P slices: short-term by decreasing frame_num order, then long-term */
2826 len = build_def_list(h->default_ref_list[0] , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2827 len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16 , 1, s->picture_structure);
2829 if(len < h->ref_count[0])
2830 memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
/* trace output of the constructed lists */
2833 for (i=0; i<h->ref_count[0]; i++) {
2834 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2836 if(h->slice_type_nos==FF_B_TYPE){
2837 for (i=0; i<h->ref_count[1]; i++) {
2838 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2845 static void print_short_term(H264Context *h);
2846 static void print_long_term(H264Context *h);
/* Split a pic_num into (frame number / long-term index, field structure); */
2849 * Extract structure information about the picture described by pic_num in
2850 * the current decoding context (frame or field). Note that pic_num is
2851 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2852 * @param pic_num picture number for which to extract structure information
2853 * @param structure one of PICT_XXX describing structure of picture
2855 * @return frame number (short term) or long term index of picture
2856 * described by pic_num
2858 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2859 MpegEncContext * const s = &h->s;
/* default: same structure as the current picture */
2861 *structure = s->picture_structure;
/* in field coding, an even/odd pic_num bit selects same/opposite parity */
2864 /* opposite field */
2865 *structure ^= PICT_FRAME;
/**
 * Parse ref_pic_list_reordering() from the slice header and apply it:
 * start from the default lists, then move the signalled short-term or
 * long-term pictures to the front positions, per H.264 section 8.2.4.3.
 * @return presumably 0 on success, -1 on bitstream errors (the explicit
 *         return lines are missing from this extract — verify)
 */
2872 static int decode_ref_pic_list_reordering(H264Context *h){
2873 MpegEncContext * const s = &h->s;
2874 int list, index, pic_structure;
2876 print_short_term(h);
2879 for(list=0; list<h->list_count; list++){
/* start from the default list; reordering permutes it in place */
2880 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
2882 if(get_bits1(&s->gb)){
/* predicted pic num starts at the current picture's number */
2883 int pred= h->curr_pic_num;
2885 for(index=0; ; index++){
2886 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2887 unsigned int pic_id;
2889 Picture *ref = NULL;
/* idc 3 terminates the reordering loop */
2891 if(reordering_of_pic_nums_idc==3)
2894 if(index >= h->ref_count[list]){
2895 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2899 if(reordering_of_pic_nums_idc<3){
/* idc 0/1: short-term, pic num given as +/- difference from pred */
2900 if(reordering_of_pic_nums_idc<2){
2901 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2904 if(abs_diff_pic_num > h->max_pic_num){
2905 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
2909 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2910 else pred+= abs_diff_pic_num;
/* modular wrap; max_pic_num is a power of two */
2911 pred &= h->max_pic_num - 1;
2913 frame_num = pic_num_extract(h, pred, &pic_structure);
/* search short-term refs newest-first for the frame_num/parity */
2915 for(i= h->short_ref_count-1; i>=0; i--){
2916 ref = h->short_ref[i];
2917 assert(ref->reference);
2918 assert(!ref->long_ref);
2920 ref->frame_num == frame_num &&
2921 (ref->reference & pic_structure)
/* idc 2: long-term, identified by long_term_pic_idx */
2929 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2931 long_idx= pic_num_extract(h, pic_id, &pic_structure);
2934 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2937 ref = h->long_ref[long_idx];
2938 assert(!(ref && !ref->reference));
2939 if(ref && (ref->reference & pic_structure)){
2940 ref->pic_id= pic_id;
2941 assert(ref->long_ref);
2949 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2950 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* shift the list down to insert the found ref at `index` */
2952 for(i=index; i+1<h->ref_count[list]; i++){
2953 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2956 for(; i > index; i--){
2957 h->ref_list[list][i]= h->ref_list[list][i-1];
2958 }
2959 h->ref_list[list][index]= *ref;
2961 pic_as_field(&h->ref_list[list][index], pic_structure);
2965 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
/* fill any holes so every list entry has valid picture data */
2971 for(list=0; list<h->list_count; list++){
2972 for(index= 0; index < h->ref_count[list]; index++){
2973 if(!h->ref_list[list][index].data[0]){
2974 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2975 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
/* temporal direct mode needs the distance scale factors */
2980 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
2981 direct_dist_scale_factor(h);
2982 direct_ref_list_init(h);
/**
 * For MBAFF frames, derive per-field reference entries: for every frame
 * reference i, create top/bottom field views at ref_list[list][16+2*i] and
 * [16+2*i+1], and duplicate the associated weighted-prediction tables.
 */
2986 static void fill_mbaff_ref_list(H264Context *h){
2988 for(list=0; list<2; list++){ //FIXME try list_count
2989 for(i=0; i<h->ref_count[list]; i++){
2990 Picture *frame = &h->ref_list[list][i];
2991 Picture *field = &h->ref_list[list][16+2*i];
/* field[0] = top field view (doubled stride, same data origin) */
2994 field[0].linesize[j] <<= 1;
2995 field[0].reference = PICT_TOP_FIELD;
/* field[1] = bottom field view (offset one line) */
2996 field[1] = field[0];
2998 field[1].data[j] += frame->linesize[j];
2999 field[1].reference = PICT_BOTTOM_FIELD;
/* both field entries reuse the frame's explicit weights/offsets */
3001 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
3002 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
3004 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
3005 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
/* replicate implicit bipred weights for the field entries too */
3009 for(j=0; j<h->ref_count[1]; j++){
3010 for(i=0; i<h->ref_count[0]; i++)
3011 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
3012 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
3013 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/**
 * Parse the explicit weighted-prediction table (pred_weight_table)
 * from the slice header bitstream.
 * Reads luma/chroma log2 weight denominators, then for each reference in
 * each list a per-component weight/offset pair (or the defaults
 * 1<<log2_denom weight and 0 offset when the flag is absent).
 * NOTE(review): some lines are elided in this excerpt (declarations,
 * use_weight accumulation, inner j loops, closing braces).
 */
3017 static int pred_weight_table(H264Context *h){
3018 MpegEncContext * const s = &h->s;
3020 int luma_def, chroma_def;
3023 h->use_weight_chroma= 0;
3024 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3025 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
// Default weight is the identity under the chosen denominator.
3026 luma_def = 1<<h->luma_log2_weight_denom;
3027 chroma_def = 1<<h->chroma_log2_weight_denom;
3029 for(list=0; list<2; list++){
3030 for(i=0; i<h->ref_count[list]; i++){
3031 int luma_weight_flag, chroma_weight_flag;
3033 luma_weight_flag= get_bits1(&s->gb);
3034 if(luma_weight_flag){
3035 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3036 h->luma_offset[list][i]= get_se_golomb(&s->gb);
// Only non-default weights/offsets actually enable weighting.
3037 if( h->luma_weight[list][i] != luma_def
3038 || h->luma_offset[list][i] != 0)
// No explicit luma weight: fall back to identity defaults.
3041 h->luma_weight[list][i]= luma_def;
3042 h->luma_offset[list][i]= 0;
3046 chroma_weight_flag= get_bits1(&s->gb);
3047 if(chroma_weight_flag){
3050 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3051 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3052 if( h->chroma_weight[list][i][j] != chroma_def
3053 || h->chroma_offset[list][i][j] != 0)
3054 h->use_weight_chroma= 1;
// No explicit chroma weight: identity defaults for both components.
3059 h->chroma_weight[list][i][j]= chroma_def;
3060 h->chroma_offset[list][i][j]= 0;
// P slices only have list 0; B slices continue to list 1.
3065 if(h->slice_type_nos != FF_B_TYPE) break;
3067 h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Derive implicit bi-prediction weights from POC distances
 * (weighted_bipred_idc == 2 case of the H.264 spec, cf. 8.4.2.3).
 * Weights default to 32/32 when the distance-scaled value leaves the
 * allowed range; otherwise ref1 gets dist_scale_factor and ref0 gets
 * 64 - dist_scale_factor (implied by the visible assignment).
 * NOTE(review): some lines are elided in this excerpt (declarations,
 * the td==0 / long_ref guard branch, closing braces).
 */
3071 static void implicit_weight_table(H264Context *h){
3072 MpegEncContext * const s = &h->s;
3074 int cur_poc = s->current_picture_ptr->poc;
// Special case: single ref in each list, equidistant around current POC —
// plain averaging, no implicit weighting needed.
3076 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3077 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3079 h->use_weight_chroma= 0;
3084 h->use_weight_chroma= 2;
3085 h->luma_log2_weight_denom= 5;
3086 h->chroma_log2_weight_denom= 5;
3088 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3089 int poc0 = h->ref_list[0][ref0].poc;
3090 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3091 int poc1 = h->ref_list[1][ref1].poc;
// td/tb: temporal distances clipped to [-128,127] per the spec.
3092 int td = av_clip(poc1 - poc0, -128, 127);
3094 int tb = av_clip(cur_poc - poc0, -128, 127);
3095 int tx = (16384 + (FFABS(td) >> 1)) / td;
3096 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
// Out-of-range scale factor: fall back to the equal weight 32.
3097 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3098 h->implicit_weight[ref0][ref1] = 32;
3100 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3102 h->implicit_weight[ref0][ref1] = 32;
3108 * Mark a picture as no longer needed for reference. The refmask
3109 * argument allows unreferencing of individual fields or the whole frame.
3110 * If the picture becomes entirely unreferenced, but is being held for
3111 * display purposes, it is marked as such.
3112 * @param refmask mask of fields to unreference; the mask is bitwise
3113 * anded with the reference marking of pic
3114 * @return non-zero if pic becomes entirely unreferenced (except possibly
3115 * for display purposes) zero if one of the fields remains in reference
// Clear the requested reference field bits on pic; if any bit remains set
// the picture is still a reference. Otherwise, if the picture is still
// queued for display, mark it DELAYED_PIC_REF so the buffer is kept alive.
// NOTE(review): the tail of this function (return statements, braces) is
// elided in this excerpt.
3118 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
3120 if (pic->reference &= refmask) {
// Fully unreferenced: scan the delayed-output queue for this picture.
3123 for(i = 0; h->delayed_pic[i]; i++)
3124 if(pic == h->delayed_pic[i]){
3125 pic->reference=DELAYED_PIC_REF;
3133 * instantaneous decoder refresh.
// Handle an IDR (instantaneous decoder refresh): drop all long-term and
// short-term references and reset frame-number prediction state.
3135 static void idr(H264Context *h){
// Release every long-term reference slot (refmask 0 = unreference fully).
3138 for(i=0; i<16; i++){
3139 remove_long(h, i, 0);
3141 assert(h->long_ref_count==0);
// Release every short-term reference.
3143 for(i=0; i<h->short_ref_count; i++){
3144 unreference_pic(h, h->short_ref[i], 0);
3145 h->short_ref[i]= NULL;
3147 h->short_ref_count=0;
// Reset frame_num prediction so the next picture starts a fresh sequence.
3148 h->prev_frame_num= 0;
3149 h->prev_frame_num_offset= 0;
3154 /* forget old pics after a seek */
/* forget old pics after a seek */
3155 static void flush_dpb(AVCodecContext *avctx){
3156 H264Context *h= avctx->priv_data;
// Drop all pictures pending display and clear their reference marking.
3158 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3159 if(h->delayed_pic[i])
3160 h->delayed_pic[i]->reference= 0;
3161 h->delayed_pic[i]= NULL;
// Reset output POC tracking so any future POC is accepted.
3163 h->outputed_poc= INT_MIN;
3165 if(h->s.current_picture_ptr)
3166 h->s.current_picture_ptr->reference= 0;
// Forget a pending first field of a field pair.
3167 h->s.first_field= 0;
// Delegate the rest of the buffer flush to the generic MPEG layer.
3168 ff_mpeg_flush(avctx);
3172 * Find a Picture in the short term reference list by frame number.
3173 * @param frame_num frame number to search for
3174 * @param idx the index into h->short_ref where returned picture is found
3175 * undefined if no picture found.
3176 * @return pointer to the found picture, or NULL if no pic with the provided
3177 * frame number is found
// Linear search of the short-term reference list for frame_num; on match
// *idx receives the list position (see the doc comment above). The
// return statements are elided in this excerpt.
3179 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3180 MpegEncContext * const s = &h->s;
3183 for(i=0; i<h->short_ref_count; i++){
3184 Picture *pic= h->short_ref[i];
// Optional MMCO debug trace of every candidate inspected.
3185 if(s->avctx->debug&FF_DEBUG_MMCO)
3186 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3187 if(pic->frame_num == frame_num) {
3196 * Remove a picture from the short term reference list by its index in
3197 * that list. This does no checking on the provided index; it is assumed
3198 * to be valid. Other list entries are shifted down.
3199 * @param i index into h->short_ref of picture to remove.
// Remove entry i from short_ref and close the gap by shifting the
// remaining pointers down (see doc comment above: i is assumed valid).
3201 static void remove_short_at_index(H264Context *h, int i){
3202 assert(i >= 0 && i < h->short_ref_count);
3203 h->short_ref[i]= NULL;
// Only shift when entries remain after the decrement.
3204 if (--h->short_ref_count)
3205 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3210 * @return the removed picture or NULL if an error occurs
// Find a short-term reference by frame_num, unreference the fields given
// by ref_mask, and drop it from the list if it became fully unreferenced.
// Returns the removed picture or NULL (return path elided in this excerpt).
3212 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3213 MpegEncContext * const s = &h->s;
3217 if(s->avctx->debug&FF_DEBUG_MMCO)
3218 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3220 pic = find_short(h, frame_num, &i);
// Only unlink from short_ref when no field reference remains.
3222 if(unreference_pic(h, pic, ref_mask))
3223 remove_short_at_index(h, i);
3230 * Remove a picture from the long term reference list by its index in
3232 * @return the removed picture or NULL if an error occurs
// Unreference the fields (ref_mask) of long-term reference slot i; if the
// picture becomes fully unreferenced, clear its long_ref flag and free the
// slot. Returns the removed picture (return path elided in this excerpt).
3234 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3237 pic= h->long_ref[i];
3239 if(unreference_pic(h, pic, ref_mask)){
3240 assert(h->long_ref[i]->long_ref == 1);
3241 h->long_ref[i]->long_ref= 0;
3242 h->long_ref[i]= NULL;
3243 h->long_ref_count--;
3251 * print short term list
// Debug helper: dump the short-term reference list when MMCO debugging
// is enabled; no effect otherwise.
3253 static void print_short_term(H264Context *h) {
3255 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3256 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3257 for(i=0; i<h->short_ref_count; i++){
3258 Picture *pic= h->short_ref[i];
3259 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3265 * print long term list
// Debug helper: dump all 16 long-term reference slots when MMCO debugging
// is enabled (the per-slot NULL check between lines 3272 and 3274 is
// elided in this excerpt).
3267 static void print_long_term(H264Context *h) {
3269 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3270 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3271 for(i = 0; i < 16; i++){
3272 Picture *pic= h->long_ref[i];
3274 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3281 * Executes the reference picture marking (memory management control operations).
/**
 * Execute the decoded reference picture marking operations (MMCO,
 * H.264 spec 8.2.5): apply each opcode, then handle the implicit
 * sliding-window marking of the current picture and enforce the
 * ref_frame_count limit from the SPS.
 * NOTE(review): numerous original lines are elided in this excerpt
 * (declarations, break statements, several guard branches, braces);
 * comments describe only the visible code.
 */
3283 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3284 MpegEncContext * const s = &h->s;
3286 int current_ref_assigned=0;
3289 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3290 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
// Apply each memory-management control operation in bitstream order.
3292 for(i=0; i<mmco_count; i++){
3293 int structure, frame_num;
3294 if(s->avctx->debug&FF_DEBUG_MMCO)
3295 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
// Opcodes addressing a short-term picture: resolve pic_num to frame_num
// and field structure, then locate it in the short-term list.
3297 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3298 || mmco[i].opcode == MMCO_SHORT2LONG){
3299 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3300 pic = find_short(h, frame_num, &j);
3302 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3303 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3304 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3309 switch(mmco[i].opcode){
3310 case MMCO_SHORT2UNUSED:
3311 if(s->avctx->debug&FF_DEBUG_MMCO)
3312 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
// Unreference only the addressed field (structure ^ PICT_FRAME).
3313 remove_short(h, frame_num, structure ^ PICT_FRAME);
3315 case MMCO_SHORT2LONG:
// Move a short-term picture to long-term slot long_arg, evicting any
// different occupant first.
3316 if (h->long_ref[mmco[i].long_arg] != pic)
3317 remove_long(h, mmco[i].long_arg, 0);
3319 remove_short_at_index(h, j);
3320 h->long_ref[ mmco[i].long_arg ]= pic;
3321 if (h->long_ref[ mmco[i].long_arg ]){
3322 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3323 h->long_ref_count++;
3326 case MMCO_LONG2UNUSED:
3327 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3328 pic = h->long_ref[j];
3330 remove_long(h, j, structure ^ PICT_FRAME);
3331 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3332 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
// Comment below left from previous code as it is an interesting note.
3336 /* First field in pair is in short term list or
3337 * at a different long term index.
3338 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3339 * Report the problem and keep the pair where it is,
3340 * and mark this field valid.
// MMCO_LONG (visible branch): mark the current picture long-term at
// slot long_arg, evicting any different occupant.
3343 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3344 remove_long(h, mmco[i].long_arg, 0);
3346 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3347 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3348 h->long_ref_count++;
3351 s->current_picture_ptr->reference |= s->picture_structure;
3352 current_ref_assigned=1;
3354 case MMCO_SET_MAX_LONG:
3355 assert(mmco[i].long_arg <= 16);
// just remove the long term which index is greater than new max
3357 for(j = mmco[i].long_arg; j<16; j++){
3358 remove_long(h, j, 0);
// MMCO_RESET (visible branch): empty both reference lists and reset
// POC/frame_num of the current picture.
3362 while(h->short_ref_count){
3363 remove_short(h, h->short_ref[0]->frame_num, 0);
3365 for(j = 0; j < 16; j++) {
3366 remove_long(h, j, 0);
3368 s->current_picture_ptr->poc=
3369 s->current_picture_ptr->field_poc[0]=
3370 s->current_picture_ptr->field_poc[1]=
3374 s->current_picture_ptr->frame_num= 0;
// No MMCO made the current picture a long-term ref: apply the default
// sliding-window short-term marking.
3380 if (!current_ref_assigned) {
3381 /* Second field of complementary field pair; the first field of
3382 * which is already referenced. If short referenced, it
3383 * should be first entry in short_ref. If not, it must exist
3384 * in long_ref; trying to put it on the short list here is an
3385 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3387 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3388 /* Just mark the second field valid */
3389 s->current_picture_ptr->reference = PICT_FRAME;
3390 } else if (s->current_picture_ptr->long_ref) {
3391 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3392 "assignment for second field "
3393 "in complementary field pair "
3394 "(first field is long term)\n");
// Normal path: insert the current picture at the head of short_ref.
3396 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3398 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3401 if(h->short_ref_count)
3402 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3404 h->short_ref[0]= s->current_picture_ptr;
3405 h->short_ref_count++;
3406 s->current_picture_ptr->reference |= s->picture_structure;
// Enforce the SPS reference-frame limit to keep the lists bounded even
// with corrupt input.
3410 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3412 /* We have too many reference frames, probably due to corrupted
3413 * stream. Need to discard one frame. Prevents overrun of the
3414 * short_ref and long_ref buffers.
3416 av_log(h->s.avctx, AV_LOG_ERROR,
3417 "number of reference frames exceeds max (probably "
3418 "corrupt input), discarding one\n");
3420 if (h->long_ref_count && !h->short_ref_count) {
3421 for (i = 0; i < 16; ++i)
3426 remove_long(h, i, 0);
// Otherwise drop the oldest short-term reference (last list entry).
3428 pic = h->short_ref[h->short_ref_count - 1];
3429 remove_short(h, pic->frame_num, 0);
3433 print_short_term(h);
/**
 * Parse dec_ref_pic_marking() from the slice header: either the IDR
 * flags or the adaptive MMCO opcode list; when adaptive marking is
 * absent, synthesize a sliding-window MMCO_SHORT2UNUSED if the buffer
 * is already full.
 * NOTE(review): several original lines are elided in this excerpt
 * (IDR long_term_reference_flag handling, mmco_index bookkeeping,
 * return statement, braces).
 */
3438 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3439 MpegEncContext * const s = &h->s;
3443 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
// no_output_of_prior_pics_flag, mapped onto broken_link semantics.
3444 s->broken_link= get_bits1(gb) -1;
3446 h->mmco[0].opcode= MMCO_LONG;
3447 h->mmco[0].long_arg= 0;
3451 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3452 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3453 MMCOOpcode opcode= get_ue_golomb(gb);
3455 h->mmco[i].opcode= opcode;
3456 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
// difference_of_pic_nums_minus1 converted to an absolute pic num,
// wrapped to the max_pic_num range.
3457 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3458 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3459 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3463 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3464 unsigned int long_arg= get_ue_golomb(gb);
// Long index bound: 16 frames, or 32 field indices for LONG2UNUSED
// in field pictures.
3465 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3466 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3469 h->mmco[i].long_arg= long_arg;
3472 if(opcode > (unsigned)MMCO_LONG){
3473 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3476 if(opcode == MMCO_END)
// Sliding window mode: when the buffer is full, emit an implicit
// unreference of the oldest short-term picture (both fields when
// decoding fields).
3481 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3483 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3484 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3485 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3486 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3488 if (FIELD_PICTURE) {
3489 h->mmco[0].short_pic_num *= 2;
3490 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3491 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/**
 * Compute the picture order count (POC) of the current picture for all
 * three SPS poc_type modes (0: lsb/msb wraparound, 1: expected-delta
 * cycles, 2: derived from frame_num — else-branch partially elided).
 * Stores field_poc[0]/[1] as appropriate for the picture structure and
 * sets cur->poc to the minimum of the two.
 * NOTE(review): several original lines are elided in this excerpt
 * (field_poc declaration, field_poc[0] assignment for poc_type 0,
 * parts of the poc_type 2 branch, return, braces).
 */
3501 static int init_poc(H264Context *h){
3502 MpegEncContext * const s = &h->s;
3503 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3505 Picture *cur = s->current_picture_ptr;
// frame_num wrapped since the previous picture: advance the offset.
3507 h->frame_num_offset= h->prev_frame_num_offset;
3508 if(h->frame_num < h->prev_frame_num)
3509 h->frame_num_offset += max_frame_num;
3511 if(h->sps.poc_type==0){
3512 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
// Detect poc_lsb wraparound in either direction and adjust poc_msb
// (spec 8.2.1.1).
3514 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3515 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3516 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3517 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3519 h->poc_msb = h->prev_poc_msb;
//printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3522 field_poc[1] = h->poc_msb + h->poc_lsb;
3523 if(s->picture_structure == PICT_FRAME)
3524 field_poc[1] += h->delta_poc_bottom;
3525 }else if(h->sps.poc_type==1){
3526 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3529 if(h->sps.poc_cycle_length != 0)
3530 abs_frame_num = h->frame_num_offset + h->frame_num;
// Non-reference pictures count one less in the cycle (spec 8.2.1.2).
3534 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3537 expected_delta_per_poc_cycle = 0;
3538 for(i=0; i < h->sps.poc_cycle_length; i++)
3539 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3541 if(abs_frame_num > 0){
3542 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3543 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
// Expected POC = full cycles plus partial offsets within this cycle.
3545 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3546 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3547 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3551 if(h->nal_ref_idc == 0)
3552 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3554 field_poc[0] = expectedpoc + h->delta_poc[0];
3555 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3557 if(s->picture_structure == PICT_FRAME)
3558 field_poc[1] += h->delta_poc[1];
// poc_type 2: POC derived directly from frame_num (branch partially
// elided in this excerpt).
3560 int poc= 2*(h->frame_num_offset + h->frame_num);
// Only store the POC of the field(s) actually present in this picture.
3569 if(s->picture_structure != PICT_BOTTOM_FIELD)
3570 s->current_picture_ptr->field_poc[0]= field_poc[0];
3571 if(s->picture_structure != PICT_TOP_FIELD)
3572 s->current_picture_ptr->field_poc[1]= field_poc[1];
3573 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3580 * initialize scan tables
/**
 * Initialize the zigzag/field scan tables for 4x4 and 8x8 transforms.
 * When the DSP context uses the C IDCT the canonical tables are copied
 * verbatim; otherwise (a permuted SIMD IDCT) each entry is remapped via
 * the T() transposition macros. The *_q0 pointers always reference the
 * unpermuted tables, used for lossless (transform-bypass) blocks.
 * NOTE(review): a few else/brace lines are elided in this excerpt.
 */
3582 static void init_scan_tables(H264Context *h){
3583 MpegEncContext * const s = &h->s;
3585 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3586 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3587 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3589 for(i=0; i<16; i++){
// T(): swap the 2-bit row/column halves of a 4x4 scan index.
#define T(x) (x>>2) | ((x<<2) & 0xF)
3591 h->zigzag_scan[i] = T(zigzag_scan[i]);
3592 h-> field_scan[i] = T( field_scan[i]);
3596 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3597 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3598 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3599 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3600 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3602 for(i=0; i<64; i++){
// T(): swap the 3-bit row/column halves of an 8x8 scan index.
#define T(x) (x>>3) | ((x&7)<<3)
3604 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3605 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3606 h->field_scan8x8[i] = T(field_scan8x8[i]);
3607 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
// qp==0 (lossless) blocks always use the unpermuted canonical order.
3611 if(h->sps.transform_bypass){ //FIXME same ugly
3612 h->zigzag_scan_q0 = zigzag_scan;
3613 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3614 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3615 h->field_scan_q0 = field_scan;
3616 h->field_scan8x8_q0 = field_scan8x8;
3617 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3619 h->zigzag_scan_q0 = h->zigzag_scan;
3620 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3621 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3622 h->field_scan_q0 = h->field_scan;
3623 h->field_scan8x8_q0 = h->field_scan8x8;
3624 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3629 * Replicates H264 "master" context to thread contexts.
/**
 * Replicates H264 "master" context state to a thread context so slices
 * can be decoded in parallel: current-picture pointers, line sizes,
 * POC/frame_num prediction state, reference lists and dequant tables.
 */
3631 static void clone_slice(H264Context *dst, H264Context *src)
3633 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3634 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3635 dst->s.current_picture = src->s.current_picture;
3636 dst->s.linesize = src->s.linesize;
3637 dst->s.uvlinesize = src->s.uvlinesize;
3638 dst->s.first_field = src->s.first_field;
// POC / frame_num prediction state needed by init_poc on the thread.
3640 dst->prev_poc_msb = src->prev_poc_msb;
3641 dst->prev_poc_lsb = src->prev_poc_lsb;
3642 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3643 dst->prev_frame_num = src->prev_frame_num;
3644 dst->short_ref_count = src->short_ref_count;
// Reference lists are copied by value (arrays of pointers/Pictures).
3646 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3647 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3648 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3649 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3651 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3652 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3656 * decodes a slice header.
3657 * This will also call MPV_common_init() and frame_start() as needed.
3659 * @param h h264context
3660 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3662 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
/**
 * Decode a slice header (see the doc comment above for the h/h0 contract
 * and return values). Parses first_mb_in_slice, slice type, PPS/SPS
 * activation, frame_num, field/MBAFF structure, POC syntax, reference
 * counts and list reordering, weight tables, ref pic marking, QP and
 * deblocking parameters, then initializes per-slice decoder state.
 * NOTE(review): many original lines are elided in this excerpt (else
 * branches, error returns, loop headers, braces); comments describe
 * only the visible code.
 */
3664 static int decode_slice_header(H264Context *h, H264Context *h0){
3665 MpegEncContext * const s = &h->s;
3666 MpegEncContext * const s0 = &h0->s;
3667 unsigned int first_mb_in_slice;
3668 unsigned int pps_id;
3669 int num_ref_idx_active_override_flag;
3670 static const uint8_t slice_type_map[5]= {FF_P_TYPE, FF_B_TYPE, FF_I_TYPE, FF_SP_TYPE, FF_SI_TYPE};
3671 unsigned int slice_type, tmp, i, j;
3672 int default_ref_list_done = 0;
3673 int last_pic_structure;
// Non-reference slices may be decoded with faster, less exact qpel.
3675 s->dropable= h->nal_ref_idc == 0;
3677 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3678 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3679 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3681 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3682 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3685 first_mb_in_slice= get_ue_golomb(&s->gb);
// CHUNKS mode: a slice starting at MB 0 begins a new picture.
3687 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3688 h0->current_slice = 0;
3689 if (!s0->first_field)
3690 s->current_picture_ptr= NULL;
3693 slice_type= get_ue_golomb(&s->gb);
3695 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
// slice_type >= 5 means "fixed for the whole picture" (value - 5).
3700 h->slice_type_fixed=1;
3702 h->slice_type_fixed=0;
3704 slice_type= slice_type_map[ slice_type ];
// I slices (and repeated slice types) can reuse the default ref list.
3705 if (slice_type == FF_I_TYPE
3706 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3707 default_ref_list_done = 1;
3709 h->slice_type= slice_type;
3710 h->slice_type_nos= slice_type & 3;
3712 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3713 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3714 av_log(h->s.avctx, AV_LOG_ERROR,
3715 "B picture before any references, skipping\n");
// Activate the referenced PPS and its SPS (copied by value into h).
3719 pps_id= get_ue_golomb(&s->gb);
3720 if(pps_id>=MAX_PPS_COUNT){
3721 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3724 if(!h0->pps_buffers[pps_id]) {
3725 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3728 h->pps= *h0->pps_buffers[pps_id];
3730 if(!h0->sps_buffers[h->pps.sps_id]) {
3731 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3734 h->sps = *h0->sps_buffers[h->pps.sps_id];
// Rebuild dequant tables only on the master context and only when the
// active PPS changed.
3736 if(h == h0 && h->dequant_coeff_pps != pps_id){
3737 h->dequant_coeff_pps = pps_id;
3738 init_dequant_tables(h);
// Derive picture geometry from the SPS (mb_height doubled for fields).
3741 s->mb_width= h->sps.mb_width;
3742 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3744 h->b_stride= s->mb_width*4;
3745 h->b8_stride= s->mb_width*2;
3747 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3748 if(h->sps.frame_mbs_only_flag)
3749 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3751 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3753 if (s->context_initialized
3754 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3756 return -1; // width / height changed during parallelized decoding
// First-time (or post-resize) context setup, master context only.
3760 if (!s->context_initialized) {
3762 return -1; // we cant (re-)initialize context during parallel decoding
3763 if (MPV_common_init(s) < 0)
3767 init_scan_tables(h);
// Allocate and initialize one H264Context per decoding thread,
// aliasing the MpegEncContext part onto the existing thread contexts.
3770 for(i = 1; i < s->avctx->thread_count; i++) {
3772 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3773 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3774 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3777 init_scan_tables(c);
3781 for(i = 0; i < s->avctx->thread_count; i++)
3782 if(context_init(h->thread_context[i]) < 0)
3785 s->avctx->width = s->width;
3786 s->avctx->height = s->height;
3787 s->avctx->sample_aspect_ratio= h->sps.sar;
3788 if(!s->avctx->sample_aspect_ratio.den)
3789 s->avctx->sample_aspect_ratio.den = 1;
3791 if(h->sps.timing_info_present_flag){
3792 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
// Workaround: old x264 (< build 44) wrote half the correct time_scale.
3793 if(h->x264_build > 0 && h->x264_build < 44)
3794 s->avctx->time_base.den *= 2;
3795 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3796 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3800 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
// Determine frame/field/MBAFF picture structure for this slice.
3803 h->mb_aff_frame = 0;
3804 last_pic_structure = s0->picture_structure;
3805 if(h->sps.frame_mbs_only_flag){
3806 s->picture_structure= PICT_FRAME;
3808 if(get_bits1(&s->gb)) { //field_pic_flag
3809 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3811 s->picture_structure= PICT_FRAME;
3812 h->mb_aff_frame = h->sps.mb_aff;
3815 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3817 if(h0->current_slice == 0){
// Conceal frame_num gaps by synthesizing intermediate reference frames
// so list construction stays consistent.
3818 while(h->frame_num != h->prev_frame_num &&
3819 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3820 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3822 h->prev_frame_num++;
3823 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3824 s->current_picture_ptr->frame_num= h->prev_frame_num;
3825 execute_ref_pic_marking(h, NULL, 0);
3828 /* See if we have a decoded first field looking for a pair... */
3829 if (s0->first_field) {
3830 assert(s0->current_picture_ptr);
3831 assert(s0->current_picture_ptr->data[0]);
3832 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3834 /* figure out if we have a complementary field pair */
3835 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3837 * Previous field is unmatched. Don't display it, but let it
3838 * remain for reference if marked as such.
3840 s0->current_picture_ptr = NULL;
3841 s0->first_field = FIELD_PICTURE;
3844 if (h->nal_ref_idc &&
3845 s0->current_picture_ptr->reference &&
3846 s0->current_picture_ptr->frame_num != h->frame_num) {
3848 * This and previous field were reference, but had
3849 * different frame_nums. Consider this field first in
3850 * pair. Throw away previous field except for reference
3853 s0->first_field = 1;
3854 s0->current_picture_ptr = NULL;
3857 /* Second field in complementary pair */
3858 s0->first_field = 0;
3863 /* Frame or first field in a potentially complementary pair */
3864 assert(!s0->current_picture_ptr);
3865 s0->first_field = FIELD_PICTURE;
// Allocate/start the new frame unless this is a second field.
3868 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3869 s0->first_field = 0;
3876 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
3878 assert(s->mb_num == s->mb_width * s->mb_height);
3879 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3880 first_mb_in_slice >= s->mb_num){
3881 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
// Map first_mb_in_slice to x/y resync coordinates (MB-pair rows for
// MBAFF/field pictures).
3884 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3885 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3886 if (s->picture_structure == PICT_BOTTOM_FIELD)
3887 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3888 assert(s->mb_y < s->mb_height);
// pic_num space: frames use frame_num, fields use 2*frame_num+1 and a
// doubled max (spec 8.2.4.1).
3890 if(s->picture_structure==PICT_FRAME){
3891 h->curr_pic_num= h->frame_num;
3892 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3894 h->curr_pic_num= 2*h->frame_num + 1;
3895 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3898 if(h->nal_unit_type == NAL_IDR_SLICE){
3899 get_ue_golomb(&s->gb); /* idr_pic_id */
// POC syntax elements, depending on the SPS poc_type.
3902 if(h->sps.poc_type==0){
3903 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3905 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3906 h->delta_poc_bottom= get_se_golomb(&s->gb);
3910 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3911 h->delta_poc[0]= get_se_golomb(&s->gb);
3913 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3914 h->delta_poc[1]= get_se_golomb(&s->gb);
3919 if(h->pps.redundant_pic_cnt_present){
3920 h->redundant_pic_count= get_ue_golomb(&s->gb);
//set defaults, might be overridden a few lines later
3924 h->ref_count[0]= h->pps.ref_count[0];
3925 h->ref_count[1]= h->pps.ref_count[1];
3927 if(h->slice_type_nos != FF_I_TYPE){
3928 if(h->slice_type_nos == FF_B_TYPE){
3929 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3931 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3933 if(num_ref_idx_active_override_flag){
3934 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3935 if(h->slice_type_nos==FF_B_TYPE)
3936 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
// Unsigned trick: catches both 0 and >32 in one comparison.
3938 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3939 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3940 h->ref_count[0]= h->ref_count[1]= 1;
3944 if(h->slice_type_nos == FF_B_TYPE)
3951 if(!default_ref_list_done){
3952 fill_default_ref_list(h);
3955 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
// Explicit vs implicit weighted prediction, per PPS flags.
3958 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3959 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3960 pred_weight_table(h);
3961 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3962 implicit_weight_table(h);
// Marking operates on the master context's reference lists.
3967 decode_ref_pic_marking(h0, &s->gb);
3970 fill_mbaff_ref_list(h);
3972 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3973 tmp = get_ue_golomb(&s->gb);
3975 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3978 h->cabac_init_idc= tmp;
3981 h->last_qscale_diff = 0;
3982 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3984 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3988 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3989 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
//FIXME qscale / qp ... stuff
3991 if(h->slice_type == FF_SP_TYPE){
3992 get_bits1(&s->gb); /* sp_for_switch_flag */
3994 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3995 get_se_golomb(&s->gb); /* slice_qs_delta */
// Deblocking filter parameters (defaults: on, zero offsets).
3998 h->deblocking_filter = 1;
3999 h->slice_alpha_c0_offset = 0;
4000 h->slice_beta_offset = 0;
4001 if( h->pps.deblocking_filter_parameters_present ) {
4002 tmp= get_ue_golomb(&s->gb);
4004 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4007 h->deblocking_filter= tmp;
// Bitstream idc 0 means "on", 1 means "off": swap to internal meaning.
4008 if(h->deblocking_filter < 2)
4009 h->deblocking_filter^= 1; // 1<->0
4011 if( h->deblocking_filter ) {
4012 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4013 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
// User-requested loop-filter skipping by discard level.
4017 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4018 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
4019 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
4020 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4021 h->deblocking_filter= 0;
// Cross-slice deblocking conflicts with sliced multithreading.
4023 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4024 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4025 /* Cheat slightly for speed:
4026 Do not bother to deblock across slices. */
4027 h->deblocking_filter = 2;
4029 h0->max_contexts = 1;
4030 if(!h0->single_decode_warning) {
4031 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4032 h0->single_decode_warning = 1;
4035 return 1; // deblocking switched inside frame
// Unimplemented: slice_group_change_cycle (FMO map types 3..5); the
// original keeps this line disabled/incomplete on purpose.
4040 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4041 slice_group_change_cycle= get_bits(&s->gb, ?);
4044 h0->last_slice_type = slice_type;
4045 h->slice_num = ++h0->current_slice;
// Precompute ref -> frame mapping used by the loop filter:
// 4*frame_num plus the 2-bit field reference marking.
4048 int *ref2frm= h->ref2frm[h->slice_num&15][j];
4052 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4053 +(h->ref_list[j][i].reference&3);
4056 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4057 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4059 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4060 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4062 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4064 av_get_pict_type_char(h->slice_type),
4065 pps_id, h->frame_num,
4066 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4067 h->ref_count[0], h->ref_count[1],
4069 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4071 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4072 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/**
 * Reads a CAVLC level_prefix: the count of leading zero bits before the
 * first 1 bit in the bitstream (H.264 spec 9.2.2.1).
 * NOTE(review): interior lines are elided in this excerpt (local
 * declarations of buf/log, the TRACE guard around the debug logging, and
 * the final return). The debug line prints log-1, which is presumably the
 * value returned -- confirm against the full source.
 */
4082 static inline int get_level_prefix(GetBitContext *gb){
// Open the bit-reader, refill its cache, and snapshot the cached bits.
4086 OPEN_READER(re, gb);
4087 UPDATE_CACHE(re, gb);
4088 buf=GET_CACHE(re, gb);
// Position of the first set bit from the MSB side: log bits are consumed
// (the zero run plus the terminating 1 bit).
4090 log= 32 - av_log2(buf);
// Trace-only output; does not consume bits from the stream.
4092 print_bin(buf>>(32-log), log);
4093 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
// Commit: skip the consumed bits and write the reader state back.
4096 LAST_SKIP_BITS(re, gb, log);
4097 CLOSE_READER(re, gb);
/**
 * Decides whether the 8x8 transform may be used for the current macroblock.
 * Per the spec, 8x8 DCT requires every sub-partition to be 8x8 (or larger),
 * and direct sub-blocks additionally require direct_8x8_inference_flag.
 * NOTE(review): the surrounding loop over i and the return statement are
 * elided in this excerpt -- only the per-sub-block rejection test is shown.
 */
4102 static inline int get_dct8x8_allowed(H264Context *h){
// Reject if any sub-partition is smaller than 8x8, or is a direct
// sub-block while direct 8x8 inference is disabled in the SPS.
4105 if(!IS_SUB_8X8(h->sub_mb_type[i])
4106 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4113 * decodes a residual block.
4114 * @param n block index
4115 * @param scantable scantable
4116 * @param max_coeff number of coefficients in the block
4117 * @return <0 if an error occurred
 * @param qmul dequantization table for this block, or NULL to store raw
 *        levels (used for the chroma DC block, dequantized later)
 * NOTE(review): interior lines are elided in this excerpt; the level[]
 * declaration, some else branches, and the final returns are not visible.
4119 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4120 MpegEncContext * const s = &h->s;
// Maps the predicted non-zero count (0..16) to one of the 4 coeff_token
// VLC tables, per H.264 spec table selection for nC.
4121 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4123 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4125 //FIXME put trailing_onex into the context
// --- coeff_token: encodes (total_coeff, trailing_ones) jointly. The VLC
// table used depends on the block kind and the predicted nC.
4127 if(n == CHROMA_DC_BLOCK_INDEX){
4128 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4129 total_coeff= coeff_token>>2;
4131 if(n == LUMA_DC_BLOCK_INDEX){
4132 total_coeff= pred_non_zero_count(h, 0);
4133 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4134 total_coeff= coeff_token>>2;
4136 total_coeff= pred_non_zero_count(h, n);
4137 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4138 total_coeff= coeff_token>>2;
// Cache the non-zero count for neighbor prediction of later blocks.
4139 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4143 //FIXME set last_non_zero?
// Corrupt-stream guard: more coefficients than the block can hold.
4147 if(total_coeff > (unsigned)max_coeff) {
4148 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4152 trailing_ones= coeff_token&3;
4153 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4154 assert(total_coeff<=16);
// --- levels: trailing ones are +/-1, coded by a single sign bit each.
4156 for(i=0; i<trailing_ones; i++){
4157 level[i]= 1 - 2*get_bits1(gb);
// First non-trailing level: suffix_length starts at 0 or 1 depending on
// total_coeff/trailing_ones, per the spec.
4161 int level_code, mask;
4162 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4163 int prefix= get_level_prefix(gb);
4165 //first coefficient has suffix_length equal to 0 or 1
4166 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4168 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4170 level_code= (prefix<<suffix_length); //part
4171 }else if(prefix==14){
4173 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4175 level_code= prefix + get_bits(gb, 4); //part
// prefix >= 15: escape coding with a (prefix-3)-bit suffix.
4177 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4178 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4180 level_code += (1<<(prefix-3))-4096;
// When fewer than 3 trailing ones, the first level's magnitude is biased
// by 1 (the value +/-1 cannot occur here).
4183 if(trailing_ones < 3) level_code += 2;
// Branchless sign decode: even level_code -> positive, odd -> negative.
4188 mask= -(level_code&1);
4189 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4192 //remaining coefficients have suffix_length > 0
4193 for(;i<total_coeff;i++) {
// Thresholds at which suffix_length is incremented after each level.
4194 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4195 prefix = get_level_prefix(gb);
4197 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4199 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4201 level_code += (1<<(prefix-3))-4096;
4203 mask= -(level_code&1);
4204 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4205 if(level_code > suffix_limit[suffix_length])
// --- total_zeros: if the block is full there are no embedded zeros.
4210 if(total_coeff == max_coeff)
4213 if(n == CHROMA_DC_BLOCK_INDEX)
4214 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4216 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
// --- scatter the levels into the block, highest frequency first.
// Two copies of the loop: raw store (qmul==NULL) vs dequantizing store.
4219 coeff_num = zeros_left + total_coeff - 1;
4220 j = scantable[coeff_num];
4222 block[j] = level[0];
4223 for(i=1;i<total_coeff;i++) {
4226 else if(zeros_left < 7){
4227 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4229 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4231 zeros_left -= run_before;
4232 coeff_num -= 1 + run_before;
4233 j= scantable[ coeff_num ];
// Dequantizing variant: (level * qmul[j] + 32) >> 6 applies the 6-bit
// fixed-point dequant scale.
4238 block[j] = (level[0] * qmul[j] + 32)>>6;
4239 for(i=1;i<total_coeff;i++) {
4242 else if(zeros_left < 7){
4243 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4245 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4247 zeros_left -= run_before;
4248 coeff_num -= 1 + run_before;
4249 j= scantable[ coeff_num ];
4251 block[j]= (level[i] * qmul[j] + 32)>>6;
// A negative zeros_left means the run_before values overran the block.
4256 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
/**
 * Predicts mb_field_decoding_flag for a skipped MBAFF macroblock pair:
 * inherit the field/frame decision from the left neighbor if it belongs to
 * this slice, else from the top neighbor, else default (elided branch).
 */
4263 static void predict_field_decoding_flag(H264Context *h){
4264 MpegEncContext * const s = &h->s;
4265 const int mb_xy= h->mb_xy;
// Prefer the left neighbor's mb_type; fall back to the top neighbor.
// The final fallback value sits on an elided line.
4266 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4267 ? s->current_picture.mb_type[mb_xy-1]
4268 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4269 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
// Field decoding iff the chosen neighbor was interlaced.
4271 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4275 * decodes a P_SKIP or B_SKIP macroblock
 * Fills motion/reference caches with the inferred (predicted) motion,
 * writes them back, and marks the macroblock as skipped. No residual
 * data is read for skipped macroblocks.
4277 static void decode_mb_skip(H264Context *h){
4278 MpegEncContext * const s = &h->s;
4279 const int mb_xy= h->mb_xy;
// A skipped MB has no coefficients: clear the non-zero-count state.
4282 memset(h->non_zero_count[mb_xy], 0, 16);
4283 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4286 mb_type|= MB_TYPE_INTERLACED;
4288 if( h->slice_type_nos == FF_B_TYPE )
// B_SKIP: motion comes from direct-mode prediction.
4290 // just for fill_caches. pred_direct_motion will set the real mb_type
4291 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4293 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4294 pred_direct_motion(h, &mb_type);
4295 mb_type|= MB_TYPE_SKIP;
// P_SKIP: motion is the standard P-skip MV prediction with ref 0.
4300 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4302 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4303 pred_pskip_motion(h, &mx, &my);
4304 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4305 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
// Commit the inferred state to the picture-level arrays.
4308 write_back_motion(h, mb_type);
4309 s->current_picture.mb_type[mb_xy]= mb_type;
4310 s->current_picture.qscale_table[mb_xy]= s->qscale;
4311 h->slice_table[ mb_xy ]= h->slice_num;
4312 h->prev_mb_skipped= 1;
4316 * decodes a macroblock
4317 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
 * CAVLC macroblock-layer parser: skip-run handling, mb_type, intra
 * prediction modes or inter motion data, CBP, delta-QP, and residuals.
 * NOTE(review): many interior lines (error returns, else branches, loop
 * headers) are elided in this excerpt.
4319 static int decode_mb_cavlc(H264Context *h){
4320 MpegEncContext * const s = &h->s;
4322 int partition_count;
4323 unsigned int mb_type, cbp;
// 8x8 transform permitted at all only if the PPS enables it; further
// restricted below by the actual partitioning.
4324 int dct8x8_allowed= h->pps.transform_8x8_mode;
4326 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4328 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4330 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4331 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
// --- skip-run handling (P/B slices only; I slices have no mb_skip_run).
4333 if(h->slice_type_nos != FF_I_TYPE){
4334 if(s->mb_skip_run==-1)
4335 s->mb_skip_run= get_ue_golomb(&s->gb);
4337 if (s->mb_skip_run--) {
// MBAFF: on the top MB of a pair, the field flag is either read now
// (run ending here) or predicted from neighbors.
4338 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4339 if(s->mb_skip_run==0)
4340 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4342 predict_field_decoding_flag(h);
4349 if( (s->mb_y&1) == 0 )
4350 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4353 h->prev_mb_skipped= 0;
// --- mb_type: the ue(v) code is remapped through per-slice-type tables;
// out-of-range values fall through to the intra range (goto decode_intra_mb).
4355 mb_type= get_ue_golomb(&s->gb);
4356 if(h->slice_type_nos == FF_B_TYPE){
4358 partition_count= b_mb_type_info[mb_type].partition_count;
4359 mb_type= b_mb_type_info[mb_type].type;
4362 goto decode_intra_mb;
4364 }else if(h->slice_type_nos == FF_P_TYPE){
4366 partition_count= p_mb_type_info[mb_type].partition_count;
4367 mb_type= p_mb_type_info[mb_type].type;
4370 goto decode_intra_mb;
4373 assert(h->slice_type_nos == FF_I_TYPE);
4374 if(h->slice_type == FF_SI_TYPE && mb_type)
4378 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
// Intra table lookup also yields the implied CBP and 16x16 pred mode.
4382 cbp= i_mb_type_info[mb_type].cbp;
4383 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4384 mb_type= i_mb_type_info[mb_type].type;
4388 mb_type |= MB_TYPE_INTERLACED;
4390 h->slice_table[ mb_xy ]= h->slice_num;
// --- I_PCM: raw samples follow, byte-aligned.
4392 if(IS_INTRA_PCM(mb_type)){
4395 // We assume these blocks are very rare so we do not optimize it.
4396 align_get_bits(&s->gb);
4398 // The pixels are stored in the same order as levels in h->mb array.
4399 for(x=0; x < (CHROMA ? 384 : 256); x++){
4400 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4403 // In deblocking, the quantizer is 0
4404 s->current_picture.qscale_table[mb_xy]= 0;
4405 // All coeffs are present
4406 memset(h->non_zero_count[mb_xy], 16, 16);
4408 s->current_picture.mb_type[mb_xy]= mb_type;
// MBAFF field MB: temporarily double ref counts so per-field reference
// indices address the split lists (undone at the end of the function).
4413 h->ref_count[0] <<= 1;
4414 h->ref_count[1] <<= 1;
4417 fill_caches(h, mb_type, 0);
// --- intra prediction mode parsing.
4420 if(IS_INTRA(mb_type)){
4422 // init_top_left_availability(h);
4424 if is elided; 4x4 path below.
4423 if(IS_INTRA4x4(mb_type)){
4426 if(dct8x8_allowed && get_bits1(&s->gb)){
4427 mb_type |= MB_TYPE_8x8DCT;
4431 // fill_intra4x4_pred_table(h);
// For each 4x4 (or 8x8, di controls the step) luma block: either keep
// the predicted most-probable mode or read a 3-bit remapped mode.
4432 for(i=0; i<16; i+=di){
4433 int mode= pred_intra_mode(h, i);
4435 if(!get_bits1(&s->gb)){
4436 const int rem_mode= get_bits(&s->gb, 3);
4437 mode = rem_mode + (rem_mode >= mode);
4441 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4443 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4445 write_back_intra_pred_mode(h);
4446 if( check_intra4x4_pred_mode(h) < 0)
4449 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4450 if(h->intra16x16_pred_mode < 0)
4454 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4457 h->chroma_pred_mode= pred_mode;
// --- 8x8 partitions: sub_mb_type per 8x8 block, then refs, then MVs.
4459 }else if(partition_count==4){
4460 int i, j, sub_partition_count[4], list, ref[2][4];
4462 if(h->slice_type_nos == FF_B_TYPE){
4464 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4465 if(h->sub_mb_type[i] >=13){
4466 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4469 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4470 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Any direct 8x8 sub-block pulls in direct-mode motion for the MB; the
// corner caches are marked unavailable so prediction skips them.
4472 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4473 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4474 pred_direct_motion(h, &mb_type);
4475 h->ref_cache[0][scan8[4]] =
4476 h->ref_cache[1][scan8[4]] =
4477 h->ref_cache[0][scan8[12]] =
4478 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4481 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4483 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4484 if(h->sub_mb_type[i] >=4){
4485 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4488 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4489 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
// Reference indices per list and per 8x8 block (direct blocks skipped).
4493 for(list=0; list<h->list_count; list++){
4494 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4496 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4497 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4498 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4500 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
// Recompute 8x8-DCT eligibility now that sub_mb_types are known.
4512 dct8x8_allowed = get_dct8x8_allowed(h);
// Motion vectors: predict, add the coded delta, and splat into the cache
// with a pattern that depends on the sub-partition shape.
4514 for(list=0; list<h->list_count; list++){
4516 if(IS_DIRECT(h->sub_mb_type[i])) {
4517 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4520 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4521 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4523 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4524 const int sub_mb_type= h->sub_mb_type[i];
4525 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4526 for(j=0; j<sub_partition_count[i]; j++){
4528 const int index= 4*i + block_width*j;
4529 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4530 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4531 mx += get_se_golomb(&s->gb);
4532 my += get_se_golomb(&s->gb);
4533 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4535 if(IS_SUB_8X8(sub_mb_type)){
4537 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4539 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4540 }else if(IS_SUB_8X4(sub_mb_type)){
4541 mv_cache[ 1 ][0]= mx;
4542 mv_cache[ 1 ][1]= my;
4543 }else if(IS_SUB_4X8(sub_mb_type)){
4544 mv_cache[ 8 ][0]= mx;
4545 mv_cache[ 8 ][1]= my;
4547 mv_cache[ 0 ][0]= mx;
4548 mv_cache[ 0 ][1]= my;
// Unused list for this block: zero the MV cache via 32-bit stores.
4551 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
// --- whole-MB direct mode.
4557 }else if(IS_DIRECT(mb_type)){
4558 pred_direct_motion(h, &mb_type);
4559 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
// --- 16x16 / 16x8 / 8x16 partitions: refs first, then MV deltas;
// fill_rectangle geometry (4x4 / 4x2 / 2x4) matches the partition shape.
4561 int list, mx, my, i;
4562 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4563 if(IS_16X16(mb_type)){
4564 for(list=0; list<h->list_count; list++){
4566 if(IS_DIR(mb_type, 0, list)){
4567 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4568 if(val >= h->ref_count[list]){
4569 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4573 val= LIST_NOT_USED&0xFF;
4574 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4576 for(list=0; list<h->list_count; list++){
4578 if(IS_DIR(mb_type, 0, list)){
4579 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4580 mx += get_se_golomb(&s->gb);
4581 my += get_se_golomb(&s->gb);
4582 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4584 val= pack16to32(mx,my);
4587 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4590 else if(IS_16X8(mb_type)){
4591 for(list=0; list<h->list_count; list++){
4594 if(IS_DIR(mb_type, i, list)){
4595 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4596 if(val >= h->ref_count[list]){
4597 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4601 val= LIST_NOT_USED&0xFF;
4602 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4605 for(list=0; list<h->list_count; list++){
4608 if(IS_DIR(mb_type, i, list)){
4609 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4610 mx += get_se_golomb(&s->gb);
4611 my += get_se_golomb(&s->gb);
4612 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4614 val= pack16to32(mx,my);
4617 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4621 assert(IS_8X16(mb_type));
4622 for(list=0; list<h->list_count; list++){
4625 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4626 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4627 if(val >= h->ref_count[list]){
4628 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4632 val= LIST_NOT_USED&0xFF;
4633 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4636 for(list=0; list<h->list_count; list++){
4639 if(IS_DIR(mb_type, i, list)){
4640 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4641 mx += get_se_golomb(&s->gb);
4642 my += get_se_golomb(&s->gb);
4643 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4645 val= pack16to32(mx,my);
4648 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4654 if(IS_INTER(mb_type))
4655 write_back_motion(h, mb_type);
// --- CBP: explicit for non-I16x16 MBs, via golomb remap tables
// (separate gray-only tables when chroma is absent).
4657 if(!IS_INTRA16x16(mb_type)){
4658 cbp= get_ue_golomb(&s->gb);
4660 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4665 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4666 else cbp= golomb_to_inter_cbp [cbp];
4668 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4669 else cbp= golomb_to_inter_cbp_gray[cbp];
// transform_size_8x8_flag for inter MBs with luma coefficients.
4674 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4675 if(get_bits1(&s->gb)){
4676 mb_type |= MB_TYPE_8x8DCT;
4677 h->cbp_table[mb_xy]= cbp;
4680 s->current_picture.mb_type[mb_xy]= mb_type;
// --- residuals: only present when CBP != 0 or the MB is I16x16.
4682 if(cbp || IS_INTRA16x16(mb_type)){
4683 int i8x8, i4x4, chroma_idx;
4685 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4686 const uint8_t *scan, *scan8x8, *dc_scan;
4688 // fill_non_zero_count_cache(h);
// Scan order depends on field/frame coding; the _q0 variants are used
// at qscale 0 (where dequant is folded into the scan tables).
4690 if(IS_INTERLACED(mb_type)){
4691 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4692 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4693 dc_scan= luma_dc_field_scan;
4695 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4696 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4697 dc_scan= luma_dc_zigzag_scan;
// mb_qp_delta with range check and wrap into [0,51].
4700 dquant= get_se_golomb(&s->gb);
4702 if( dquant > 25 || dquant < -26 ){
4703 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4707 s->qscale += dquant;
4708 if(((unsigned)s->qscale) > 51){
4709 if(s->qscale<0) s->qscale+= 52;
4710 else s->qscale-= 52;
4713 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4714 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
// I16x16: separate DC block (16 coeffs) then 15-coeff AC blocks.
4715 if(IS_INTRA16x16(mb_type)){
4716 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4717 return -1; //FIXME continue if partitioned and other return -1 too
4720 assert((cbp&15) == 0 || (cbp&15) == 15);
4723 for(i8x8=0; i8x8<4; i8x8++){
4724 for(i4x4=0; i4x4<4; i4x4++){
4725 const int index= i4x4 + 4*i8x8;
4726 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4732 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
// Non-I16x16 luma: per-8x8 blocks, 8x8 or 4x4 transform.
4735 for(i8x8=0; i8x8<4; i8x8++){
4736 if(cbp & (1<<i8x8)){
4737 if(IS_8x8DCT(mb_type)){
4738 DCTELEM *buf = &h->mb[64*i8x8];
4740 for(i4x4=0; i4x4<4; i4x4++){
4741 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4742 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4745 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4746 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4748 for(i4x4=0; i4x4<4; i4x4++){
4749 const int index= i4x4 + 4*i8x8;
4751 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4757 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4758 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
// Chroma: DC blocks (qmul==NULL -> raw levels), then AC if CBP says so.
4764 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4765 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4771 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4772 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4773 for(i4x4=0; i4x4<4; i4x4++){
4774 const int index= 16 + 4*chroma_idx + i4x4;
4775 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4781 uint8_t * const nnz= &h->non_zero_count_cache[0];
4782 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4783 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
// No residuals at all: clear every non-zero-count cache entry.
4786 uint8_t * const nnz= &h->non_zero_count_cache[0];
4787 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4788 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4789 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4791 s->current_picture.qscale_table[mb_xy]= s->qscale;
4792 write_back_non_zero_count(h);
// Undo the MBAFF ref-count doubling from above.
4795 h->ref_count[0] >>= 1;
4796 h->ref_count[1] >>= 1;
/**
 * Decodes mb_field_decoding_flag (CABAC): context 70..72 selected by how
 * many of the left/top neighboring MB pairs are field-coded.
 */
4802 static int decode_cabac_field_decoding_flag(H264Context *h) {
4803 MpegEncContext * const s = &h->s;
4804 const int mb_x = s->mb_x;
// Address the top MB of the current pair (mb_y rounded down to even).
4805 const int mb_y = s->mb_y & ~1;
4806 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4807 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4809 unsigned int ctx = 0;
// ctx increments (elided lines) when a same-slice neighbor is interlaced.
4811 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4814 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4818 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
/**
 * Decodes an intra mb_type with CABAC (shared by I, P and B slices).
 * Returns 0 for I_4x4, 25 for I_PCM, or 1..24 encoding the I16x16
 * variant (cbp_luma, cbp_chroma and prediction mode packed together).
 * @param ctx_base index of the first context for this slice type
 * @param intra_slice nonzero in I slices (extra neighbor-derived context)
 */
4821 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4822 uint8_t *state= &h->cabac_state[ctx_base];
4826 MpegEncContext * const s = &h->s;
4827 const int mba_xy = h->left_mb_xy[0];
4828 const int mbb_xy = h->top_mb_xy;
// In intra slices the first bin's context depends on whether the
// neighbors are themselves non-I4x4 (ctx increments are on elided lines).
4830 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4832 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4834 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4835 return 0; /* I4x4 */
4838 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4839 return 0; /* I4x4 */
// Terminate bin set -> I_PCM.
4842 if( get_cabac_terminate( &h->cabac ) )
4843 return 25; /* PCM */
// I16x16: accumulate cbp_luma (+12), cbp_chroma (+4/+8) and the 2-bit
// intra16x16 prediction mode into the composite type index.
4845 mb_type = 1; /* I16x16 */
4846 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4847 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4848 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4849 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4850 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
/**
 * Decodes mb_type with CABAC, dispatching on the slice type.
 * I: delegates to decode_cabac_intra_mb_type. P: 4 inter types plus an
 * intra escape (+5). B: a bin tree selecting among direct/16x16/16x8/
 * 8x16/8x8/bi types, with an intra escape (+23).
 */
4854 static int decode_cabac_mb_type( H264Context *h ) {
4855 MpegEncContext * const s = &h->s;
4857 if( h->slice_type_nos == FF_I_TYPE ) {
4858 return decode_cabac_intra_mb_type(h, 3, 1);
4859 } else if( h->slice_type_nos == FF_P_TYPE ) {
// First bin 0 -> inter; then two bins pick among the 4 P inter types.
4860 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
4862 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
4863 /* P_L0_D16x16, P_8x8 */
4864 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
4866 /* P_L0_D8x16, P_L0_D16x8 */
4867 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
// First bin 1 -> intra mb_type, offset by 5 into the P-slice table.
4870 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
4872 } else if( h->slice_type_nos == FF_B_TYPE ) {
4873 const int mba_xy = h->left_mb_xy[0];
4874 const int mbb_xy = h->top_mb_xy;
// First-bin context 27..29: counts same-slice neighbors that are not
// direct-coded (ctx increments on elided lines).
4878 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4880 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4883 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4884 return 0; /* B_Direct_16x16 */
4886 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4887 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
// 4-bit code; values 13..15 are escapes, the rest map to bi/partition types.
4890 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4891 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4892 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4893 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4895 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4896 else if( bits == 13 ) {
4897 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4898 } else if( bits == 14 )
4899 return 11; /* B_L1_L0_8x16 */
4900 else if( bits == 15 )
4901 return 22; /* B_8x8 */
// Otherwise one more bin extends the code for the remaining bi types.
4903 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4904 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
4906 /* TODO SI/SP frames? */
/**
 * Decodes mb_skip_flag (CABAC). The context (11..13 for P, 24.. for B,
 * via the elided ctx base) counts same-slice left/top neighbors that are
 * not themselves skipped. In MBAFF, the neighbor addresses are adjusted
 * so that field/frame pairing matches the current pair.
 */
4911 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4912 MpegEncContext * const s = &h->s;
4916 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4917 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
// Left neighbor: step down to the bottom field MB when the pairing of
// current and neighbor MBs differs (condition head is on an elided line).
4920 && h->slice_table[mba_xy] == h->slice_num
4921 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] )
4922 mba_xy += s->mb_stride;
4924 mbb_xy = mb_xy - s->mb_stride;
4926 && h->slice_table[mbb_xy] == h->slice_num
4927 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4928 mbb_xy -= s->mb_stride;
4930 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
// Non-MBAFF: the top neighbor is one (frame) or two (field) rows up.
4932 int mb_xy = h->mb_xy;
4934 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4937 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4939 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
// B slices use a different context base (adjustment on an elided line).
4942 if( h->slice_type_nos == FF_B_TYPE )
4944 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
/**
 * Decodes an intra 4x4 prediction mode (CABAC): one bin (ctx 68) selects
 * "use predicted mode"; otherwise a 3-bit fixed code (ctx 69) gives the
 * remaining mode, adjusted to skip over the predicted one.
 */
4947 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4950 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
// rem_intra4x4_pred_mode: three bins, LSB first, all with context 69.
4953 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4954 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4955 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
// Values >= pred_mode are shifted up by one (the predicted mode itself
// is signalled by the first bin, not by this code).
4957 if( mode >= pred_mode )
/**
 * Decodes intra_chroma_pred_mode (CABAC): a truncated-unary code of at
 * most 3 bins. The first bin's context (64..66) counts same-slice
 * neighbors with a nonzero chroma prediction mode.
 */
4963 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4964 const int mba_xy = h->left_mb_xy[0];
4965 const int mbb_xy = h->top_mb_xy;
4969 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4970 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
4973 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
4976 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
// Remaining bins all use context 67 (64+3); returns on elided lines.
4979 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4981 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
/**
 * Decodes the 4-bit luma CBP (CABAC), one bin per 8x8 block. Each bin's
 * context (73..76) is derived from whether the spatially adjacent 8x8
 * blocks -- in the left/top neighbor MBs or already-decoded bits of this
 * MB -- had coefficients. A neighbor outside the slice yields -1, whose
 * bit tests make the "coded" assumption.
 */
4987 static int decode_cabac_mb_cbp_luma( H264Context *h) {
4988 int cbp_b, cbp_a, ctx, cbp = 0;
4990 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
4991 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
// Bin 0 (top-left 8x8): left neighbor's right column, top neighbor's
// bottom row. Subsequent bins mix neighbor bits with bits of cbp so far.
4993 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
4994 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
4995 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
4996 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
4997 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
4998 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
4999 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5000 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
/**
 * Decodes the chroma CBP (CABAC): 0 = no chroma coefficients, 1 = DC
 * only, 2 = DC and AC. Two bins with contexts 77..80 derived from the
 * neighbors' chroma CBP (bits 4-5 of their stored cbp).
 */
5003 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5007 cbp_a = (h->left_cbp>>4)&0x03;
5008 cbp_b = (h-> top_cbp>>4)&0x03;
// First bin: "any chroma coefficients?" -- context from nonzero neighbors.
5011 if( cbp_a > 0 ) ctx++;
5012 if( cbp_b > 0 ) ctx += 2;
5013 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
// Second bin: "AC too?" -- context from neighbors having AC (value 2).
5017 if( cbp_a == 2 ) ctx++;
5018 if( cbp_b == 2 ) ctx += 2;
5019 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
/**
 * Decodes mb_qp_delta (CABAC): a unary code (contexts 60..63; the first
 * bin's context depends on whether the previous MB had a nonzero delta),
 * then unmapped to a signed delta: even codes positive, odd negative.
 */
5021 static int decode_cabac_mb_dqp( H264Context *h) {
5025 if( h->last_qscale_diff != 0 )
5028 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
// Bail out on absurdly long unary codes from a corrupt stream.
5034 if(val > 102) //prevent infinite loop
// Odd val -> negative delta (positive branch is on an elided line).
5041 return -(val + 1)/2;
/**
 * Decodes a P-slice sub_mb_type (CABAC): up to three bins (contexts
 * 21..23) selecting among P_L0_8x8 / 8x4 / 4x8 / 4x4; the concrete
 * returns for each leaf are on elided lines.
 */
5043 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5044 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5046 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5048 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
/**
 * Decodes a B-slice sub_mb_type (CABAC): a bin tree over contexts 36..39
 * returning an index into b_sub_mb_type_info (0 = direct, 1..2 = L0/L1
 * 8x8, up to 12 for the 4x4 bi types).
 */
5052 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5054 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5055 return 0; /* B_Direct_8x8 */
5056 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5057 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5059 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5060 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5061 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
// Remaining leaves: two more bins accumulate into type (base set on an
// elided line); all use context 39.
5064 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5065 type += get_cabac( &h->cabac, &h->cabac_state[39] );
/**
 * Decodes transform_size_8x8_flag (CABAC): single bin with context
 * 399..401, selected by how many neighbors use the 8x8 transform
 * (h->neighbor_transform_size, maintained elsewhere).
 */
5069 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5070 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
/**
 * Decodes ref_idx (CABAC) for list/block n: a unary code over contexts
 * 54.. whose first-bin context depends on the left/top cached ref
 * indices. In B slices, refs belonging to direct-coded neighbor blocks
 * do not count toward the context.
 */
5073 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5074 int refa = h->ref_cache[list][scan8[n] - 1];
5075 int refb = h->ref_cache[list][scan8[n] - 8];
5079 if( h->slice_type_nos == FF_B_TYPE) {
// Neighbor refs from direct blocks are neutralized (elided assignments).
5080 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5082 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5091 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
// Sanity bound: no real stream has 32+ reference pictures per list.
5097 if(ref >= 32 /*h->ref_list[list]*/){
5098 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5099 return 0; //FIXME we should return -1 and check the return everywhere
/**
 * Decodes one motion vector difference component (CABAC): UEG3 binarized
 * -- up to 9 unary bins with adaptive contexts, then an Exp-Golomb
 * bypass suffix, then a bypass sign bit. Contexts start at 40 (x) or 47
 * (y); the first bin's context is chosen from the summed neighbor |mvd|.
 * @param l 0 for the x component, 1 for y
 */
5105 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5106 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5107 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5108 int ctxbase = (l == 0) ? 40 : 47;
// ctx from amvd thresholds (2 and 32); assignments on elided lines.
5113 else if( amvd > 32 )
5118 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
// Unary prefix, at most 9 bins with incrementing contexts.
5123 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
// Exp-Golomb bypass suffix for values >= 9.
5131 while( get_cabac_bypass( &h->cabac ) ) {
// Corrupt-stream guard against runaway exponents.
5135 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5140 if( get_cabac_bypass( &h->cabac ) )
// Bypass-coded sign applied to the magnitude.
5144 return get_cabac_bypass_sign( &h->cabac, -mvd );
/**
 * Computes the coded_block_flag context: whether the left (nza) and top
 * (nzb) neighboring blocks of the same category had coefficients, then
 * ctx + 4*cat indexes into the cbf context set (base 85 at the caller).
 * DC categories read the neighbor MBs' packed cbp bits; AC/luma
 * categories read the non_zero_count cache.
 */
5147 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
// Luma DC: bit 8 of the neighbor cbp words.
5153 nza = h->left_cbp&0x100;
5154 nzb = h-> top_cbp&0x100;
// Chroma DC: per-component bits 6/7 of the neighbor cbp words.
5156 nza = (h->left_cbp>>(6+idx))&0x01;
5157 nzb = (h-> top_cbp>>(6+idx))&0x01;
// Chroma AC: cached non-zero counts of the spatial neighbors.
5161 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5162 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5164 assert(cat == 1 || cat == 2);
// Luma AC / luma 4x4: cached non-zero counts.
5165 nza = h->non_zero_count_cache[scan8[idx] - 1];
5166 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5176 return ctx + 4 * cat;
/* Maps each of the 63 scan positions of an 8x8 block to the context
 * offset used for its last_significant_coeff_flag (H.264 spec 9.3.3.1.1.10).
 * Declared with DECLARE_ASM_CONST so inline-asm CABAC readers can
 * reference it directly. */
5179 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5180 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5181 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5182 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5183 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5186 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
5187 static const int significant_coeff_flag_offset[2][6] = {
5188 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5189 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5191 static const int last_coeff_flag_offset[2][6] = {
5192 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5193 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5195 static const int coeff_abs_level_m1_offset[6] = {
5196 227+0, 227+10, 227+20, 227+30, 227+39, 426
5198 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5199 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5200 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5201 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5202 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5203 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5204 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5205 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5206 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5208 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5209 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5210 * map node ctx => cabac ctx for level=1 */
5211 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5212 /* map node ctx => cabac ctx for level>1 */
5213 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5214 static const uint8_t coeff_abs_level_transition[2][8] = {
5215 /* update node ctx after decoding a level=1 */
5216 { 1, 2, 3, 3, 4, 5, 6, 7 },
5217 /* update node ctx after decoding a level>1 */
5218 { 4, 4, 4, 4, 5, 6, 7, 7 }
/* Body of decode_cabac_residual_internal: decodes one residual block
 * (coded_block_flag, significance map, then levels and signs) with the
 * CABAC engine.  NOTE(review): the function signature and several
 * surrounding lines are not visible in this extract. */
5224 int coeff_count = 0;
5227 uint8_t *significant_coeff_ctx_base;
5228 uint8_t *last_coeff_ctx_base;
5229 uint8_t *abs_level_m1_ctx_base;
/* Work on a local copy of the CABAC context instead of dereferencing
 * h->cabac for every bin; it is copied back before every exit point. */
5232 #define CABAC_ON_STACK
5234 #ifdef CABAC_ON_STACK
5237 cc.range = h->cabac.range;
5238 cc.low = h->cabac.low;
5239 cc.bytestream= h->cabac.bytestream;
5241 #define CC &h->cabac
5245 /* cat: 0-> DC 16x16 n = 0
5246 * 1-> AC 16x16 n = luma4x4idx
5247 * 2-> Luma4x4 n = luma4x4idx
5248 * 3-> DC Chroma n = iCbCr
5249 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5250 * 5-> Luma8x8 n = 4 * luma8x8idx
5253 /* read coded block flag */
5254 if( is_dc || cat != 5 ) {
5255 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
/* cbf == 0: no coefficients in this block; clear the matching
 * non_zero_count_cache entry and return early. */
5258 h->non_zero_count_cache[scan8[16+n]] = 0;
5260 h->non_zero_count_cache[scan8[n]] = 0;
/* Write the local CABAC state back before the early return. */
5263 #ifdef CABAC_ON_STACK
5264 h->cabac.range = cc.range ;
5265 h->cabac.low = cc.low ;
5266 h->cabac.bytestream= cc.bytestream;
/* Select the context sets for this category (field/frame selects a
 * different significance/last-coefficient context group). */
5272 significant_coeff_ctx_base = h->cabac_state
5273 + significant_coeff_flag_offset[MB_FIELD][cat];
5274 last_coeff_ctx_base = h->cabac_state
5275 + last_coeff_flag_offset[MB_FIELD][cat];
5276 abs_level_m1_ctx_base = h->cabac_state
5277 + coeff_abs_level_m1_offset[cat];
/* Significance map: index[] collects the scan positions of the
 * non-zero coefficients; 8x8 blocks (cat 5) use per-position context
 * offsets from significant_coeff_flag_offset_8x8. */
5279 if( !is_dc && cat == 5 ) {
5280 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5281 for(last= 0; last < coefs; last++) { \
5282 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5283 if( get_cabac( CC, sig_ctx )) { \
5284 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5285 index[coeff_count++] = last; \
5286 if( get_cabac( CC, last_ctx ) ) { \
5292 if( last == max_coeff -1 ) {\
5293 index[coeff_count++] = last;\
5295 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
/* x86 asm fast paths for the significance scan, when available. */
5296 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5297 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5299 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5301 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5303 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
/* cbf != 0 implies at least one coefficient was signalled. */
5306 assert(coeff_count > 0);
/* Record cbp / non-zero-count bookkeeping for the decoded block. */
5310 h->cbp_table[h->mb_xy] |= 0x100;
5312 h->cbp_table[h->mb_xy] |= 0x40 << n;
5315 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5317 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5319 assert( cat == 1 || cat == 2 );
5320 h->non_zero_count_cache[scan8[n]] = coeff_count;
/* Decode magnitudes and signs from the last significant coefficient
 * backwards; node_ctx walks the state machine in
 * coeff_abs_level_transition. */
5325 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5327 int j= scantable[index[--coeff_count]];
5329 if( get_cabac( CC, ctx ) == 0 ) {
5330 node_ctx = coeff_abs_level_transition[0][node_ctx];
/* |level| == 1: one branch stores the signed unit value directly, the
 * other dequantizes with qmul[j] (rounding by +32 >> 6). */
5332 block[j] = get_cabac_bypass_sign( CC, -1);
5334 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5338 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5339 node_ctx = coeff_abs_level_transition[1][node_ctx];
/* |level| > 1: unary prefix capped at 15, then a bypass-coded
 * exp-golomb style suffix for larger magnitudes. */
5341 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5345 if( coeff_abs >= 15 ) {
5347 while( get_cabac_bypass( CC ) ) {
5353 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5359 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5361 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5364 } while( coeff_count );
/* Copy the local CABAC state back into the decoder context. */
5365 #ifdef CABAC_ON_STACK
5366 h->cabac.range = cc.range ;
5367 h->cabac.low = cc.low ;
5368 h->cabac.bytestream= cc.bytestream;
/* When not optimizing for size, build two specializations of
 * decode_cabac_residual_internal with is_dc fixed at compile time so
 * the compiler can discard the dead branches. */
5373 #ifndef CONFIG_SMALL
/* DC variant (is_dc = 1): used for luma 16x16 DC (cat 0) and chroma DC
 * (cat 3) blocks. */
5374 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5375 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
/* Non-DC variant (is_dc = 0): all other residual categories. */
5378 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5379 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
/* Entry point for residual decoding: categories 0 (luma 16x16 DC) and
 * 3 (chroma DC) are the DC cases.  CONFIG_SMALL builds call the
 * generic internal function directly; otherwise dispatch to the
 * specialized dc/nondc variants above (the #ifdef/#else lines are not
 * visible in this extract). */
5383 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5385 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5387 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5388 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/* Computes the macroblock indices of the top and left neighbours
 * (h->top_mb_xy, h->left_mb_xy[0]) of the current macroblock, with
 * corrections for MBAFF frame coding where a field macroblock's
 * neighbours depend on the field/frame coding of the adjacent MB pair. */
5392 static inline void compute_mb_neighbors(H264Context *h)
5394 MpegEncContext * const s = &h->s;
5395 const int mb_xy = h->mb_xy;
/* Default raster-order neighbours (non-MBAFF frame layout). */
5396 h->top_mb_xy = mb_xy - s->mb_stride;
5397 h->left_mb_xy[0] = mb_xy - 1;
/* MBAFF path: address the top MB of the current vertical pair and of
 * the pairs above / to the left, then compare field/frame flags. */
5399 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5400 const int top_pair_xy = pair_xy - s->mb_stride;
5401 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5402 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5403 const int curr_mb_frame_flag = !MB_FIELD;
5404 const int bottom = (s->mb_y & 1);
5406 ? !curr_mb_frame_flag // bottom macroblock
5407 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
/* Move the top neighbour one row further up when the pair coding
 * differs (see the conditional expression above). */
5409 h->top_mb_xy -= s->mb_stride;
5411 if (left_mb_frame_flag != curr_mb_frame_flag) {
5412 h->left_mb_xy[0] = pair_xy - 1;
5414 } else if (FIELD_PICTURE) {
5415 h->top_mb_xy -= s->mb_stride;
5421 * decodes a macroblock
5422 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5424 static int decode_mb_cabac(H264Context *h) {
5425 MpegEncContext * const s = &h->s;
5427 int mb_type, partition_count, cbp = 0;
5428 int dct8x8_allowed= h->pps.transform_8x8_mode;
5430 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5432 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5434 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* --- Skip-flag handling (P/B slices only; I slices have no skip). --- */
5435 if( h->slice_type_nos != FF_I_TYPE ) {
5437 /* a skipped mb needs the aff flag from the following mb */
5438 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5439 predict_field_decoding_flag(h);
5440 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5441 skip = h->next_mb_skipped;
5443 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5444 /* read skip flags */
5446 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5447 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
/* Top MB of a pair is skipped: peek the skip flag of the bottom MB so
 * the field decoding flag can be set correctly. */
5448 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5449 if(h->next_mb_skipped)
5450 predict_field_decoding_flag(h);
5452 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* Skipped MB: reset cbp / chroma pred / dqp state and return early
 * (early-return lines not visible in this extract). */
5457 h->cbp_table[mb_xy] = 0;
5458 h->chroma_pred_mode_table[mb_xy] = 0;
5459 h->last_qscale_diff = 0;
5466 if( (s->mb_y&1) == 0 )
5468 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5471 h->prev_mb_skipped = 0;
/* --- Macroblock type --- */
5473 compute_mb_neighbors(h);
5474 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5475 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
/* Map the raw CABAC mb_type index through the per-slice-type tables;
 * intra types inside P/B slices jump to the shared intra path. */
5479 if( h->slice_type_nos == FF_B_TYPE ) {
5481 partition_count= b_mb_type_info[mb_type].partition_count;
5482 mb_type= b_mb_type_info[mb_type].type;
5485 goto decode_intra_mb;
5487 } else if( h->slice_type_nos == FF_P_TYPE ) {
5489 partition_count= p_mb_type_info[mb_type].partition_count;
5490 mb_type= p_mb_type_info[mb_type].type;
5493 goto decode_intra_mb;
5496 if(h->slice_type == FF_SI_TYPE && mb_type)
5498 assert(h->slice_type_nos == FF_I_TYPE);
5500 partition_count = 0;
5501 cbp= i_mb_type_info[mb_type].cbp;
5502 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5503 mb_type= i_mb_type_info[mb_type].type;
5506 mb_type |= MB_TYPE_INTERLACED;
5508 h->slice_table[ mb_xy ]= h->slice_num;
/* --- I_PCM: raw samples follow in the bitstream. --- */
5510 if(IS_INTRA_PCM(mb_type)) {
5513 // We assume these blocks are very rare so we do not optimize it.
5514 // FIXME The two following lines get the bitstream position in the cabac
5515 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5516 ptr= h->cabac.bytestream;
5517 if(h->cabac.low&0x1) ptr--;
5519 if(h->cabac.low&0x1FF) ptr--;
5522 // The pixels are stored in the same order as levels in h->mb array.
5523 memcpy(h->mb, ptr, 256); ptr+=256;
5525 memcpy(h->mb+128, ptr, 128); ptr+=128;
/* CABAC must be re-initialized after the raw PCM bytes. */
5528 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5530 // All blocks are present
5531 h->cbp_table[mb_xy] = 0x1ef;
5532 h->chroma_pred_mode_table[mb_xy] = 0;
5533 // In deblocking, the quantizer is 0
5534 s->current_picture.qscale_table[mb_xy]= 0;
5535 // All coeffs are present
5536 memset(h->non_zero_count[mb_xy], 16, 16);
5537 s->current_picture.mb_type[mb_xy]= mb_type;
5538 h->last_qscale_diff = 0;
/* MBAFF field MBs: ref_count is doubled while decoding this MB and
 * halved again at the end (see the >>= 1 at the bottom). */
5543 h->ref_count[0] <<= 1;
5544 h->ref_count[1] <<= 1;
5547 fill_caches(h, mb_type, 0);
/* --- Intra prediction modes --- */
5549 if( IS_INTRA( mb_type ) ) {
5551 if( IS_INTRA4x4( mb_type ) ) {
5552 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
/* 8x8 transform: one pred mode per 8x8 block, replicated 2x2 in the
 * 4x4-granularity cache. */
5553 mb_type |= MB_TYPE_8x8DCT;
5554 for( i = 0; i < 16; i+=4 ) {
5555 int pred = pred_intra_mode( h, i );
5556 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5557 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5560 for( i = 0; i < 16; i++ ) {
5561 int pred = pred_intra_mode( h, i );
5562 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5564 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5567 write_back_intra_pred_mode(h);
5568 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5570 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5571 if( h->intra16x16_pred_mode < 0 ) return -1;
5574 h->chroma_pred_mode_table[mb_xy] =
5575 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5577 pred_mode= check_intra_pred_mode( h, pred_mode );
5578 if( pred_mode < 0 ) return -1;
5579 h->chroma_pred_mode= pred_mode;
/* --- 8x8 sub-partitioned inter MB --- */
5581 } else if( partition_count == 4 ) {
5582 int i, j, sub_partition_count[4], list, ref[2][4];
5584 if( h->slice_type_nos == FF_B_TYPE ) {
5585 for( i = 0; i < 4; i++ ) {
5586 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5587 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5588 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5590 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5591 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5592 pred_direct_motion(h, &mb_type);
5593 h->ref_cache[0][scan8[4]] =
5594 h->ref_cache[1][scan8[4]] =
5595 h->ref_cache[0][scan8[12]] =
5596 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5597 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5598 for( i = 0; i < 4; i++ )
5599 if( IS_DIRECT(h->sub_mb_type[i]) )
5600 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5604 for( i = 0; i < 4; i++ ) {
5605 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5606 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5607 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* Reference indices per 8x8 sub-block, then motion vectors per
 * sub-partition. */
5611 for( list = 0; list < h->list_count; list++ ) {
5612 for( i = 0; i < 4; i++ ) {
5613 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5614 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5615 if( h->ref_count[list] > 1 )
5616 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5622 h->ref_cache[list][ scan8[4*i]+1 ]=
5623 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5628 dct8x8_allowed = get_dct8x8_allowed(h);
5630 for(list=0; list<h->list_count; list++){
5632 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5633 if(IS_DIRECT(h->sub_mb_type[i])){
5634 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5638 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5639 const int sub_mb_type= h->sub_mb_type[i];
5640 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5641 for(j=0; j<sub_partition_count[i]; j++){
5644 const int index= 4*i + block_width*j;
5645 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5646 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
/* mv = predictor + CABAC-coded delta; both the vector and the delta
 * (mvd, used for context modelling) are cached, replicated to cover
 * the sub-partition's 4x4 blocks. */
5647 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5649 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5650 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5651 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5653 if(IS_SUB_8X8(sub_mb_type)){
5655 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5657 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5660 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5662 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5663 }else if(IS_SUB_8X4(sub_mb_type)){
5664 mv_cache[ 1 ][0]= mx;
5665 mv_cache[ 1 ][1]= my;
5667 mvd_cache[ 1 ][0]= mx - mpx;
5668 mvd_cache[ 1 ][1]= my - mpy;
5669 }else if(IS_SUB_4X8(sub_mb_type)){
5670 mv_cache[ 8 ][0]= mx;
5671 mv_cache[ 8 ][1]= my;
5673 mvd_cache[ 8 ][0]= mx - mpx;
5674 mvd_cache[ 8 ][1]= my - mpy;
5676 mv_cache[ 0 ][0]= mx;
5677 mv_cache[ 0 ][1]= my;
5679 mvd_cache[ 0 ][0]= mx - mpx;
5680 mvd_cache[ 0 ][1]= my - mpy;
/* Sub-block not predicted from this list: zero its mv/mvd cache. */
5683 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5684 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5685 p[0] = p[1] = p[8] = p[9] = 0;
5686 pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* --- B_DIRECT 16x16 --- */
5690 } else if( IS_DIRECT(mb_type) ) {
5691 pred_direct_motion(h, &mb_type);
5692 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5693 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5694 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- Whole-MB inter partitions: 16x16 / 16x8 / 8x16 --- */
5696 int list, mx, my, i, mpx, mpy;
5697 if(IS_16X16(mb_type)){
5698 for(list=0; list<h->list_count; list++){
5699 if(IS_DIR(mb_type, 0, list)){
5700 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5701 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5703 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5705 for(list=0; list<h->list_count; list++){
5706 if(IS_DIR(mb_type, 0, list)){
5707 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5709 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5710 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5711 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5713 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5714 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5716 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5719 else if(IS_16X8(mb_type)){
5720 for(list=0; list<h->list_count; list++){
5722 if(IS_DIR(mb_type, i, list)){
5723 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5724 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5726 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5729 for(list=0; list<h->list_count; list++){
5731 if(IS_DIR(mb_type, i, list)){
5732 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5733 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5734 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5735 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5737 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5738 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5740 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5741 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5746 assert(IS_8X16(mb_type));
5747 for(list=0; list<h->list_count; list++){
5749 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5750 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5751 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5753 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5756 for(list=0; list<h->list_count; list++){
5758 if(IS_DIR(mb_type, i, list)){
5759 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5760 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5761 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5763 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5764 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5765 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5767 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5768 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5775 if( IS_INTER( mb_type ) ) {
5776 h->chroma_pred_mode_table[mb_xy] = 0;
5777 write_back_motion( h, mb_type );
/* --- Coded block pattern and residuals --- */
5780 if( !IS_INTRA16x16( mb_type ) ) {
5781 cbp = decode_cabac_mb_cbp_luma( h );
5783 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5786 h->cbp_table[mb_xy] = h->cbp = cbp;
5788 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5789 if( decode_cabac_mb_transform_size( h ) )
5790 mb_type |= MB_TYPE_8x8DCT;
5792 s->current_picture.mb_type[mb_xy]= mb_type;
5794 if( cbp || IS_INTRA16x16( mb_type ) ) {
5795 const uint8_t *scan, *scan8x8, *dc_scan;
5796 const uint32_t *qmul;
/* Pick field or frame scan tables; the _q0 variants are used when
 * qscale is 0. */
5799 if(IS_INTERLACED(mb_type)){
5800 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5801 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5802 dc_scan= luma_dc_field_scan;
5804 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5805 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5806 dc_scan= luma_dc_zigzag_scan;
5809 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5810 if( dqp == INT_MIN ){
5811 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
/* Wrap qscale back into the legal 0..51 range after adding dqp. */
5815 if(((unsigned)s->qscale) > 51){
5816 if(s->qscale<0) s->qscale+= 52;
5817 else s->qscale-= 52;
5819 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5820 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
/* Luma residuals: 16x16 DC+AC, or per-8x8/4x4 blocks. */
5822 if( IS_INTRA16x16( mb_type ) ) {
5824 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5825 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5828 qmul = h->dequant4_coeff[0][s->qscale];
5829 for( i = 0; i < 16; i++ ) {
5830 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5831 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5834 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5838 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5839 if( cbp & (1<<i8x8) ) {
5840 if( IS_8x8DCT(mb_type) ) {
5841 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5842 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5844 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5845 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5846 const int index = 4*i8x8 + i4x4;
5847 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5849 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5850 //STOP_TIMER("decode_residual")
5854 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5855 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* Chroma residuals: DC (cat 3) then AC (cat 4) for Cb and Cr. */
5862 for( c = 0; c < 2; c++ ) {
5863 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5864 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5870 for( c = 0; c < 2; c++ ) {
5871 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5872 for( i = 0; i < 4; i++ ) {
5873 const int index = 16 + 4 * c + i;
5874 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5875 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
5879 uint8_t * const nnz= &h->non_zero_count_cache[0];
5880 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5881 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* No coded residual at all: clear the whole nnz cache and reset dqp. */
5884 uint8_t * const nnz= &h->non_zero_count_cache[0];
5885 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5886 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5887 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5888 h->last_qscale_diff = 0;
5891 s->current_picture.qscale_table[mb_xy]= s->qscale;
5892 write_back_non_zero_count(h);
/* Undo the MBAFF ref_count doubling from above. */
5895 h->ref_count[0] >>= 1;
5896 h->ref_count[1] >>= 1;
/* Deblocks one vertical luma edge (16 pixels tall).  For bS < 4 the
 * tc0-clipped normal filter is run via the dsp function; bS == 4
 * (intra MB edge) uses the strong filter implemented inline below.
 * alpha/beta thresholds come from the qp-indexed tables, shifted by
 * the slice-level offsets. */
5903 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5905 const int index_a = qp + h->slice_alpha_c0_offset;
5906 const int alpha = (alpha_table+52)[index_a];
5907 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
/* tc = -1 marks "no filtering" for bS == 0 segments. */
5912 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
5913 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5915 /* 16px edge length, because bS=4 is triggered by being at
5916 * the edge of an intra MB, so all 4 bS are the same */
5917 for( d = 0; d < 16; d++ ) {
5918 const int p0 = pix[-1];
5919 const int p1 = pix[-2];
5920 const int p2 = pix[-3];
5922 const int q0 = pix[0];
5923 const int q1 = pix[1];
5924 const int q2 = pix[2];
/* Filter only when the edge looks like a blocking artifact rather
 * than a real image edge (alpha/beta gradient tests). */
5926 if( FFABS( p0 - q0 ) < alpha &&
5927 FFABS( p1 - p0 ) < beta &&
5928 FFABS( q1 - q0 ) < beta ) {
5930 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
/* Strong 3-tap/5-tap smoothing of up to three pixels on each side. */
5931 if( FFABS( p2 - p0 ) < beta)
5933 const int p3 = pix[-4];
5935 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5936 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5937 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5940 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5942 if( FFABS( q2 - q0 ) < beta)
5944 const int q3 = pix[3];
5946 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5947 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5948 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5951 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Weak fallback: only p0/q0 are modified. */
5955 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5956 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5958 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/* Deblocks one vertical chroma edge.  bS < 4 uses the tc-clipped
 * chroma filter (note: chroma tc is tc0 + 1); bS == 4 uses the
 * intra strong chroma filter. */
5964 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5966 const int index_a = qp + h->slice_alpha_c0_offset;
5967 const int alpha = (alpha_table+52)[index_a];
5968 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5973 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
5974 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5976 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* MBAFF variant of the vertical luma edge filter: qp, bS index and
 * alpha/beta can differ per row because the two field MBs of a pair
 * interleave, so everything is recomputed inside the per-row loop. */
5980 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5982 for( i = 0; i < 16; i++, pix += stride) {
5988 int bS_index = (i >> 1);
5991 bS_index |= (i & 1);
5994 if( bS[bS_index] == 0 ) {
/* Row-dependent qp selection: field MBs switch qp at mid-height,
 * frame MBs alternate per row. */
5998 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
5999 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6000 alpha = (alpha_table+52)[index_a];
6001 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* Normal filter (bS 1..3): tc0-clipped correction of p0/q0 and
 * conditionally p1/q1. */
6003 if( bS[bS_index] < 4 ) {
6004 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6005 const int p0 = pix[-1];
6006 const int p1 = pix[-2];
6007 const int p2 = pix[-3];
6008 const int q0 = pix[0];
6009 const int q1 = pix[1];
6010 const int q2 = pix[2];
6012 if( FFABS( p0 - q0 ) < alpha &&
6013 FFABS( p1 - p0 ) < beta &&
6014 FFABS( q1 - q0 ) < beta ) {
6018 if( FFABS( p2 - p0 ) < beta ) {
6019 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6022 if( FFABS( q2 - q0 ) < beta ) {
6023 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6027 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6028 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6029 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6030 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* Strong filter (bS == 4), same scheme as filter_mb_edgev. */
6033 const int p0 = pix[-1];
6034 const int p1 = pix[-2];
6035 const int p2 = pix[-3];
6037 const int q0 = pix[0];
6038 const int q1 = pix[1];
6039 const int q2 = pix[2];
6041 if( FFABS( p0 - q0 ) < alpha &&
6042 FFABS( p1 - p0 ) < beta &&
6043 FFABS( q1 - q0 ) < beta ) {
6045 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6046 if( FFABS( p2 - p0 ) < beta)
6048 const int p3 = pix[-4];
6050 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6051 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6052 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6055 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6057 if( FFABS( q2 - q0 ) < beta)
6059 const int q3 = pix[3];
6061 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6062 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6063 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6066 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6070 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6071 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6073 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* MBAFF variant of the vertical chroma edge filter (8 rows);
 * per-row qp/bS selection like filter_mb_mbaff_edgev, but chroma only
 * ever modifies p0/q0. */
6078 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6080 for( i = 0; i < 8; i++, pix += stride) {
6088 if( bS[bS_index] == 0 ) {
6092 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6093 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6094 alpha = (alpha_table+52)[index_a];
6095 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
/* Normal chroma filter: tc = tc0 + 1, clipped delta on p0/q0. */
6097 if( bS[bS_index] < 4 ) {
6098 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6099 const int p0 = pix[-1];
6100 const int p1 = pix[-2];
6101 const int q0 = pix[0];
6102 const int q1 = pix[1];
6104 if( FFABS( p0 - q0 ) < alpha &&
6105 FFABS( p1 - p0 ) < beta &&
6106 FFABS( q1 - q0 ) < beta ) {
6107 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6109 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6110 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6111 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* Strong chroma filter (bS == 4). */
6114 const int p0 = pix[-1];
6115 const int p1 = pix[-2];
6116 const int q0 = pix[0];
6117 const int q1 = pix[1];
6119 if( FFABS( p0 - q0 ) < alpha &&
6120 FFABS( p1 - p0 ) < beta &&
6121 FFABS( q1 - q0 ) < beta ) {
6123 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6124 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6125 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Deblocks one horizontal luma edge; same structure as
 * filter_mb_edgev but pixels are addressed across rows (pix_next). */
6131 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6133 const int index_a = qp + h->slice_alpha_c0_offset;
6134 const int alpha = (alpha_table+52)[index_a];
6135 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6136 const int pix_next = stride;
/* bS < 4 path: tc0-clipped vertical luma dsp filter (-1 disables). */
6141 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6142 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6144 /* 16px edge length, see filter_mb_edgev */
6145 for( d = 0; d < 16; d++ ) {
6146 const int p0 = pix[-1*pix_next];
6147 const int p1 = pix[-2*pix_next];
6148 const int p2 = pix[-3*pix_next];
6149 const int q0 = pix[0];
6150 const int q1 = pix[1*pix_next];
6151 const int q2 = pix[2*pix_next];
6153 if( FFABS( p0 - q0 ) < alpha &&
6154 FFABS( p1 - p0 ) < beta &&
6155 FFABS( q1 - q0 ) < beta ) {
6157 const int p3 = pix[-4*pix_next];
6158 const int q3 = pix[ 3*pix_next];
/* Strong filter (bS == 4): up to three samples per side. */
6160 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6161 if( FFABS( p2 - p0 ) < beta) {
6163 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6164 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6165 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6168 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6170 if( FFABS( q2 - q0 ) < beta) {
6172 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6173 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6174 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6177 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6181 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6182 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6184 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/* Deblocks one horizontal chroma edge; mirrors filter_mb_edgecv with
 * the vertical (v_) dsp chroma filters. */
6191 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6193 const int index_a = qp + h->slice_alpha_c0_offset;
6194 const int alpha = (alpha_table+52)[index_a];
6195 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6200 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6201 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6203 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast-path deblocking for one macroblock.  Computes boundary
 * strengths with the dsp helper and average edge qps directly, instead
 * of the general per-edge logic in filter_mb(); falls back to
 * filter_mb() for the cases it cannot handle (frame edges, missing dsp
 * helper, per-plane chroma qp deltas, cross-slice deblocking). */
6207 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6208 MpegEncContext * const s = &h->s;
6209 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6211 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6215 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6217 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6218 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6219 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6222 assert(!FRAME_MBAFF);
/* Edge qp is the rounded average of the two adjacent MBs' qps
 * (current vs left for qp0, current vs top for qp1). */
6224 mb_type = s->current_picture.mb_type[mb_xy];
6225 qp = s->current_picture.qscale_table[mb_xy];
6226 qp0 = s->current_picture.qscale_table[mb_xy-1];
6227 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6228 qpc = get_chroma_qp( h, 0, qp );
6229 qpc0 = get_chroma_qp( h, 0, qp0 );
6230 qpc1 = get_chroma_qp( h, 0, qp1 );
6231 qp0 = (qp + qp0 + 1) >> 1;
6232 qp1 = (qp + qp1 + 1) >> 1;
6233 qpc0 = (qpc + qpc0 + 1) >> 1;
6234 qpc1 = (qpc + qpc1 + 1) >> 1;
/* Below this qp threshold the filter cannot change any pixel, so the
 * whole MB can be skipped. */
6235 qp_thresh = 15 - h->slice_alpha_c0_offset;
6236 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6237 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* Intra MB: fixed boundary strengths (4 on MB edges — 3 for
 * horizontal edges in field pictures — 3 inside). */
6240 if( IS_INTRA(mb_type) ) {
6241 int16_t bS4[4] = {4,4,4,4};
6242 int16_t bS3[4] = {3,3,3,3};
6243 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
/* 8x8 transform: only the MB edge and the middle edge exist. */
6244 if( IS_8x8DCT(mb_type) ) {
6245 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6246 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6247 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6248 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6250 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6251 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6252 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6253 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6254 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6255 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6256 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6257 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
/* Chroma: MB edge plus the single interior edge. */
6259 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6260 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6261 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6262 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6263 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6264 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6265 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6266 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* Inter MB: boundary strengths computed from nnz / refs / mvs by the
 * dsp helper; bS is viewed as uint64 rows via bSv for fast fills. */
6269 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6270 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6272 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6274 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
/* mask_edge suppresses interior-edge mv checks for partitions that
 * cannot have an mv discontinuity there. */
6276 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6277 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6278 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6279 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6281 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6282 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6283 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6284 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
/* Intra neighbours force bS 4 (3 vertically in field pictures) on the
 * shared MB edge. */
6286 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6287 bSv[0][0] = 0x0004000400040004ULL;
6288 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6289 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
/* dir 0 = vertical edges, dir 1 = horizontal; edge 0 uses the
 * neighbour-averaged qp. */
6291 #define FILTER(hv,dir,edge)\
6292 if(bSv[dir][edge]) {\
6293 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6295 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6296 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6302 } else if( IS_8x8DCT(mb_type) ) {
/**
 * Apply the in-loop deblocking filter to one macroblock (full, non-fast path).
 *
 * Derives per-edge boundary strengths (bS) from intra/inter status,
 * non-zero-coefficient flags, reference frames and motion-vector
 * differences, then filters the vertical and horizontal edges of the
 * luma and both chroma planes.  Includes the MBAFF special cases where
 * an edge separates macroblocks of different field/frame coding type.
 * NOTE(review): qp thresholds and bS derivation follow H.264 spec 8.7.
 */
6321 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6322 MpegEncContext * const s = &h->s;
6323 const int mb_xy= mb_x + mb_y*s->mb_stride;
6324 const int mb_type = s->current_picture.mb_type[mb_xy];
/* interlaced MBs use a tighter vertical MV threshold (2 vs 4 quarter-pel units) */
6325 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6326 int first_vertical_edge_done = 0;
6329 //for sufficiently low qp, filtering wouldn't do anything
6330 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6332 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6333 int qp = s->current_picture.qscale_table[mb_xy];
6335 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6336 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6341 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6342 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6343 int top_type, left_type[2];
6344 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6345 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6346 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
/* rebuild nnz cache entries of 8x8-transformed neighbours from their cbp bits */
6348 if(IS_8x8DCT(top_type)){
6349 h->non_zero_count_cache[4+8*0]=
6350 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6351 h->non_zero_count_cache[6+8*0]=
6352 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6354 if(IS_8x8DCT(left_type[0])){
6355 h->non_zero_count_cache[3+8*1]=
6356 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6358 if(IS_8x8DCT(left_type[1])){
6359 h->non_zero_count_cache[3+8*3]=
6360 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
6363 if(IS_8x8DCT(mb_type)){
6364 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6365 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp_table[mb_xy] & 1;
6367 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6368 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp_table[mb_xy] & 2;
6370 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6371 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp_table[mb_xy] & 4;
6373 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6374 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp_table[mb_xy] & 8;
6379 // left mb is in picture
6380 && h->slice_table[mb_xy-1] != 255
6381 // and current and left pair do not have the same interlaced type
6382 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6383 // and left mb is in the same slice if deblocking_filter == 2
6384 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6385 /* First vertical edge is different in MBAFF frames
6386 * There are 8 different bS to compute and 2 different Qp
6388 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6389 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6394 int mb_qp, mbn0_qp, mbn1_qp;
6396 first_vertical_edge_done = 1;
6398 if( IS_INTRA(mb_type) )
6399 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6401 for( i = 0; i < 8; i++ ) {
6402 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6404 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6406 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6407 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6408 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
/* edge QP is the rounded average of the QPs on both sides of the edge */
6415 mb_qp = s->current_picture.qscale_table[mb_xy];
6416 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6417 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6418 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6419 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6420 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6421 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6422 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6423 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6424 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6425 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6426 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6427 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6430 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6431 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6432 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6433 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6434 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6436 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6437 for( dir = 0; dir < 2; dir++ )
6440 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6441 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6442 int (*ref2frm) [48+2] = h->ref2frm[ h->slice_num &15 ];
6443 int (*ref2frmm)[48+2] = h->ref2frm[ h->slice_table[mbm_xy]&15 ];
/* slice_table == 255 marks an unavailable neighbour: skip edge 0 then */
6444 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6446 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6447 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6448 // how often to recheck mv-based bS when iterating between edges
6449 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6450 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6451 // how often to recheck mv-based bS when iterating along each edge
6452 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6454 if (first_vertical_edge_done) {
6456 first_vertical_edge_done = 0;
6459 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6462 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6463 && !IS_INTERLACED(mb_type)
6464 && IS_INTERLACED(mbm_type)
6466 // This is a special case in the norm where the filtering must
6467 // be done twice (one each of the field) even if we are in a
6468 // frame macroblock.
6470 static const int nnz_idx[4] = {4,5,6,3};
6471 unsigned int tmp_linesize = 2 * linesize;
6472 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6473 int mbn_xy = mb_xy - 2 * s->mb_stride;
/* filter the top edge against each of the two field MBs above */
6478 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6479 if( IS_INTRA(mb_type) ||
6480 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6481 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6483 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6484 for( i = 0; i < 4; i++ ) {
6485 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6486 mbn_nnz[nnz_idx[i]] != 0 )
6492 // Do not use s->qscale as luma quantizer because it has not the same
6493 // value in IPCM macroblocks.
6494 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6495 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6496 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6497 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6498 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6499 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6500 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6501 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6508 for( edge = start; edge < edges; edge++ ) {
6509 /* mbn_xy: neighbor macroblock */
6510 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6511 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6512 int (*ref2frmn)[48+2] = edge > 0 ? ref2frm : ref2frmm;
/* with an 8x8 transform only even (8-pel spaced) internal edges are filtered */
6516 if( (edge&1) && IS_8x8DCT(mb_type) )
6519 if( IS_INTRA(mb_type) ||
6520 IS_INTRA(mbn_type) ) {
6523 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6524 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6533 bS[0] = bS[1] = bS[2] = bS[3] = value;
6538 if( edge & mask_edge ) {
6539 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6542 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6543 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6546 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6547 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6548 int bn_idx= b_idx - (dir ? 8:1);
6551 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6552 v |= ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[l][h->ref_cache[l][bn_idx]+2] ||
6553 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6554 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
/* B slices: also compare against the opposite list (crossed references) */
6557 if(h->slice_type_nos == FF_B_TYPE && v){
6559 for( l = 0; !v && l < 2; l++ ) {
6561 v |= ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[ln][h->ref_cache[ln][bn_idx]+2] ||
6562 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6563 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6567 bS[0] = bS[1] = bS[2] = bS[3] = v;
6573 for( i = 0; i < 4; i++ ) {
6574 int x = dir == 0 ? edge : i;
6575 int y = dir == 0 ? i : edge;
6576 int b_idx= 8 + 4 + x + 8*y;
6577 int bn_idx= b_idx - (dir ? 8:1);
6579 if( h->non_zero_count_cache[b_idx] != 0 ||
6580 h->non_zero_count_cache[bn_idx] != 0 ) {
6586 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6587 if( ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[l][h->ref_cache[l][bn_idx]+2] ||
6588 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6589 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6595 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6597 for( l = 0; l < 2; l++ ) {
6599 if( ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[ln][h->ref_cache[ln][bn_idx]+2] ||
6600 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6601 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
/* all four bS are zero: nothing to filter on this edge */
6610 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6615 // Do not use s->qscale as luma quantizer because it has not the same
6616 // value in IPCM macroblocks.
6617 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6618 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6619 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6620 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6622 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6623 if( (edge&1) == 0 ) {
6624 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6625 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6626 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6627 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6630 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6631 if( (edge&1) == 0 ) {
6632 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6633 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6634 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6635 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/**
 * Decode all macroblocks of one slice.
 *
 * Selects the CABAC or CAVLC entropy decoder according to the PPS, walks
 * macroblocks left-to-right / top-to-bottom, and reports the decoded or
 * errored region to the error-resilience layer via ff_er_add_slice().
 *
 * Fix: line "6800" contained corrupted tokens `s->?gb` and
 * `s->gb?.size_in_bits`; repaired to `s->gb` / `s->gb.size_in_bits`,
 * matching the identical checks on the neighbouring lines ("6788"/"6801").
 *
 * @return 0 on normal slice end, -1 on error (also not-reached fallthrough)
 */
6642 static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
6643 MpegEncContext * const s = &h->s;
6644 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6648 if( h->pps.cabac ) {
/* CABAC slice data starts byte-aligned */
6652 align_get_bits( &s->gb );
6655 ff_init_cabac_states( &h->cabac);
6656 ff_init_cabac_decoder( &h->cabac,
6657 s->gb.buffer + get_bits_count(&s->gb)/8,
6658 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6659 /* calculate pre-state */
6660 for( i= 0; i < 460; i++ ) {
6662 if( h->slice_type_nos == FF_I_TYPE )
6663 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6665 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6668 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6670 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
6675 int ret = decode_mb_cabac(h);
6677 //STOP_TIMER("decode_mb_cabac")
6679 if(ret>=0) hl_decode_mb(h);
6681 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6684 if(ret>=0) ret = decode_mb_cabac(h);
6686 if(ret>=0) hl_decode_mb(h);
6689 eos = get_cabac_terminate( &h->cabac );
/* a bytestream overrun of more than 2 bytes indicates corrupt input */
6691 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6692 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6693 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6697 if( ++s->mb_x >= s->mb_width ) {
6699 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6701 if(FIELD_OR_MBAFF_PICTURE) {
6706 if( eos || s->mb_y >= s->mb_height ) {
6707 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6708 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* CAVLC path */
6715 int ret = decode_mb_cavlc(h);
6717 if(ret>=0) hl_decode_mb(h);
6719 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6721 ret = decode_mb_cavlc(h);
6723 if(ret>=0) hl_decode_mb(h);
6728 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6729 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6734 if(++s->mb_x >= s->mb_width){
6736 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6738 if(FIELD_OR_MBAFF_PICTURE) {
6741 if(s->mb_y >= s->mb_height){
6742 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6744 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6745 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6749 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6756 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6757 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6758 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6759 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6763 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6772 for(;s->mb_y < s->mb_height; s->mb_y++){
6773 for(;s->mb_x < s->mb_width; s->mb_x++){
6774 int ret= decode_mb(h);
6779 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6780 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6785 if(++s->mb_x >= s->mb_width){
6787 if(++s->mb_y >= s->mb_height){
6788 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6789 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6793 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* repaired corrupted tokens ("s->?gb", "s->gb?.") on the next line */
6800 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6801 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6802 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6806 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6813 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6816 return -1; //not reached
/**
 * Parse an unregistered user-data SEI message.
 * Copies up to sizeof(user_data)-1 bytes from the bitstream, scans for an
 * x264 version banner to record h->x264_build (used for encoder bug
 * workarounds), and skips any remaining payload bytes.
 */
6819 static int decode_unregistered_user_data(H264Context *h, int size){
6820 MpegEncContext * const s = &h->s;
/* 16-byte UUID followed by up to 256 bytes of payload text */
6821 uint8_t user_data[16+256];
6827 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6828 user_data[i]= get_bits(&s->gb, 8);
/* payload text starts after the 16-byte UUID */
6832 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6833 if(e==1 && build>=0)
6834 h->x264_build= build;
6836 if(s->avctx->debug & FF_DEBUG_BUGS)
6837 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
/* consume any payload bytes beyond the local buffer */
6840 skip_bits(&s->gb, 8);
/**
 * Parse SEI NAL unit payloads.
 * Reads the type and size fields (each encoded as a run of 0xFF bytes plus
 * a terminator, per the SEI syntax), dispatches known payload types and
 * skips unknown ones, then realigns the bitstream reader.
 */
6845 static int decode_sei(H264Context *h){
6846 MpegEncContext * const s = &h->s;
6848 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
/* payloadType: sum of 0xFF bytes plus final non-0xFF byte */
6853 type+= show_bits(&s->gb, 8);
6854 }while(get_bits(&s->gb, 8) == 255);
/* payloadSize: same run-length encoding as the type */
6858 size+= show_bits(&s->gb, 8);
6859 }while(get_bits(&s->gb, 8) == 255);
6863 if(decode_unregistered_user_data(h, size) < 0)
/* unknown/unhandled payload: skip it wholesale */
6867 skip_bits(&s->gb, 8*size);
6870 //FIXME check bits here
6871 align_get_bits(&s->gb);
/**
 * Parse hrd_parameters() from the VUI (H.264 spec Annex E).
 * All fields are read to keep the bitstream position correct; none of the
 * values are stored, as the decoder does not use HRD timing information.
 */
6877 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
6878 MpegEncContext * const s = &h->s;
6880 cpb_count = get_ue_golomb(&s->gb) + 1;
6881 get_bits(&s->gb, 4); /* bit_rate_scale */
6882 get_bits(&s->gb, 4); /* cpb_size_scale */
6883 for(i=0; i<cpb_count; i++){
6884 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6885 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6886 get_bits1(&s->gb); /* cbr_flag */
6888 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
6889 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
6890 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
6891 get_bits(&s->gb, 5); /* time_offset_length */
/**
 * Parse vui_parameters() from an SPS (H.264 spec Annex E).
 * Stores the fields the decoder uses (sample aspect ratio, timing info,
 * bitstream restriction / num_reorder_frames) into *sps; all other fields
 * are read and discarded to keep the bitstream position correct.
 * @return 0 on success, negative on invalid data (per visible error paths)
 */
6894 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6895 MpegEncContext * const s = &h->s;
6896 int aspect_ratio_info_present_flag;
6897 unsigned int aspect_ratio_idc;
6898 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
6900 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6902 if( aspect_ratio_info_present_flag ) {
6903 aspect_ratio_idc= get_bits(&s->gb, 8);
/* EXTENDED_SAR: explicit numerator/denominator follow in the bitstream */
6904 if( aspect_ratio_idc == EXTENDED_SAR ) {
6905 sps->sar.num= get_bits(&s->gb, 16);
6906 sps->sar.den= get_bits(&s->gb, 16);
6907 }else if(aspect_ratio_idc < sizeof(pixel_aspect)/sizeof(*pixel_aspect)){
6908 sps->sar= pixel_aspect[aspect_ratio_idc];
6910 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6917 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6919 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6920 get_bits1(&s->gb); /* overscan_appropriate_flag */
6923 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6924 get_bits(&s->gb, 3); /* video_format */
6925 get_bits1(&s->gb); /* video_full_range_flag */
6926 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6927 get_bits(&s->gb, 8); /* colour_primaries */
6928 get_bits(&s->gb, 8); /* transfer_characteristics */
6929 get_bits(&s->gb, 8); /* matrix_coefficients */
6933 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6934 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6935 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
6938 sps->timing_info_present_flag = get_bits1(&s->gb);
6939 if(sps->timing_info_present_flag){
6940 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6941 sps->time_scale = get_bits_long(&s->gb, 32);
6942 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
/* HRD parameters are parsed only to advance the reader; values unused */
6945 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6946 if(nal_hrd_parameters_present_flag)
6947 decode_hrd_parameters(h, sps);
6948 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6949 if(vcl_hrd_parameters_present_flag)
6950 decode_hrd_parameters(h, sps);
6951 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
6952 get_bits1(&s->gb); /* low_delay_hrd_flag */
6953 get_bits1(&s->gb); /* pic_struct_present_flag */
6955 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6956 if(sps->bitstream_restriction_flag){
6957 unsigned int num_reorder_frames;
6958 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6959 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6960 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6961 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6962 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
6963 num_reorder_frames= get_ue_golomb(&s->gb);
6964 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
/* the DPB cannot hold more than 16 frames; reject larger values */
6966 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
6967 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
6971 sps->num_reorder_frames= num_reorder_frames;
/**
 * Parse one quantization scaling list (4x4 when size==16, 8x8 when size==64).
 * If the list is not present in the bitstream, the supplied fallback is
 * copied; if the first delta yields zero, the JVT default list is used.
 * Values are delta-coded in zigzag order, with run-on of the last value.
 */
6977 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
6978 const uint8_t *jvt_list, const uint8_t *fallback_list){
6979 MpegEncContext * const s = &h->s;
6980 int i, last = 8, next = 8;
6981 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
6982 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
6983 memcpy(factors, fallback_list, size*sizeof(uint8_t));
6985 for(i=0;i<size;i++){
/* each coefficient is a signed delta from the previous one, mod 256 */
6987 next = (last + get_se_golomb(&s->gb)) & 0xff;
6988 if(!i && !next){ /* matrix not written, we use the preset one */
6989 memcpy(factors, jvt_list, size*sizeof(uint8_t));
/* next==0 means "repeat the last value" for the rest of the list */
6992 last = factors[scan[i]] = next ? next : last;
/**
 * Parse the full set of scaling matrices from an SPS or PPS.
 * When parsing a PPS (is_sps==0) and the SPS carried matrices, those act
 * as the fallback; otherwise the JVT defaults do.  Each non-first list
 * additionally falls back to the previously decoded list of the same kind,
 * as required by the spec's scaling-list inference rules.
 */
6996 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
6997 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
6998 MpegEncContext * const s = &h->s;
6999 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7000 const uint8_t *fallback[4] = {
7001 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7002 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7003 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7004 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7006 if(get_bits1(&s->gb)){
7007 sps->scaling_matrix_present |= is_sps;
7008 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7009 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7010 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7011 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7012 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7013 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
/* 8x8 lists exist only when the 8x8 transform can be used */
7014 if(is_sps || pps->transform_8x8_mode){
7015 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7016 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7018 } else if(fallback_sps) {
/* no matrices in this PPS: inherit the SPS matrices wholesale */
7019 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7020 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
7025 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
7028 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
7029 const size_t size, const char *name)
/* reject ids outside [0, max); 'name' is only used for error messages */
7032 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
/* lazily allocate the slot; existing entries are reused as-is */
7037 vec[id] = av_mallocz(size);
7039 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
/**
 * Parse a Sequence Parameter Set NAL unit into h->sps_buffers[sps_id].
 *
 * Validates profile/level, POC parameters, reference-frame count and
 * picture dimensions, parses scaling matrices for high profiles, cropping
 * and optional VUI parameters.
 *
 * Fix: the cropping sanity check used h->sps.frame_mbs_only_flag — the
 * PREVIOUSLY ACTIVE SPS, which is stale (or zeroed) while this new SPS is
 * being parsed.  It must test the SPS currently being decoded
 * (sps->frame_mbs_only_flag), as done by the later upstream correction.
 */
7044 static inline int decode_seq_parameter_set(H264Context *h){
7045 MpegEncContext * const s = &h->s;
7046 int profile_idc, level_idc;
7047 unsigned int sps_id, tmp, mb_width, mb_height;
7051 profile_idc= get_bits(&s->gb, 8);
7052 get_bits1(&s->gb); //constraint_set0_flag
7053 get_bits1(&s->gb); //constraint_set1_flag
7054 get_bits1(&s->gb); //constraint_set2_flag
7055 get_bits1(&s->gb); //constraint_set3_flag
7056 get_bits(&s->gb, 4); // reserved
7057 level_idc= get_bits(&s->gb, 8);
7058 sps_id= get_ue_golomb(&s->gb);
7060 sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
7064 sps->profile_idc= profile_idc;
7065 sps->level_idc= level_idc;
7067 if(sps->profile_idc >= 100){ //high profile
7068 sps->chroma_format_idc= get_ue_golomb(&s->gb);
7069 if(sps->chroma_format_idc == 3)
7070 get_bits1(&s->gb); //residual_color_transform_flag
7071 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7072 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7073 sps->transform_bypass = get_bits1(&s->gb);
7074 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7076 sps->scaling_matrix_present = 0;
7077 sps->chroma_format_idc= 1;
7080 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7081 sps->poc_type= get_ue_golomb(&s->gb);
7083 if(sps->poc_type == 0){ //FIXME #define
7084 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7085 } else if(sps->poc_type == 1){//FIXME #define
7086 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7087 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7088 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7089 tmp= get_ue_golomb(&s->gb);
/* bound the cycle length by the fixed-size offset_for_ref_frame array */
7091 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7092 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7095 sps->poc_cycle_length= tmp;
7097 for(i=0; i<sps->poc_cycle_length; i++)
7098 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7099 }else if(sps->poc_type != 2){
7100 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7104 tmp= get_ue_golomb(&s->gb);
7105 if(tmp > MAX_PICTURE_COUNT-2 || tmp >= 32){
7106 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7109 sps->ref_frame_count= tmp;
7110 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7111 mb_width= get_ue_golomb(&s->gb) + 1;
7112 mb_height= get_ue_golomb(&s->gb) + 1;
/* guard 16*mb_{width,height} against integer overflow before validating */
7113 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7114 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7115 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7118 sps->mb_width = mb_width;
7119 sps->mb_height= mb_height;
7121 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7122 if(!sps->frame_mbs_only_flag)
7123 sps->mb_aff= get_bits1(&s->gb);
7127 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7129 #ifndef ALLOW_INTERLACE
7131 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7133 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7134 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7136 sps->crop= get_bits1(&s->gb);
7138 sps->crop_left = get_ue_golomb(&s->gb);
7139 sps->crop_right = get_ue_golomb(&s->gb);
7140 sps->crop_top = get_ue_golomb(&s->gb);
7141 sps->crop_bottom= get_ue_golomb(&s->gb);
7142 if(sps->crop_left || sps->crop_top){
7143 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
/* use the SPS being parsed, not h->sps (stale/unset on first parse) */
7145 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !sps->frame_mbs_only_flag)){
7146 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7152 sps->crop_bottom= 0;
7155 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7156 if( sps->vui_parameters_present_flag )
7157 decode_vui_parameters(h, sps);
7159 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7160 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7161 sps_id, sps->profile_idc, sps->level_idc,
7163 sps->ref_frame_count,
7164 sps->mb_width, sps->mb_height,
7165 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7166 sps->direct_8x8_inference_flag ? "8B8" : "",
7167 sps->crop_left, sps->crop_right,
7168 sps->crop_top, sps->crop_bottom,
7169 sps->vui_parameters_present_flag ? "VUI" : "",
7170 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
/*
 * Precompute the luma-QP -> chroma-QP mapping for chroma plane t,
 * folding in the PPS chroma_qp_index_offset so the per-MB lookup is a
 * single table read.  Offsets pushing the index outside [0,51] are clipped.
 */
7177 build_qp_table(PPS *pps, int t, int index)
7180 for(i = 0; i < 52; i++)
7181 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/**
 * Parse a Picture Parameter Set NAL unit into h->pps_buffers[pps_id].
 * Validates the referenced SPS id and the reference counts, rejects FMO
 * (slice groups > 1), parses the optional trailing 8x8-transform /
 * scaling-matrix / second-chroma-qp-offset fields, and precomputes the
 * chroma QP tables.
 */
7184 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7185 MpegEncContext * const s = &h->s;
7186 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7189 pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
/* seq_parameter_set_id: must reference an already-decoded SPS */
7193 tmp= get_ue_golomb(&s->gb);
7194 if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
7195 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7200 pps->cabac= get_bits1(&s->gb);
7201 pps->pic_order_present= get_bits1(&s->gb);
7202 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7203 if(pps->slice_group_count > 1 ){
7204 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7205 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
/* the spec syntax tables below are kept as reference for the unparsed FMO fields */
7206 switch(pps->mb_slice_group_map_type){
7209 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7210 | run_length[ i ] |1 |ue(v) |
7215 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7217 | top_left_mb[ i ] |1 |ue(v) |
7218 | bottom_right_mb[ i ] |1 |ue(v) |
7226 | slice_group_change_direction_flag |1 |u(1) |
7227 | slice_group_change_rate_minus1 |1 |ue(v) |
7232 | slice_group_id_cnt_minus1 |1 |ue(v) |
7233 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7235 | slice_group_id[ i ] |1 |u(v) |
7240 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7241 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7242 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7243 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7244 pps->ref_count[0]= pps->ref_count[1]= 1;
7248 pps->weighted_pred= get_bits1(&s->gb);
7249 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7250 pps->init_qp= get_se_golomb(&s->gb) + 26;
7251 pps->init_qs= get_se_golomb(&s->gb) + 26;
7252 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7253 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7254 pps->constrained_intra_pred= get_bits1(&s->gb);
7255 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7257 pps->transform_8x8_mode= 0;
7258 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
/* flat default matrices (all 16) until/unless the PPS overrides them */
7259 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7260 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/* optional extension fields are present only if bits remain in the RBSP */
7262 if(get_bits_count(&s->gb) < bit_length){
7263 pps->transform_8x8_mode= get_bits1(&s->gb);
7264 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7265 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7267 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7270 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7271 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
/* NOTE(review): writes h->pps (active PPS), not the pps being parsed — looks suspicious, verify against upstream */
7272 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7273 h->pps.chroma_qp_diff= 1;
7275 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7276 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7277 pps_id, pps->sps_id,
7278 pps->cabac ? "CABAC" : "CAVLC",
7279 pps->slice_group_count,
7280 pps->ref_count[0], pps->ref_count[1],
7281 pps->weighted_pred ? "weighted" : "",
7282 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7283 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7284 pps->constrained_intra_pred ? "CONSTR" : "",
7285 pps->redundant_pic_cnt_present ? "REDU" : "",
7286 pps->transform_8x8_mode ? "8x8DCT" : ""
7294 * Call decode_slice() for each context.
7296 * @param h h264 master context
7297 * @param context_count number of contexts to execute
7299 static void execute_decode_slices(H264Context *h, int context_count){
7300 MpegEncContext * const s = &h->s;
7301 AVCodecContext * const avctx= s->avctx;
// Fast path: a single context decodes directly on the master context,
// avoiding the avctx->execute() thread dispatch entirely.
7305 if(context_count == 1) {
7306 decode_slice(avctx, h);
// Multi-context path: seed each slave context's error-resilience
// settings and reset its error counter before parallel execution.
7308 for(i = 1; i < context_count; i++) {
7309 hx = h->thread_context[i];
7310 hx->s.error_resilience = avctx->error_resilience;
7311 hx->s.error_count = 0;
// Run decode_slice on every thread context in parallel.
7314 avctx->execute(avctx, (void *)decode_slice,
7315 (void **)h->thread_context, NULL, context_count);
7317 /* pull back stuff from slices to master context */
// The last context holds the final decode position/state; copy it back
// so the master context reflects where decoding stopped.
7318 hx = h->thread_context[context_count - 1];
7319 s->mb_x = hx->s.mb_x;
7320 s->mb_y = hx->s.mb_y;
7321 s->dropable = hx->s.dropable;
7322 s->picture_structure = hx->s.picture_structure;
// Accumulate per-thread error counts into the master context.
7323 for(i = 1; i < context_count; i++)
7324 h->s.error_count += h->thread_context[i]->s.error_count;
/**
 * Split the input buffer into NAL units and dispatch each one.
 *
 * Handles both AVC (length-prefixed, h->is_avc) and Annex-B
 * (00 00 01 start-code delimited) framing, unescapes each unit via
 * decode_nal(), and switches on nal_unit_type: slices and data
 * partitions (DPA/DPB/DPC) are queued into thread contexts and flushed
 * through execute_decode_slices(); SPS/PPS/SEI are parsed in place.
 *
 * @param h        master decoder context
 * @param buf      input bitstream
 * @param buf_size number of bytes available in buf
 * @return number of bytes consumed from buf
 */
7329 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7330 MpegEncContext * const s = &h->s;
7331 AVCodecContext * const avctx= s->avctx;
7333 H264Context *hx; ///< thread context
7334 int context_count = 0;
7336 h->max_contexts = avctx->thread_count;
7339 for(i=0; i<50; i++){
7340 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
// Outside CHUNKS mode each call starts a fresh access unit.
7343 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7344 h->current_slice = 0;
7345 if (!s->first_field)
7346 s->current_picture_ptr= NULL;
7358 if(buf_index >= buf_size) break;
// AVC framing: read the big-endian NAL length prefix.
7360 for(i = 0; i < h->nal_length_size; i++)
7361 nalsize = (nalsize << 8) | buf[buf_index++];
7362 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7367 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7372 // start code prefix search
7373 for(; buf_index + 3 < buf_size; buf_index++){
7374 // This should always succeed in the first iteration.
7375 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7379 if(buf_index+3 >= buf_size) break;
7384 hx = h->thread_context[context_count];
7386 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7387 if (ptr==NULL || dst_length < 0){
// Strip trailing zero bytes before locating the rbsp_stop_one_bit.
// FIX: test dst_length > 0 *before* dereferencing ptr[dst_length - 1];
// the previous order read ptr[-1] for an empty NAL unit.
7390 while(dst_length > 0 && ptr[dst_length - 1] == 0)
7392 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7394 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7395 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7398 if (h->is_avc && (nalsize != consumed)){
7399 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7403 buf_index += consumed;
// Skip non-reference NALs when the caller asked to hurry/discard.
7405 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7406 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7411 switch(hx->nal_unit_type){
7413 if (h->nal_unit_type != NAL_IDR_SLICE) {
7414 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7417 idr(h); //FIXME ensure we don't loose some frames if there is reordering
7419 init_get_bits(&hx->s.gb, ptr, bit_length);
7421 hx->inter_gb_ptr= &hx->s.gb;
7422 hx->s.data_partitioning = 0;
7424 if((err = decode_slice_header(hx, h)))
7427 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
// Only count this slice for decoding if none of the skip options apply.
7428 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7429 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7430 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7431 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7432 && avctx->skip_frame < AVDISCARD_ALL)
// Data partitioning: DPA carries the slice header; DPB/DPC carry the
// intra/inter residual bitstreams, each parsed from its own GetBitContext.
7436 init_get_bits(&hx->s.gb, ptr, bit_length);
7438 hx->inter_gb_ptr= NULL;
7439 hx->s.data_partitioning = 1;
7441 err = decode_slice_header(hx, h);
7444 init_get_bits(&hx->intra_gb, ptr, bit_length);
7445 hx->intra_gb_ptr= &hx->intra_gb;
7448 init_get_bits(&hx->inter_gb, ptr, bit_length);
7449 hx->inter_gb_ptr= &hx->inter_gb;
7451 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7452 && s->context_initialized
7454 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7455 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7456 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7457 && avctx->skip_frame < AVDISCARD_ALL)
7461 init_get_bits(&s->gb, ptr, bit_length);
7465 init_get_bits(&s->gb, ptr, bit_length);
7466 decode_seq_parameter_set(h);
7468 if(s->flags& CODEC_FLAG_LOW_DELAY)
7471 if(avctx->has_b_frames < 2)
7472 avctx->has_b_frames= !s->low_delay;
7475 init_get_bits(&s->gb, ptr, bit_length);
7477 decode_picture_parameter_set(h, bit_length);
7481 case NAL_END_SEQUENCE:
7482 case NAL_END_STREAM:
7483 case NAL_FILLER_DATA:
7485 case NAL_AUXILIARY_SLICE:
7488 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
// Flush queued slices once every thread context is occupied.
7491 if(context_count == h->max_contexts) {
7492 execute_decode_slices(h, context_count);
7497 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7499 /* Slice could not be decoded in parallel mode, copy down
7500 * NAL unit stuff to context 0 and restart. Note that
7501 * rbsp_buffer is not transferred, but since we no longer
7502 * run in parallel mode this should not be an issue. */
7503 h->nal_unit_type = hx->nal_unit_type;
7504 h->nal_ref_idc = hx->nal_ref_idc;
// Flush any slices still queued when the buffer is exhausted.
7510 execute_decode_slices(h, context_count);
7515 * returns the number of bytes consumed for building the current frame
// Clamp the parser position to a sane consumed-byte count for the caller:
// never report 0 (could stall the caller's loop) and treat a position
// within 10 bytes of the end as "everything consumed".
7517 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7518 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7519 if(pos+10>buf_size) pos=buf_size; // oops ;)
/**
 * Decoder entry point: decode one packet of H.264 bitstream.
 *
 * Parses extradata (avcC) on first use, feeds the payload through
 * decode_nal_units(), then reorders completed pictures through the
 * delayed_pic[] queue so frames come out in display (POC) order.
 *
 * @param data/data_size  output AVFrame and its size (0 if no picture yet)
 * @return bytes consumed from buf, or negative on error
 */
7524 static int decode_frame(AVCodecContext *avctx,
7525 void *data, int *data_size,
7526 const uint8_t *buf, int buf_size)
7528 H264Context *h = avctx->priv_data;
7529 MpegEncContext *s = &h->s;
7530 AVFrame *pict = data;
7533 s->flags= avctx->flags;
7534 s->flags2= avctx->flags2;
7536 /* end of stream, output what is still in the buffers */
7537 if (buf_size == 0) {
7541 //FIXME factorize this with the output code below
// Pick the delayed picture with the smallest POC for output.
7542 out = h->delayed_pic[0];
7544 for(i=1; h->delayed_pic[i] && h->delayed_pic[i]->poc; i++)
7545 if(h->delayed_pic[i]->poc < out->poc){
7546 out = h->delayed_pic[i];
// Compact the delayed-picture queue after removing the output slot.
7550 for(i=out_idx; h->delayed_pic[i]; i++)
7551 h->delayed_pic[i] = h->delayed_pic[i+1];
7554 *data_size = sizeof(AVFrame);
7555 *pict= *(AVFrame*)out;
// First call with AVC ("avcC") extradata: parse the embedded SPS/PPS.
7561 if(h->is_avc && !h->got_avcC) {
7562 int i, cnt, nalsize;
7563 unsigned char *p = avctx->extradata;
7564 if(avctx->extradata_size < 7) {
7565 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7569 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7572 /* sps and pps in the avcC always have length coded with 2 bytes,
7573 so put a fake nal_length_size = 2 while parsing them */
7574 h->nal_length_size = 2;
7575 // Decode sps from avcC
7576 cnt = *(p+5) & 0x1f; // Number of sps
7578 for (i = 0; i < cnt; i++) {
7579 nalsize = AV_RB16(p) + 2;
7580 if(decode_nal_units(h, p, nalsize) < 0) {
7581 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7586 // Decode pps from avcC
7587 cnt = *(p++); // Number of pps
7588 for (i = 0; i < cnt; i++) {
7589 nalsize = AV_RB16(p) + 2;
7590 if(decode_nal_units(h, p, nalsize) != nalsize) {
7591 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7596 // Now store right nal length size, that will be use to parse all other nals
7597 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7598 // Do not reparse avcC
// Annex-B extradata (non-avcC): parse it once, on the first frame.
7602 if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
7603 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7607 buf_index=decode_nal_units(h, buf, buf_size);
7611 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7612 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7613 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
// A picture has been completed; update reference marking, POC state
// and push it through the reordering queue.
7617 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7618 Picture *out = s->current_picture_ptr;
7619 Picture *cur = s->current_picture_ptr;
7620 int i, pics, cross_idr, out_of_order, out_idx;
7624 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7625 s->current_picture_ptr->pict_type= s->pict_type;
7628 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
// Save POC/frame_num state needed to decode the next picture's POC.
7629 h->prev_poc_msb= h->poc_msb;
7630 h->prev_poc_lsb= h->poc_lsb;
7632 h->prev_frame_num_offset= h->frame_num_offset;
7633 h->prev_frame_num= h->frame_num;
7636 * FIXME: Error handling code does not seem to support interlaced
7637 * when slices span multiple rows
7638 * The ff_er_add_slice calls don't work right for bottom
7639 * fields; they cause massive erroneous error concealing
7640 * Error marking covers both fields (top and bottom).
7641 * This causes a mismatched s->error_count
7642 * and a bad error table. Further, the error count goes to
7643 * INT_MAX when called for bottom field, because mb_y is
7644 * past end by one (callers fault) and resync_mb_y != 0
7645 * causes problems for the first MB line, too.
7652 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7653 /* Wait for second field. */
7657 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7658 /* Derive top_field_first from field pocs. */
7659 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7661 //FIXME do something with unavailable reference frames
7663 /* Sort B-frames into display order */
7665 if(h->sps.bitstream_restriction_flag
7666 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7667 s->avctx->has_b_frames = h->sps.num_reorder_frames;
// Without bitstream restrictions we must assume the worst-case delay.
7671 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7672 && !h->sps.bitstream_restriction_flag){
7673 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7678 while(h->delayed_pic[pics]) pics++;
7680 assert(pics <= MAX_DELAYED_PIC_COUNT);
// Queue the current picture; mark it so it is not recycled while
// it is only held for delayed output.
7682 h->delayed_pic[pics++] = cur;
7683 if(cur->reference == 0)
7684 cur->reference = DELAYED_PIC_REF;
// Select the queued picture with the smallest POC as output candidate.
7686 out = h->delayed_pic[0];
7688 for(i=1; h->delayed_pic[i] && h->delayed_pic[i]->poc; i++)
7689 if(h->delayed_pic[i]->poc < out->poc){
7690 out = h->delayed_pic[i];
7693 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i];
7695 out_of_order = !cross_idr && out->poc < h->outputed_poc;
// Grow has_b_frames when reordering evidence shows the declared
// delay is too small.
7697 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7699 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7701 ((!cross_idr && out->poc > h->outputed_poc + 2)
7702 || cur->pict_type == FF_B_TYPE)))
7705 s->avctx->has_b_frames++;
// Emit the candidate once enough pictures are queued (or it is late).
7708 if(out_of_order || pics > s->avctx->has_b_frames){
7709 out->reference &= ~DELAYED_PIC_REF;
7710 for(i=out_idx; h->delayed_pic[i]; i++)
7711 h->delayed_pic[i] = h->delayed_pic[i+1];
7713 if(!out_of_order && pics > s->avctx->has_b_frames){
7714 *data_size = sizeof(AVFrame);
7716 h->outputed_poc = out->poc;
7717 *pict= *(AVFrame*)out;
7719 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7724 assert(pict->data[0] || !*data_size);
7725 ff_print_debug_info(s, pict);
7726 //printf("out %d\n", (int)pict->data[0]);
7729 /* Return the Picture timestamp as the frame number */
7730 /* we subtract 1 because it is added on utils.c */
7731 avctx->frame_number = s->picture_number - 1;
7733 return get_consumed_bytes(s, buf_index, buf_size);
// Fill h->mb_avail[] with the availability of the current macroblock's
// neighbours (same-slice check via slice_table):
//   [0]=top-left [1]=top [2]=top-right [3]=left; [4]/[5] are constants.
7736 static inline void fill_mb_avail(H264Context *h){
7737 MpegEncContext * const s = &h->s;
7738 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
7741 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7742 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7743 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7749 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7750 h->mb_avail[4]= 1; //FIXME move out
7751 h->mb_avail[5]= 0; //FIXME move out
7759 #define SIZE (COUNT*40)
/* NOTE(review): interior of the compiled-in self-test routine; the
 * enclosing function header is not visible in this view. It round-trips
 * the Exp-Golomb coders, the 4x4 (I)DCT, the quantizer and the NAL
 * escaping layer with random data and prints mismatches. */
7765 // int int_temp[10000];
7767 AVCodecContext avctx;
7769 dsputil_init(&dsp, &avctx);
// --- unsigned Exp-Golomb: write COUNT codes, read them back, compare ---
7771 init_put_bits(&pb, temp, SIZE);
7772 printf("testing unsigned exp golomb\n");
7773 for(i=0; i<COUNT; i++){
7775 set_ue_golomb(&pb, i);
7776 STOP_TIMER("set_ue_golomb");
7778 flush_put_bits(&pb);
7780 init_get_bits(&gb, temp, 8*SIZE);
7781 for(i=0; i<COUNT; i++){
7784 s= show_bits(&gb, 24);
7787 j= get_ue_golomb(&gb);
7789 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7792 STOP_TIMER("get_ue_golomb");
// --- signed Exp-Golomb: same round-trip, centered around zero ---
7796 init_put_bits(&pb, temp, SIZE);
7797 printf("testing signed exp golomb\n");
7798 for(i=0; i<COUNT; i++){
7800 set_se_golomb(&pb, i - COUNT/2);
7801 STOP_TIMER("set_se_golomb");
7803 flush_put_bits(&pb);
7805 init_get_bits(&gb, temp, 8*SIZE);
7806 for(i=0; i<COUNT; i++){
7809 s= show_bits(&gb, 24);
7812 j= get_se_golomb(&gb);
7813 if(j != i - COUNT/2){
7814 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7817 STOP_TIMER("get_se_golomb");
// --- 4x4 DCT/IDCT: random blocks through diff-DCT, scale, IDCT-add;
//     accumulate per-sample error statistics ---
7821 printf("testing 4x4 (I)DCT\n");
7824 uint8_t src[16], ref[16];
7825 uint64_t error= 0, max_error=0;
7827 for(i=0; i<COUNT; i++){
7829 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7830 for(j=0; j<16; j++){
7831 ref[j]= random()%255;
7832 src[j]= random()%255;
7835 h264_diff_dct_c(block, src, ref, 4);
// crude scaling to roughly undo the transform gain before the IDCT
7838 for(j=0; j<16; j++){
7839 // printf("%d ", block[j]);
7840 block[j]= block[j]*4;
7841 if(j&1) block[j]= (block[j]*4 + 2)/5;
7842 if(j&4) block[j]= (block[j]*4 + 2)/5;
7846 s->dsp.h264_idct_add(ref, block, 4);
7847 /* for(j=0; j<16; j++){
7848 printf("%d ", ref[j]);
7852 for(j=0; j<16; j++){
7853 int diff= FFABS(src[j] - ref[j]);
7856 max_error= FFMAX(max_error, diff);
7859 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
// --- quantizer: exercise all 52 QP values on random blocks ---
7860 printf("testing quantizer\n");
7861 for(qp=0; qp<52; qp++){
7863 src1_block[i]= src2_block[i]= random()%255;
// --- NAL layer: escape random payloads (with injected zero runs),
//     then unescape and verify length, consumed bytes and content ---
7866 printf("Testing NAL layer\n");
7868 uint8_t bitstream[COUNT];
7869 uint8_t nal[COUNT*2];
7871 memset(&h, 0, sizeof(H264Context));
7873 for(i=0; i<COUNT; i++){
7881 for(j=0; j<COUNT; j++){
7882 bitstream[j]= (random() % 255) + 1;
7885 for(j=0; j<zeros; j++){
7886 int pos= random() % COUNT;
7887 while(bitstream[pos] == 0){
7896 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7898 printf("encoding failed\n");
7902 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7906 if(out_length != COUNT){
7907 printf("incorrect length %d %d\n", out_length, COUNT);
7911 if(consumed != nal_length){
7912 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7916 if(memcmp(bitstream, out, COUNT)){
7917 printf("mismatch\n");
7923 printf("Testing RBSP\n");
// Codec close callback: release the per-context RBSP unescape buffers
// and the decoder's per-frame tables.
7931 static av_cold int decode_end(AVCodecContext *avctx)
7933 H264Context *h = avctx->priv_data;
7934 MpegEncContext *s = &h->s;
// av_freep also NULLs the pointers, preventing double frees on re-close.
7936 av_freep(&h->rbsp_buffer[0]);
7937 av_freep(&h->rbsp_buffer[1]);
7938 free_tables(h); //FIXME cleanup init stuff perhaps
7941 // memset(h, 0, sizeof(H264Context));
// Registration entry for the H.264 decoder: private context size,
// capability flags (direct rendering, delayed output) and display name.
7947 AVCodec h264_decoder = {
7951 sizeof(H264Context),
7956 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
7958 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),