2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
33 #include "h264_parser.h"
35 #include "rectangle.h"
39 #include "i386/h264_i386.h"
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
49 #define DELAYED_PIC_REF 4
/* CAVLC tables, shared by all decoder instances (initialized once elsewhere). */
51 static VLC coeff_token_vlc[4];
52 static VLC chroma_dc_coeff_token_vlc;
54 static VLC total_zeros_vlc[15];
55 static VLC chroma_dc_total_zeros_vlc[3];
57 static VLC run_vlc[6];
/* Forward declarations for functions defined later in this file. */
60 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
61 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
62 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
63 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
64 static Picture * remove_long(H264Context *h, int i, int ref_mask);
/**
 * Packs two 16-bit values into one 32-bit word; the order depends on
 * host endianness so the packed word matches in-memory int16_t pairs.
 * NOTE(review): the #else/#endif lines are missing from this excerpt
 * (embedded line numbers jump 68 -> 70); verify against the full file.
 */
66 static av_always_inline uint32_t pack16to32(int a, int b){
67 #ifdef WORDS_BIGENDIAN
68 return (b&0xFFFF) + (a<<16);
70 return (a&0xFFFF) + (b<<16);
/* Lookup table for qp % 6, for qp in [0,51] (avoids a division per use). */
74 const uint8_t ff_rem6[52]={
75 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
/* Lookup table for qp / 6, for qp in [0,51] (avoids a division per use). */
78 const uint8_t ff_div6[52]={
79 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
/* Per-mb tables of left-neighbour block indices; which of the four rows is
 * used depends on the field/frame pairing of the mb pair (see fill_caches).
 * NOTE(review): the initializer rows are missing from this excerpt. */
82 static const int left_block_options[4][8]={
/**
 * Fills the per-macroblock neighbour caches (intra4x4 prediction modes,
 * non-zero coefficient counts, CBP, motion vectors, reference indices,
 * mvd and direct-mode flags) from the already-decoded top / left /
 * top-left / top-right neighbours, so the rest of the decoder can address
 * them uniformly through the scan8[] cache layout.
 *
 * @param h           decoder context
 * @param mb_type     type of the current macroblock
 * @param for_deblock nonzero when the caches are filled only for the
 *                    deblocking filter (see FIXME below)
 *
 * NOTE(review): the embedded original line numbers in this excerpt have
 * many gaps (e.g. 95->98, 180->186, 259->274) -- lines are missing from
 * this view, so the control flow shown here is incomplete; check any
 * change against the complete upstream file.
 */
89 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
90 MpegEncContext * const s = &h->s;
91 const int mb_xy= h->mb_xy;
92 int topleft_xy, top_xy, topright_xy, left_xy[2];
93 int topleft_type, top_type, topright_type, left_type[2];
95 int topleft_partition= -1;
98 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
100 //FIXME deblocking could skip the intra and nnz parts.
101 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
104 /* Wow, what a mess, why didn't they simplify the interlacing & intra
105 * stuff, I can't imagine that these complex rules are worth it. */
/* Default (non-MBAFF) neighbour addresses: simple frame-coded geometry. */
107 topleft_xy = top_xy - 1;
108 topright_xy= top_xy + 1;
109 left_xy[1] = left_xy[0] = mb_xy-1;
110 left_block = left_block_options[0];
/* MBAFF: re-derive neighbour addresses from the frame/field coding of each
 * mb pair; "frame_flag" is 1 when the respective pair is frame-coded. */
112 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
113 const int top_pair_xy = pair_xy - s->mb_stride;
114 const int topleft_pair_xy = top_pair_xy - 1;
115 const int topright_pair_xy = top_pair_xy + 1;
116 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
117 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
118 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
119 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
120 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
121 const int bottom = (s->mb_y & 1);
122 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
124 ? !curr_mb_frame_flag // bottom macroblock
125 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
127 top_xy -= s->mb_stride;
130 ? !curr_mb_frame_flag // bottom macroblock
131 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
133 topleft_xy -= s->mb_stride;
134 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
135 topleft_xy += s->mb_stride;
136 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
137 topleft_partition = 0;
140 ? !curr_mb_frame_flag // bottom macroblock
141 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
143 topright_xy -= s->mb_stride;
/* Left neighbour: when the pairing differs, pick the matching
 * left_block_options row to remap block indices across field/frame. */
145 if (left_mb_frame_flag != curr_mb_frame_flag) {
146 left_xy[1] = left_xy[0] = pair_xy - 1;
147 if (curr_mb_frame_flag) {
149 left_block = left_block_options[1];
151 left_block= left_block_options[2];
154 left_xy[1] += s->mb_stride;
155 left_block = left_block_options[3];
/* Publish resolved neighbour addresses for later use (e.g. deblocking). */
160 h->top_mb_xy = top_xy;
161 h->left_mb_xy[0] = left_xy[0];
162 h->left_mb_xy[1] = left_xy[1];
/* Deblocking path: any already-decoded neighbour counts
 * (slice_table entries < 255 mean "decoded"). */
166 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
167 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
168 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
/* MBAFF inter mb for deblocking: reload this mb's own nnz/mv/ref state. */
170 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
172 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
174 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
175 for(list=0; list<h->list_count; list++){
176 if(USES_LIST(mb_type,list)){
177 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
178 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
179 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
180 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
186 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
187 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
189 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
190 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
192 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
193 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
/* Prediction path: only neighbours belonging to the same slice are usable. */
198 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
199 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
200 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
201 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
202 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
/* Intra: build per-4x4-block sample-availability bitmasks, then clear the
 * bits for neighbours that are unavailable (or inter when
 * constrained_intra_pred forbids predicting from them). */
205 if(IS_INTRA(mb_type)){
206 h->topleft_samples_available=
207 h->top_samples_available=
208 h->left_samples_available= 0xFFFF;
209 h->topright_samples_available= 0xEEEA;
211 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
212 h->topleft_samples_available= 0xB3FF;
213 h->top_samples_available= 0x33FF;
214 h->topright_samples_available= 0x26EA;
217 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
218 h->topleft_samples_available&= 0xDF5F;
219 h->left_samples_available&= 0x5F5F;
223 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
224 h->topleft_samples_available&= 0x7FFF;
226 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
227 h->topright_samples_available&= 0xFBFF;
/* Intra4x4: copy the neighbours' bottom/right 4x4 prediction modes into
 * the cache; unavailable neighbours get a substitute ("pred", set in the
 * lines missing from this excerpt). */
229 if(IS_INTRA4x4(mb_type)){
230 if(IS_INTRA4x4(top_type)){
231 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
232 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
233 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
234 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
237 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
242 h->intra4x4_pred_mode_cache[4+8*0]=
243 h->intra4x4_pred_mode_cache[5+8*0]=
244 h->intra4x4_pred_mode_cache[6+8*0]=
245 h->intra4x4_pred_mode_cache[7+8*0]= pred;
248 if(IS_INTRA4x4(left_type[i])){
249 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
250 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
253 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
258 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
259 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
274 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
/* Non-zero coefficient counts of the top neighbour's bottom row
 * (luma indices 4..7/3, chroma 8..12); missing neighbours default to
 * 0 for CABAC inter, 64 otherwise. */
276 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
277 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
278 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
279 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
281 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
282 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
284 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
285 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
288 h->non_zero_count_cache[4+8*0]=
289 h->non_zero_count_cache[5+8*0]=
290 h->non_zero_count_cache[6+8*0]=
291 h->non_zero_count_cache[7+8*0]=
293 h->non_zero_count_cache[1+8*0]=
294 h->non_zero_count_cache[2+8*0]=
296 h->non_zero_count_cache[1+8*3]=
297 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* Same for the two left neighbours (two in MBAFF, the loop covers both). */
301 for (i=0; i<2; i++) {
303 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
304 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
305 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
306 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
308 h->non_zero_count_cache[3+8*1 + 2*8*i]=
309 h->non_zero_count_cache[3+8*2 + 2*8*i]=
310 h->non_zero_count_cache[0+8*1 + 8*i]=
311 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
/* CBP of the top/left neighbours (used as CABAC context). */
318 h->top_cbp = h->cbp_table[top_xy];
319 } else if(IS_INTRA(mb_type)) {
326 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
327 } else if(IS_INTRA(mb_type)) {
333 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
336 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
/* Inter/direct: fill the motion-vector and reference-index caches from
 * the neighbours; missing data becomes 0 mvs with LIST_NOT_USED or
 * PART_NOT_AVAILABLE refs. */
341 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
343 for(list=0; list<h->list_count; list++){
344 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
345 /*if(!h->mv_cache_clean[list]){
346 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
347 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
348 h->mv_cache_clean[list]= 1;
352 h->mv_cache_clean[list]= 0;
/* Top row of the mv/ref caches from the top neighbour's bottom row. */
354 if(USES_LIST(top_type, list)){
355 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
356 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
357 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
358 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
359 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
360 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
361 h->ref_cache[list][scan8[0] + 0 - 1*8]=
362 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
363 h->ref_cache[list][scan8[0] + 2 - 1*8]=
364 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
366 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
367 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
368 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
369 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
370 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
/* Left column, two entries per left neighbour (left_block remaps the
 * block indices for MBAFF field/frame mixes). */
374 int cache_idx = scan8[0] - 1 + i*2*8;
375 if(USES_LIST(left_type[i], list)){
376 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
377 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
378 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
379 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
380 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
381 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
383 *(uint32_t*)h->mv_cache [list][cache_idx ]=
384 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
385 h->ref_cache[list][cache_idx ]=
386 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
390 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
/* Corner neighbours (top-left / top-right), single entry each.
 * topleft_partition selects the mid-mb partition in the MBAFF case. */
393 if(USES_LIST(topleft_type, list)){
394 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
395 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
396 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
397 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
399 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
400 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
403 if(USES_LIST(topright_type, list)){
404 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
405 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
406 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
407 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
409 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
410 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
413 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
/* Mark the cache cells that have no real neighbour as not available. */
416 h->ref_cache[list][scan8[5 ]+1] =
417 h->ref_cache[list][scan8[7 ]+1] =
418 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
419 h->ref_cache[list][scan8[4 ]] =
420 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
421 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
422 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
423 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
424 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
425 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
428 /* XXX beurk, Load mvd */
/* CABAC only: load neighbours' motion-vector differences (mvd) the same
 * way as the mvs above. */
429 if(USES_LIST(top_type, list)){
430 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
431 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
432 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
433 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
434 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
436 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
437 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
438 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
439 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
441 if(USES_LIST(left_type[0], list)){
442 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
443 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
444 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
446 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
447 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
449 if(USES_LIST(left_type[1], list)){
450 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
451 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
452 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
454 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
455 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
457 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
458 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
459 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
460 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
461 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
/* B slices: direct-mode flags of the neighbours (CABAC context for
 * mb_direct); 8x8 neighbours supply per-8x8 flags from direct_table. */
463 if(h->slice_type_nos == FF_B_TYPE){
464 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
466 if(IS_DIRECT(top_type)){
467 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
468 }else if(IS_8X8(top_type)){
469 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
470 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
471 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
473 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
476 if(IS_DIRECT(left_type[0]))
477 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
478 else if(IS_8X8(left_type[0]))
479 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
481 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
483 if(IS_DIRECT(left_type[1]))
484 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
485 else if(IS_8X8(left_type[1]))
486 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
488 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
/* MBAFF: when a neighbour's field/frame coding differs from the current
 * mb, rescale its cached refs and vertical mv/mvd components.
 * MAP_F2F is applied to every neighbour cache cell listed below, once for
 * the frame->field direction and once (redefined) for field->frame. */
494 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
495 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
496 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
497 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
498 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
499 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
500 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
501 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
502 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
503 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
505 #define MAP_F2F(idx, mb_type)\
506 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
507 h->ref_cache[list][idx] <<= 1;\
508 h->mv_cache[list][idx][1] /= 2;\
509 h->mvd_cache[list][idx][1] /= 2;\
514 #define MAP_F2F(idx, mb_type)\
515 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
516 h->ref_cache[list][idx] >>= 1;\
517 h->mv_cache[list][idx][1] <<= 1;\
518 h->mvd_cache[list][idx][1] <<= 1;\
/* Count of neighbouring 8x8-DCT mbs, used as transform-size context. */
528 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
/**
 * Copies the intra4x4 prediction modes of the current mb's right column
 * and bottom row from the scan8 cache back into the frame-wide
 * intra4x4_pred_mode table, where mbs to the right/below read them
 * as their neighbours.
 * NOTE(review): entry [7] is not written in this excerpt (line numbers
 * jump 540 -> 544); verify against the full file.
 */
531 static inline void write_back_intra_pred_mode(H264Context *h){
532 const int mb_xy= h->mb_xy;
534 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
535 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
536 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
537 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
538 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
539 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
540 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
544 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* The top[]/left[] tables remap each intra4x4 mode to the variant usable
 * when that neighbour is missing; a -1 entry means the bitstream requested
 * a mode that needs the missing neighbour, i.e. a stream error. */
546 static inline int check_intra4x4_pred_mode(H264Context *h){
547 MpegEncContext * const s = &h->s;
548 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
549 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
552 if(!(h->top_samples_available&0x8000)){
554 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
556 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
559 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
564 if(!(h->left_samples_available&0x8000)){
566 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
568 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
571 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
577 } //FIXME cleanup like next
580 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/* 16x16/chroma variant of the check above: validates `mode` against the
 * available neighbours and remaps DC prediction accordingly; -1 entries
 * mark modes that are invalid without that neighbour. */
582 static inline int check_intra_pred_mode(H264Context *h, int mode){
583 MpegEncContext * const s = &h->s;
584 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
585 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
588 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
592 if(!(h->top_samples_available&0x8000)){
595 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
600 if(!(h->left_samples_available&0x8000)){
603 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
612 * gets the predicted intra4x4 prediction mode.
/* Prediction = min(left neighbour mode, top neighbour mode); a negative
 * cache value means "neighbour unavailable", in which case DC_PRED is
 * the fallback (per the H.264 most-probable-mode rule). */
614 static inline int pred_intra_mode(H264Context *h, int n){
615 const int index8= scan8[n];
616 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
617 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
618 const int min= FFMIN(left, top);
620 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
622 if(min<0) return DC_PRED;
/**
 * Writes the non-zero coefficient counts of the current mb's right/bottom
 * edge blocks from the scan8 cache back into the frame-wide
 * non_zero_count table, for use by later mbs and the deblocking filter.
 */
626 static inline void write_back_non_zero_count(H264Context *h){
627 const int mb_xy= h->mb_xy;
629 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
630 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
631 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
632 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
633 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
634 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
635 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
637 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
638 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
639 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
641 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
642 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
643 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
646 // store all luma nnzs, for deblocking
/* Pack one "has nonzero coeffs" bit per luma 4x4 block into a uint16. */
649 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
650 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
655 * gets the predicted number of non-zero coefficients.
656 * @param n block index
/* Averages the left and top neighbours' counts (the combination and the
 * >=64 "unavailable" handling are on lines missing from this excerpt);
 * used as the CAVLC coeff_token context. */
658 static inline int pred_non_zero_count(H264Context *h, int n){
659 const int index8= scan8[n];
660 const int left= h->non_zero_count_cache[index8 - 1];
661 const int top = h->non_zero_count_cache[index8 - 8];
664 if(i<64) i= (i+1)>>1;
666 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
/**
 * Fetches the top-right (diagonal) neighbour's mv for mv prediction,
 * falling back to the top-left when the top-right is unavailable.
 * Returns the corresponding reference index and sets *C to the mv.
 *
 * The MBAFF branches re-fetch the neighbour directly from the picture's
 * motion_val (rescaling the vertical component and ref via SET_DIAG_MV)
 * because the cache layout cannot represent all field/frame mixes --
 * see the comment at original lines 675-676.
 */
671 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
672 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
673 MpegEncContext *s = &h->s;
675 /* there is no consistent mapping of mvs to neighboring locations that will
676 * make mbaff happy, so we can't move all this logic to fill_caches */
678 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
/* Scratch cache cell used to hand a rescaled mv back through *C. */
680 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
681 *C = h->mv_cache[list][scan8[0]-2];
684 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
685 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
686 if(IS_INTERLACED(mb_types[topright_xy])){
/* SET_DIAG_MV(MV_OP, REF_OP, X4, Y4): loads the mv/ref at 4x4 position
 * (X4,Y4), applies MV_OP to the vertical mv and REF_OP to the ref to
 * convert between field and frame units, and returns the ref. */
687 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
688 const int x4 = X4, y4 = Y4;\
689 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
690 if(!USES_LIST(mb_type,list))\
691 return LIST_NOT_USED;\
692 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
693 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
694 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
695 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
697 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
700 if(topright_ref == PART_NOT_AVAILABLE
701 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
702 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
704 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
705 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
708 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
710 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
711 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
/* Non-MBAFF path: use the cached top-right, else the top-left (i-8-1). */
717 if(topright_ref != PART_NOT_AVAILABLE){
718 *C= h->mv_cache[list][ i - 8 + part_width ];
721 tprintf(s->avctx, "topright MV not available\n");
723 *C= h->mv_cache[list][ i - 8 - 1 ];
724 return h->ref_cache[list][ i - 8 - 1 ];
729 * gets the predicted MV.
730 * @param n the block index
731 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
732 * @param mx the x component of the predicted motion vector
733 * @param my the y component of the predicted motion vector
/* Median mv prediction over the left (A), top (B) and diagonal (C)
 * neighbours; when exactly one neighbour shares `ref`, that neighbour's
 * mv is used instead (standard H.264 rule). Several branch bodies fall on
 * lines missing from this excerpt. */
735 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
736 const int index8= scan8[n];
737 const int top_ref= h->ref_cache[list][ index8 - 8 ];
738 const int left_ref= h->ref_cache[list][ index8 - 1 ];
739 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
740 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
742 int diagonal_ref, match_count;
744 assert(part_width==1 || part_width==2 || part_width==4);
754 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
755 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
756 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
757 if(match_count > 1){ //most common
758 *mx= mid_pred(A[0], B[0], C[0]);
759 *my= mid_pred(A[1], B[1], C[1]);
760 }else if(match_count==1){
764 }else if(top_ref==ref){
772 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
776 *mx= mid_pred(A[0], B[0], C[0]);
777 *my= mid_pred(A[1], B[1], C[1]);
781 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
785 * gets the directionally predicted 16x8 MV.
786 * @param n the block index
787 * @param mx the x component of the predicted motion vector
788 * @param my the y component of the predicted motion vector
/* 16x8 shortcut: the top partition prefers the top neighbour (B), the
 * bottom partition the left neighbour (A), when that neighbour has the
 * same ref; otherwise falls back to the generic pred_motion. */
790 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
792 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
793 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
795 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
803 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
804 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
806 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
816 pred_motion(h, n, 4, list, ref, mx, my);
820 * gets the directionally predicted 8x16 MV.
821 * @param n the block index
822 * @param mx the x component of the predicted motion vector
823 * @param my the y component of the predicted motion vector
/* 8x16 shortcut: the left partition prefers the left neighbour (A), the
 * right partition the diagonal neighbour (C); otherwise falls back to the
 * generic pred_motion. */
825 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
827 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
828 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
830 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
841 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
843 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
845 if(diagonal_ref == ref){
853 pred_motion(h, n, 2, list, ref, mx, my);
/**
 * Predicts the mv for a P-skip macroblock: zero mv when either neighbour
 * is unavailable or has a zero mv with ref 0 (the skip condition);
 * otherwise the regular 16x16 list-0 prediction.
 */
856 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
857 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
858 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
860 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
862 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
863 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
864 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
870 pred_motion(h, 0, 4, 0, 0, mx, my);
/**
 * Computes the temporal-direct-mode distance scale factors
 * (dist_scale_factor[i]) for every list-0 reference: the standard
 * tb/td POC-distance ratio, clipped per the spec. Also mirrors the
 * factors into the per-field table (the guarding condition for that
 * is on a line missing from this excerpt).
 */
875 static inline void direct_dist_scale_factor(H264Context * const h){
876 MpegEncContext * const s = &h->s;
877 const int poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
878 const int poc1 = h->ref_list[1][0].poc;
880 for(i=0; i<h->ref_count[0]; i++){
881 int poc0 = h->ref_list[0][i].poc;
882 int td = av_clip(poc1 - poc0, -128, 127);
883 if(td == 0 /* FIXME || pic0 is a long-term ref */){
884 h->dist_scale_factor[i] = 256;
886 int tb = av_clip(poc - poc0, -128, 127);
887 int tx = (16384 + (FFABS(td) >> 1)) / td;
888 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
892 for(i=0; i<h->ref_count[0]; i++){
893 h->dist_scale_factor_field[2*i] =
894 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
/**
 * Records the current picture's reference counts/POCs (so future pictures
 * can use them as a co-located reference) and, for temporal direct mode,
 * builds map_col_to_list0: for each reference of the co-located picture
 * (ref_list[1][0]), the index of the list-0 reference with the same POC
 * (0 when no match is found, as a bogus fill-in).
 */
898 static inline void direct_ref_list_init(H264Context * const h){
899 MpegEncContext * const s = &h->s;
900 Picture * const ref1 = &h->ref_list[1][0];
901 Picture * const cur = s->current_picture_ptr;
903 int sidx= s->picture_structure&1;
904 if(cur->pict_type == FF_I_TYPE)
905 cur->ref_count[sidx][0] = 0;
906 if(cur->pict_type != FF_B_TYPE)
907 cur->ref_count[sidx][1] = 0;
908 for(list=0; list<2; list++){
909 cur->ref_count[sidx][list] = h->ref_count[list];
910 for(j=0; j<h->ref_count[list]; j++)
911 cur->ref_poc[sidx][list][j] = h->ref_list[list][j].poc;
/* Spatial direct mode derives refs from neighbours, no map needed. */
913 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
915 for(list=0; list<2; list++){
916 for(i=0; i<ref1->ref_count[sidx][list]; i++){
917 const int poc = ref1->ref_poc[sidx][list][i];
918 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
919 for(j=0; j<h->ref_count[list]; j++)
920 if(h->ref_list[list][j].poc == poc){
921 h->map_col_to_list0[list][i] = j;
/* Per-field map: frame index j maps to field indices 2j / 2j+1. */
927 for(list=0; list<2; list++){
928 for(i=0; i<ref1->ref_count[sidx][list]; i++){
929 j = h->map_col_to_list0[list][i];
930 h->map_col_to_list0_field[list][2*i] = 2*j;
931 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/* Derives motion vectors and reference indices for B_DIRECT macroblocks
 * (H.264 subclause 8.4.1.2), writing into h->mv_cache / h->ref_cache and
 * possibly refining *mb_type / h->sub_mb_type. Two modes:
 *   - spatial direct: refs come from the min of the neighbours, MVs from
 *     median prediction, zeroed where the co-located block is static;
 *   - temporal direct: co-located list1 MVs are scaled by
 *     dist_scale_factor and refs remapped via map_col_to_list0.
 * Both modes have extra paths for frame/field (PAFF/MBAFF) mismatches
 * between the current MB and the co-located one.
 * NOTE(review): many lines (else branches, closing braces, some
 * declarations) fall outside this excerpt. */
937 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
938 MpegEncContext * const s = &h->s;
/* fieldoff adjusts the co-located addresses when the list1 reference has
 * opposite field parity to the current picture — TODO confirm exact
 * PICT_* encoding, not visible here. */
939 const int fieldoff= (s->picture_structure & h->ref_list[1][0].reference) ? 0 : (3-2*s->picture_structure);
940 const int mb_xy = h->mb_xy + s->mb_stride*fieldoff;
941 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride + 2*h->b8_stride*fieldoff;
942 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h-> b_stride + 4*h-> b_stride*fieldoff;
943 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
/* Co-located picture's motion vectors (4x4 grid) and reference indices
 * (8x8 grid), for both its lists. */
944 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
945 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
946 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
947 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
948 const int is_b8x8 = IS_8X8(*mb_type);
949 unsigned int sub_mb_type;
952 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
/* Choose the partition size the direct MB will use, based on the
 * co-located MB type and the SPS direct_8x8_inference flag. */
953 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
954 /* FIXME save sub mb types from previous frames (or derive from MVs)
955 * so we know exactly what block size to use */
956 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
957 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
958 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
959 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
960 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
962 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
963 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
966 *mb_type |= MB_TYPE_DIRECT2;
968 *mb_type |= MB_TYPE_INTERLACED;
970 tprintf(s->avctx, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
/* ---- spatial direct mode ---- */
972 if(h->direct_spatial_mv_pred){
977 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
979 /* ref = min(neighbors) */
980 for(list=0; list<2; list++){
981 int refa = h->ref_cache[list][scan8[0] - 1];
982 int refb = h->ref_cache[list][scan8[0] - 8];
983 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
984 if(refc == PART_NOT_AVAILABLE)
985 refc = h->ref_cache[list][scan8[0] - 8 - 1];
/* unsigned compare makes negative (unavailable) refs largest. */
986 ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
/* No valid neighbour ref in either list: fall back to ref 0, MV (0,0). */
991 if(ref[0] < 0 && ref[1] < 0){
993 mv[0][0] = mv[0][1] =
994 mv[1][0] = mv[1][1] = 0;
996 for(list=0; list<2; list++){
998 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1000 mv[list][0] = mv[list][1] = 0;
/* Drop the unused list when only one direction has a valid ref. */
1006 *mb_type &= ~MB_TYPE_L1;
1007 sub_mb_type &= ~MB_TYPE_L1;
1008 }else if(ref[0] < 0){
1010 *mb_type &= ~MB_TYPE_L0;
1011 sub_mb_type &= ~MB_TYPE_L0;
/* Frame/field mismatch with the co-located MB: re-derive the co-located
 * pointers for the MB pair and process per-8x8 blocks. */
1014 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1015 int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1016 int mb_types_col[2];
1017 int b8_stride = h->b8_stride;
1018 int b4_stride = h->b_stride;
1020 *mb_type = (*mb_type & ~MB_TYPE_16x16) | MB_TYPE_8x8;
1022 if(IS_INTERLACED(*mb_type)){
1023 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1024 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
/* Rewind to the top of the co-located MB pair. */
1026 l1ref0 -= 2*b8_stride;
1027 l1ref1 -= 2*b8_stride;
1028 l1mv0 -= 4*b4_stride;
1029 l1mv1 -= 4*b4_stride;
/* Current MB is frame-coded but co-located is field-coded: pick the
 * co-located field whose POC is closer to the current picture. */
1034 int cur_poc = s->current_picture_ptr->poc;
1035 int *col_poc = h->ref_list[1]->field_poc;
1036 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1037 int dy = 2*col_parity - (s->mb_y&1);
1039 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy + col_parity*s->mb_stride];
1040 l1ref0 += dy*b8_stride;
1041 l1ref1 += dy*b8_stride;
1042 l1mv0 += 2*dy*b4_stride;
1043 l1mv1 += 2*dy*b4_stride;
1047 for(i8=0; i8<4; i8++){
1050 int xy8 = x8+y8*b8_stride;
1051 int xy4 = 3*x8+y8*b4_stride;
1054 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1056 h->sub_mb_type[i8] = sub_mb_type;
1058 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1059 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* Zero the MVs where the co-located block is static (|mv| <= 1 against
 * ref 0) per the spatial-direct "colZeroFlag" rule. */
1060 if(!IS_INTRA(mb_types_col[y8])
1061 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1062 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1064 a= pack16to32(mv[0][0],mv[0][1]);
1066 b= pack16to32(mv[1][0],mv[1][1]);
1068 a= pack16to32(mv[0][0],mv[0][1]);
1069 b= pack16to32(mv[1][0],mv[1][1]);
1071 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1072 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1074 }else if(IS_16X16(*mb_type)){
/* Whole-MB case: one fill per list. */
1077 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1078 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1079 if(!IS_INTRA(mb_type_col)
1080 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1081 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
/* x264 builds <= 33 wrote non-conforming l1ref1 data; work around. */
1082 && (h->x264_build>33 || !h->x264_build)))){
1084 a= pack16to32(mv[0][0],mv[0][1]);
1086 b= pack16to32(mv[1][0],mv[1][1]);
1088 a= pack16to32(mv[0][0],mv[0][1]);
1089 b= pack16to32(mv[1][0],mv[1][1]);
1091 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1092 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
/* Per-8x8 path (same parity as co-located). */
1094 for(i8=0; i8<4; i8++){
1095 const int x8 = i8&1;
1096 const int y8 = i8>>1;
1098 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1100 h->sub_mb_type[i8] = sub_mb_type;
1102 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1103 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1104 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1105 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1108 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1109 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1110 && (h->x264_build>33 || !h->x264_build)))){
1111 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1112 if(IS_SUB_8X8(sub_mb_type)){
1113 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1114 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1116 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1118 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
/* 4x4 granularity: check each co-located 4x4 MV individually. */
1121 for(i4=0; i4<4; i4++){
1122 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1123 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1125 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1127 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1133 }else{ /* direct temporal mv pred */
1134 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1135 const int *dist_scale_factor = h->dist_scale_factor;
/* Field-coded current MB uses the field variants of the mapping and
 * scale tables. */
1138 if(IS_INTERLACED(*mb_type)){
1139 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1140 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1141 dist_scale_factor = h->dist_scale_factor_field;
1143 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1144 /* FIXME assumes direct_8x8_inference == 1 */
1145 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1146 int mb_types_col[2];
1149 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1150 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1151 | (*mb_type & MB_TYPE_INTERLACED);
1152 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1154 if(IS_INTERLACED(*mb_type)){
1155 /* frame to field scaling */
1156 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1157 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1159 l1ref0 -= 2*h->b8_stride;
1160 l1ref1 -= 2*h->b8_stride;
1161 l1mv0 -= 4*h->b_stride;
1162 l1mv1 -= 4*h->b_stride;
1166 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1167 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1169 *mb_type |= MB_TYPE_16x8;
1171 *mb_type |= MB_TYPE_8x8;
1173 /* field to frame scaling */
1174 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1175 * but in MBAFF, top and bottom POC are equal */
1176 int dy = (s->mb_y&1) ? 1 : 2;
1178 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1179 l1ref0 += dy*h->b8_stride;
1180 l1ref1 += dy*h->b8_stride;
1181 l1mv0 += 2*dy*h->b_stride;
1182 l1mv1 += 2*dy*h->b_stride;
1185 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1187 *mb_type |= MB_TYPE_16x16;
1189 *mb_type |= MB_TYPE_8x8;
/* Cross-parity per-8x8 scaling; y_shift adjusts vertical coords/MVs
 * between frame and field units (declared outside this excerpt). */
1192 for(i8=0; i8<4; i8++){
1193 const int x8 = i8&1;
1194 const int y8 = i8>>1;
1196 const int16_t (*l1mv)[2]= l1mv0;
1198 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1200 h->sub_mb_type[i8] = sub_mb_type;
1202 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1203 if(IS_INTRA(mb_types_col[y8])){
/* Intra co-located block: ref 0 with zero MVs. */
1204 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1205 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1206 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1210 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1212 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1214 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1217 scale = dist_scale_factor[ref0];
1218 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1221 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1222 int my_col = (mv_col[1]<<y_shift)/2;
/* Temporal scaling: mvL0 = (DistScaleFactor * mvCol + 128) >> 8,
 * mvL1 = mvL0 - mvCol (spec 8.4.1.2.3). */
1223 int mx = (scale * mv_col[0] + 128) >> 8;
1224 int my = (scale * my_col + 128) >> 8;
1225 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1226 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1233 /* one-to-one mv scaling */
1235 if(IS_16X16(*mb_type)){
1238 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1239 if(IS_INTRA(mb_type_col)){
1242 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1243 : map_col_to_list0[1][l1ref1[0]];
1244 const int scale = dist_scale_factor[ref0];
1245 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1247 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1248 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1250 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1251 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1253 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1254 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1255 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1257 for(i8=0; i8<4; i8++){
1258 const int x8 = i8&1;
1259 const int y8 = i8>>1;
1261 const int16_t (*l1mv)[2]= l1mv0;
1263 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1265 h->sub_mb_type[i8] = sub_mb_type;
1266 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1267 if(IS_INTRA(mb_type_col)){
1268 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1269 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1270 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
/* Remap co-located ref (list0 if valid, else list1) to current list0. */
1274 ref0 = l1ref0[x8 + y8*h->b8_stride];
1276 ref0 = map_col_to_list0[0][ref0];
1278 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1281 scale = dist_scale_factor[ref0];
1283 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1284 if(IS_SUB_8X8(sub_mb_type)){
1285 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1286 int mx = (scale * mv_col[0] + 128) >> 8;
1287 int my = (scale * mv_col[1] + 128) >> 8;
1288 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1289 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1291 for(i4=0; i4<4; i4++){
1292 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1293 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1294 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1295 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1296 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1297 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
/* Copies the per-MB caches (mv_cache, ref_cache, mvd_cache) back into the
 * frame-wide tables (current_picture.motion_val / ref_index, mvd_table),
 * and records direct-mode flags for CABAC B slices.
 * NOTE(review): loop headers and some branches are outside this excerpt. */
1304 static inline void write_back_motion(H264Context *h, int mb_type){
1305 MpegEncContext * const s = &h->s;
/* Frame-wide indices of this MB in the 4x4 (b_xy) and 8x8 (b8_xy) grids. */
1306 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1307 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1310 if(!USES_LIST(mb_type, 0))
1311 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1313 for(list=0; list<h->list_count; list++){
1315 if(!USES_LIST(mb_type, list))
/* Copy a 4x4 row of MVs as two 64-bit stores (2 MVs each). */
1319 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1320 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
/* CABAC needs motion vector differences too; skipped MBs have none. */
1322 if( h->pps.cabac ) {
1323 if(IS_SKIP(mb_type))
1324 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1327 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1328 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
/* One reference index per 8x8 partition. */
1333 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1334 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1335 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1336 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1337 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
/* Direct flags per 8x8 block, used as CABAC context for B slices. */
1341 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1342 if(IS_8X8(mb_type)){
1343 uint8_t *direct_table = &h->direct_table[b8_xy];
1344 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1345 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1346 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1352 * Decodes a network abstraction layer unit.
1353 * @param consumed is the number of bytes used as input
1354 * @param length is the length of the array
1355 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1356 * @returns decoded bytes, might be src+1 if no escapes
/* Parses the NAL header byte and removes the 00 00 03 emulation-prevention
 * escapes (H.264 Annex B), returning a pointer to the unescaped RBSP.
 * If no escape byte is present the input is returned as-is (zero-copy).
 * NOTE(review): several lines of this function are outside this excerpt. */
1358 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1363 // src[0]&0x80; //forbidden bit
1364 h->nal_ref_idc= src[0]>>5;
1365 h->nal_unit_type= src[0]&0x1F;
1369 for(i=0; i<length; i++)
1370 printf("%2X ", src[i]);
/* Scan every other byte for a zero — any 00 00 0x pattern must contain a
 * zero at an even or odd position, so stepping by 2 cannot miss one. */
1372 for(i=0; i+1<length; i+=2){
1373 if(src[i]) continue;
1374 if(i>0 && src[i-1]==0) i--;
1375 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1377 /* startcode, so we must be past the end */
1384 if(i>=length-1){ //no escaped 0
1385 *dst_length= length;
1386 *consumed= length+1; //+1 for the header
1390 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
/* Unescaping needs a writable buffer; reuse/grow the per-context one. */
1391 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1392 dst= h->rbsp_buffer[bufidx];
1398 //printf("decoding esc\n");
1401 //remove escapes (very rare 1:2^22)
1402 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1403 if(src[si+2]==3){ //escape
1408 }else //next start code
1412 dst[di++]= src[si++];
1416 *consumed= si + 1;//+1 for the header
1417 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1422 * identifies the exact end of the bitstream
1423 * @return the length of the trailing, or 0 if damaged
/* Finds the rbsp_stop_one_bit that terminates the RBSP; the body of this
 * function is almost entirely outside this excerpt. */
1425 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1429 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1439 * IDCT transforms the 16 dc values and dequantizes them.
1440 * @param qp quantization parameter
/* 4x4 Hadamard inverse transform + dequant of the 16 luma DC coefficients
 * (Intra16x16 MBs). Operates in place on the DC positions of the 16x16
 * coefficient block; x_offset/y_offset map the 4x4 DC grid onto those
 * positions. Rows first (into temp), then columns with the qmul scaling. */
1442 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1445 int temp[16]; //FIXME check if this is a good idea
1446 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1447 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1449 //memset(block, 64, 2*256);
/* Horizontal butterflies. */
1452 const int offset= y_offset[i];
1453 const int z0= block[offset+stride*0] + block[offset+stride*4];
1454 const int z1= block[offset+stride*0] - block[offset+stride*4];
1455 const int z2= block[offset+stride*1] - block[offset+stride*5];
1456 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* Vertical butterflies + dequant with rounding ((x*qmul + 128) >> 8). */
1465 const int offset= x_offset[i];
1466 const int z0= temp[4*0+i] + temp[4*2+i];
1467 const int z1= temp[4*0+i] - temp[4*2+i];
1468 const int z2= temp[4*1+i] - temp[4*3+i];
1469 const int z3= temp[4*1+i] + temp[4*3+i];
1471 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1472 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1473 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1474 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1480 * DCT transforms the 16 dc values.
1481 * @param qp quantization parameter ??? FIXME
/* Forward 4x4 Hadamard transform of the 16 luma DC coefficients (encoder
 * side counterpart of h264_luma_dc_dequant_idct_c). In-place on the DC
 * positions of the 16x16 block; final >>1 normalizes the transform gain. */
1483 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1484 // const int qmul= dequant_coeff[qp][0];
1486 int temp[16]; //FIXME check if this is a good idea
1487 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1488 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* Horizontal butterflies. */
1491 const int offset= y_offset[i];
1492 const int z0= block[offset+stride*0] + block[offset+stride*4];
1493 const int z1= block[offset+stride*0] - block[offset+stride*4];
1494 const int z2= block[offset+stride*1] - block[offset+stride*5];
1495 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* Vertical butterflies; >>1 compensates the doubled transform gain. */
1504 const int offset= x_offset[i];
1505 const int z0= temp[4*0+i] + temp[4*2+i];
1506 const int z1= temp[4*0+i] - temp[4*2+i];
1507 const int z2= temp[4*1+i] - temp[4*3+i];
1508 const int z3= temp[4*1+i] + temp[4*3+i];
1510 block[stride*0 +offset]= (z0 + z3)>>1;
1511 block[stride*2 +offset]= (z1 + z2)>>1;
1512 block[stride*8 +offset]= (z1 - z2)>>1;
1513 block[stride*10+offset]= (z0 - z3)>>1;
/* 2x2 inverse Hadamard transform + dequant of the 4 chroma DC
 * coefficients, in place at the DC positions of the chroma block.
 * NOTE(review): the lines computing e/f (used below) are outside this
 * excerpt. */
1521 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1522 const int stride= 16*2;
1523 const int xStride= 16;
1526 a= block[stride*0 + xStride*0];
1527 b= block[stride*0 + xStride*1];
1528 c= block[stride*1 + xStride*0];
1529 d= block[stride*1 + xStride*1];
/* 2x2 butterfly outputs, scaled by qmul with a >>7 normalization. */
1536 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1537 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1538 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1539 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/* Forward 2x2 Hadamard transform of the 4 chroma DC coefficients
 * (encoder side), in place at the DC positions of the chroma block.
 * NOTE(review): the lines computing e/f are outside this excerpt. */
1543 static void chroma_dc_dct_c(DCTELEM *block){
1544 const int stride= 16*2;
1545 const int xStride= 16;
1548 a= block[stride*0 + xStride*0];
1549 b= block[stride*0 + xStride*1];
1550 c= block[stride*1 + xStride*0];
1551 d= block[stride*1 + xStride*1];
/* 2x2 butterfly outputs (unscaled). */
1558 block[stride*0 + xStride*0]= (a+c);
1559 block[stride*0 + xStride*1]= (e+b);
1560 block[stride*1 + xStride*0]= (a-c);
1561 block[stride*1 + xStride*1]= (e-b);
1566 * gets the chroma qp.
/* Maps a luma qscale to the chroma QP via the PPS-derived lookup table;
 * t selects between the Cb and Cr chroma_qp_index_offset tables. */
1568 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1569 return h->pps.chroma_qp_table[t][qscale];
1572 //FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
1573 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
/* Encoder-side quantization of a coefficient block in scan order.
 * Intra blocks get a larger rounding bias (1/3) than inter (1/6); the
 * threshold1/2 trick classifies |level| with a single unsigned compare.
 * The separate_dc path quantizes block[0] with a different shift
 * (QUANT_SHIFT-2 for luma DC via the qscale+18 table, QUANT_SHIFT+1 for
 * the other DC case). Returns the index of the last non-zero coefficient.
 * NOTE(review): loop headers and else branches are outside this excerpt. */
1574 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1576 const int * const quant_table= quant_coeff[qscale];
1577 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1578 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1579 const unsigned int threshold2= (threshold1<<1);
/* Luma-DC variant: 2 bits less downshift (DC uses a finer scale). */
1585 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1586 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1587 const unsigned int dc_threshold2= (dc_threshold1<<1);
1589 int level= block[0]*quant_coeff[qscale+18][0];
/* True iff |level| > dc_threshold1, i.e. the coefficient survives. */
1590 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1592 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1595 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1598 // last_non_zero = i;
/* Other DC variant: 1 bit more downshift. */
1603 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1604 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1605 const unsigned int dc_threshold2= (dc_threshold1<<1);
1607 int level= block[0]*quant_table[0];
1608 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1610 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1613 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1616 // last_non_zero = i;
/* AC coefficients, visited in scantable order. */
1629 const int j= scantable[i];
1630 int level= block[j]*quant_table[j];
1632 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1633 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1634 if(((unsigned)(level+threshold1))>threshold2){
1636 level= (bias + level)>>QUANT_SHIFT;
1639 level= (bias - level)>>QUANT_SHIFT;
1648 return last_non_zero;
/* Motion-compensates one partition from a single reference picture:
 * quarter-pel luma via qpix_op and eighth-pel chroma via chroma_op.
 * When the motion vector points outside the picture (plus the filter
 * margin), pixels are replicated with ff_emulated_edge_mc first.
 * NOTE(review): some branches/braces are outside this excerpt. */
1651 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1652 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1653 int src_x_offset, int src_y_offset,
1654 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1655 MpegEncContext * const s = &h->s;
/* MV in quarter-pel units; offsets are in full pels (hence *8... the
 * offsets are pre-scaled by the caller — TODO confirm units). */
1656 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1657 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1658 const int luma_xy= (mx&3) + ((my&3)<<2);
1659 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1660 uint8_t * src_cb, * src_cr;
1661 int extra_width= h->emu_edge_width;
1662 int extra_height= h->emu_edge_height;
1664 const int full_mx= mx>>2;
1665 const int full_my= my>>2;
1666 const int pic_width = 16*s->mb_width;
1667 const int pic_height = 16*s->mb_height >> MB_FIELD;
1669 if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
/* Sub-pel positions read 2 extra pixels on each side for the 6-tap filter. */
1672 if(mx&7) extra_width -= 3;
1673 if(my&7) extra_height -= 3;
1675 if( full_mx < 0-extra_width
1676 || full_my < 0-extra_height
1677 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1678 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1679 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1680 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1684 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1686 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
/* Skip chroma entirely in grayscale decoding mode. */
1689 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1692 // chroma offset when predicting from a field of opposite parity
1693 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1694 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1696 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1697 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1700 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1701 src_cb= s->edge_emu_buffer;
1703 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1706 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1707 src_cr= s->edge_emu_buffer;
1709 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/* Standard (non-weighted) motion compensation for one partition:
 * "put" from list0 (if used), then "avg" in list1 (if used), so
 * bi-prediction averages the two references into dest.
 * NOTE(review): some branches are outside this excerpt. */
1712 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1713 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1714 int x_offset, int y_offset,
1715 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1716 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1717 int list0, int list1){
1718 MpegEncContext * const s = &h->s;
/* Start with the "put" variants; switch to "avg" after the first list. */
1719 qpel_mc_func *qpix_op= qpix_put;
1720 h264_chroma_mc_func chroma_op= chroma_put;
/* Advance dest pointers to this partition, then make offsets absolute. */
1722 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1723 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1724 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1725 x_offset += 8*s->mb_x;
1726 y_offset += 8*(s->mb_y >> MB_FIELD);
1729 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1730 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1731 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1732 qpix_op, chroma_op);
/* Second pass averages into the already-written pixels. */
1735 chroma_op= chroma_avg;
1739 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1740 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1741 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1742 qpix_op, chroma_op);
/* Weighted-prediction motion compensation for one partition.
 * Bi-directional: both references are predicted separately (list1 into a
 * scratch buffer) and combined with implicit (use_weight==2) or explicit
 * biweights. Uni-directional: a single prediction is scaled in place
 * with the explicit luma/chroma weights and offsets.
 * NOTE(review): some branches/braces are outside this excerpt. */
1746 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1747 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1748 int x_offset, int y_offset,
1749 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1750 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1751 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1752 int list0, int list1){
1753 MpegEncContext * const s = &h->s;
1755 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1756 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1757 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1758 x_offset += 8*s->mb_x;
1759 y_offset += 8*(s->mb_y >> MB_FIELD);
1762 /* don't optimize for luma-only case, since B-frames usually
1763 * use implicit weights => chroma too. */
1764 uint8_t *tmp_cb = s->obmc_scratchpad;
1765 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1766 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1767 int refn0 = h->ref_cache[0][ scan8[n] ];
1768 int refn1 = h->ref_cache[1][ scan8[n] ];
/* Predict list0 directly into dest, list1 into the scratchpad. */
1770 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1771 dest_y, dest_cb, dest_cr,
1772 x_offset, y_offset, qpix_put, chroma_put);
1773 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1774 tmp_y, tmp_cb, tmp_cr,
1775 x_offset, y_offset, qpix_put, chroma_put);
/* Implicit weighting: weights sum to 64, derived from POC distances. */
1777 if(h->use_weight == 2){
1778 int weight0 = h->implicit_weight[refn0][refn1];
1779 int weight1 = 64 - weight0;
1780 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1781 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1782 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
/* Explicit weighting: per-ref weights/offsets from the slice header. */
1784 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1785 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1786 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1787 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1788 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1789 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1790 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1791 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1792 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
/* Uni-directional: predict then weight in place. */
1795 int list = list1 ? 1 : 0;
1796 int refn = h->ref_cache[list][ scan8[n] ];
1797 Picture *ref= &h->ref_list[list][refn];
1798 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1799 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1800 qpix_put, chroma_put);
1802 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1803 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1804 if(h->use_weight_chroma){
1805 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1806 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1807 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1808 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
/* Dispatches one partition to weighted or standard motion compensation.
 * The weighted path is taken for explicit weighting (use_weight==1) or
 * for implicit bi-prediction whose weight differs from the plain 32/32
 * average (which the standard avg path already produces). */
1813 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1814 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1815 int x_offset, int y_offset,
1816 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1817 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1818 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1819 int list0, int list1){
1820 if((h->use_weight==2 && list0 && list1
1821 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1822 || h->use_weight==1)
1823 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1824 x_offset, y_offset, qpix_put, chroma_put,
/* [0] = 16-wide luma op, [3] = 8-wide chroma op in the weight tables. */
1825 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1827 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1828 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1831 static inline void prefetch_motion(H264Context *h, int list){
1832 /* fetch pixels for estimated mv 4 macroblocks ahead
1833 * optimized for 64byte cache lines */
1834 MpegEncContext * const s = &h->s;
1835 const int refn = h->ref_cache[list][scan8[0]];
/* Predicted source position: current MB's MV applied 4 MBs (64 px) ahead. */
1837 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1838 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1839 uint8_t **src= h->ref_list[list][refn].data;
1840 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1841 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* Cb and Cr are assumed contiguous: one prefetch covers both via the
 * src[2]-src[1] stride — NOTE(review): relies on plane layout, confirm. */
1842 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1843 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/* Performs all motion compensation for one inter macroblock, dispatching
 * to mc_part per partition according to mb_type: 16x16, 16x8, 8x16, or
 * 8x8 with per-8x8 sub-partitions (8x8/8x4/4x8/4x4). The weight_op/
 * weight_avg indices select the function for each partition width/height.
 * NOTE(review): some loop headers/braces are outside this excerpt. */
1847 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1848 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1849 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1850 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1851 MpegEncContext * const s = &h->s;
1852 const int mb_xy= h->mb_xy;
1853 const int mb_type= s->current_picture.mb_type[mb_xy];
1855 assert(IS_INTER(mb_type));
/* Prefetch list0 reference pixels before doing the work; list1 after. */
1857 prefetch_motion(h, 0);
1859 if(IS_16X16(mb_type)){
1860 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1861 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1862 &weight_op[0], &weight_avg[0],
1863 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1864 }else if(IS_16X8(mb_type)){
/* Top and bottom 16x8 halves, each with its own direction flags. */
1865 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1866 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1867 &weight_op[1], &weight_avg[1],
1868 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1869 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1870 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1871 &weight_op[1], &weight_avg[1],
1872 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1873 }else if(IS_8X16(mb_type)){
/* Left and right 8x16 halves; delta is the byte offset between them. */
1874 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1875 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1876 &weight_op[2], &weight_avg[2],
1877 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1878 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1879 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1880 &weight_op[2], &weight_avg[2],
1881 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1885 assert(IS_8X8(mb_type));
/* 8x8 mode: each of the 4 blocks has its own sub-partitioning. */
1888 const int sub_mb_type= h->sub_mb_type[i];
1890 int x_offset= (i&1)<<2;
1891 int y_offset= (i&2)<<1;
1893 if(IS_SUB_8X8(sub_mb_type)){
1894 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1895 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1896 &weight_op[3], &weight_avg[3],
1897 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1898 }else if(IS_SUB_8X4(sub_mb_type)){
1899 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1900 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1901 &weight_op[4], &weight_avg[4],
1902 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1903 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1904 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1905 &weight_op[4], &weight_avg[4],
1906 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1907 }else if(IS_SUB_4X8(sub_mb_type)){
1908 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1909 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1910 &weight_op[5], &weight_avg[5],
1911 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1912 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1913 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1914 &weight_op[5], &weight_avg[5],
1915 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1918 assert(IS_SUB_4X4(sub_mb_type));
1920 int sub_x_offset= x_offset + 2*(j&1);
1921 int sub_y_offset= y_offset + (j&2);
1922 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1923 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1924 &weight_op[6], &weight_avg[6],
1925 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1931 prefetch_motion(h, 1);
/* One-time initialization of the CAVLC tables (coeff_token, total_zeros,
 * run_before and their chroma-DC variants) from the static length/bits
 * arrays; guarded by the `done` flag so repeated calls are no-ops.
 * NOTE(review): the guard check and loop headers are outside this excerpt. */
1934 static av_cold void decode_init_vlc(void){
1935 static int done = 0;
1941 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1942 &chroma_dc_coeff_token_len [0], 1, 1,
1943 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
/* One coeff_token table per nC context class. */
1946 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1947 &coeff_token_len [i][0], 1, 1,
1948 &coeff_token_bits[i][0], 1, 1, 1);
1952 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1953 &chroma_dc_total_zeros_len [i][0], 1, 1,
1954 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
1956 for(i=0; i<15; i++){
1957 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
1958 &total_zeros_len [i][0], 1, 1,
1959 &total_zeros_bits[i][0], 1, 1, 1);
1963 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
1964 &run_len [i][0], 1, 1,
1965 &run_bits[i][0], 1, 1, 1);
/* run_before for zeros_left > 6 uses its own larger table. */
1967 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1968 &run_len [6][0], 1, 1,
1969 &run_bits[6][0], 1, 1, 1);
/**
 * Free all per-context tables allocated by alloc_tables()/context_init(),
 * plus the SPS/PPS buffers and the per-thread-context scratch buffers.
 * Safe on partially-allocated contexts because av_freep() tolerates NULL
 * and NULLs the pointer (prevents double free).
 * NOTE(review): listing gap — the declaration of i/hx and the NULL check
 * on hx are on missing lines.
 */
1973 static void free_tables(H264Context *h){
1976     av_freep(&h->intra4x4_pred_mode);
1977     av_freep(&h->chroma_pred_mode_table);
1978     av_freep(&h->cbp_table);
1979     av_freep(&h->mvd_table[0]);
1980     av_freep(&h->mvd_table[1]);
1981     av_freep(&h->direct_table);
1982     av_freep(&h->non_zero_count);
1983     av_freep(&h->slice_table_base);
/* slice_table points into slice_table_base — must be cleared with it */
1984     h->slice_table= NULL;
1986     av_freep(&h->mb2b_xy);
1987     av_freep(&h->mb2b8_xy);
1989     for(i = 0; i < MAX_SPS_COUNT; i++)
1990         av_freep(h->sps_buffers + i);
1992     for(i = 0; i < MAX_PPS_COUNT; i++)
1993         av_freep(h->pps_buffers + i);
/* per-thread contexts own their own border/scratch buffers */
1995     for(i = 0; i < h->s.avctx->thread_count; i++) {
1996         hx = h->thread_context[i];
1998         av_freep(&hx->top_borders[1]);
1999         av_freep(&hx->top_borders[0]);
2000         av_freep(&hx->s.obmc_scratchpad);
/**
 * Precompute the 8x8 dequantization table for all 52 QP values from the
 * PPS scaling matrices. If both 8x8 scaling matrices are identical, the
 * second list aliases the first buffer instead of being recomputed.
 * The `transpose` flag reorders coefficients when a non-C IDCT (with a
 * different scan layout) is in use.
 * NOTE(review): listing gap — declarations of i/q/x and the inner x-loop
 * header are on missing lines.
 */
2004 static void init_dequant8_coeff_table(H264Context *h){
2006     const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2007     h->dequant8_coeff[0] = h->dequant8_buffer[0];
2008     h->dequant8_coeff[1] = h->dequant8_buffer[1];
2010     for(i=0; i<2; i++ ){
/* identical matrices -> share buffer 0 and skip recomputation */
2011         if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2012             h->dequant8_coeff[1] = h->dequant8_buffer[0];
2016         for(q=0; q<52; q++){
/* qp = 6*div6[q] + rem6[q]; shift grows by 1 every 6 QP steps */
2017             int shift = ff_div6[q];
2018             int idx = ff_rem6[q];
2020                 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2021                     ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2022                     h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Precompute the 4x4 dequantization tables (6 lists: intra/inter for
 * Y/Cb/Cr) for all 52 QP values from the PPS scaling matrices. Lists with
 * identical scaling matrices alias an earlier buffer to save work/space.
 * NOTE(review): listing gap — declarations of i/j/q/x, the j-search loop
 * header, and the inner x-loop header are on missing lines.
 */
2027 static void init_dequant4_coeff_table(H264Context *h){
2029     const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2030     for(i=0; i<6; i++ ){
2031         h->dequant4_coeff[i] = h->dequant4_buffer[i];
/* reuse an earlier list's buffer when its scaling matrix matches */
2033             if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2034                 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2041         for(q=0; q<52; q++){
/* +2 keeps extra precision for the 4x4 dequant relative to 8x8 */
2042             int shift = ff_div6[q] + 2;
2043             int idx = ff_rem6[q];
2045                 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2046                     ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2047                     h->pps.scaling_matrix4[i][x]) << shift;
/**
 * (Re)build all dequant tables for the current PPS/SPS: always the 4x4
 * tables, the 8x8 tables only when 8x8 transform is enabled, and — for
 * lossless (transform bypass) — force QP 0 entries to the identity scale
 * (1<<6) so dequantization becomes a no-op.
 * NOTE(review): listing gap — the i/x loop headers around the bypass
 * assignments are on missing lines.
 */
2052 static void init_dequant_tables(H264Context *h){
2054     init_dequant4_coeff_table(h);
2055     if(h->pps.transform_8x8_mode)
2056         init_dequant8_coeff_table(h);
2057     if(h->sps.transform_bypass){
2060                 h->dequant4_coeff[i][0][x] = 1<<6;
2061         if(h->pps.transform_8x8_mode)
2064                     h->dequant8_coeff[i][0][x] = 1<<6;
/**
 * Allocate the per-stream macroblock tables shared by all slice threads.
 * needs width/height
 * big_mb_num includes one extra row of mb_stride for out-of-frame accesses.
 * @return 0 on success; on allocation failure CHECKED_ALLOCZ jumps to the
 *         (not visible here) fail label which frees everything.
 * NOTE(review): listing gap — declarations of x/y and the fail label /
 * return statements are on missing lines.
 */
2073 static int alloc_tables(H264Context *h){
2074     MpegEncContext * const s = &h->s;
2075     const int big_mb_num= s->mb_stride * (s->mb_height+1);
2078     CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
2080     CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
2081     CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2082     CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2084     CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2085     CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2086     CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2087     CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
/* -1 marks "no slice"; slice_table is offset so the row above/left of the
 * frame is addressable without bounds checks */
2089     memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(uint8_t));
2090     h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2092     CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
2093     CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
/* map macroblock index -> 4x4 (b) and 8x8 (b8) motion-grid indices */
2094     for(y=0; y<s->mb_height; y++){
2095         for(x=0; x<s->mb_width; x++){
2096             const int mb_xy= x + y*s->mb_stride;
2097             const int b_xy = 4*x + 4*y*h->b_stride;
2098             const int b8_xy= 2*x + 2*y*h->b8_stride;
2100             h->mb2b_xy [mb_xy]= b_xy;
2101             h->mb2b8_xy[mb_xy]= b8_xy;
/* scratchpad is allocated lazily in frame_start() once linesize is known */
2105     s->obmc_scratchpad = NULL;
2107     if(!h->dequant4_coeff[0])
2108         init_dequant_tables(h);
/**
 * Mimic alloc_tables(), but for every context thread.
 * Shares (does not copy) the per-stream tables of `src` with `dst`;
 * only the per-thread scratchpad stays private (reset to NULL) and the
 * prediction function pointers are re-initialized for `dst`.
 */
2119 static void clone_tables(H264Context *dst, H264Context *src){
2120     dst->intra4x4_pred_mode       = src->intra4x4_pred_mode;
2121     dst->non_zero_count           = src->non_zero_count;
2122     dst->slice_table              = src->slice_table;
2123     dst->cbp_table                = src->cbp_table;
2124     dst->mb2b_xy                  = src->mb2b_xy;
2125     dst->mb2b8_xy                 = src->mb2b8_xy;
2126     dst->chroma_pred_mode_table   = src->chroma_pred_mode_table;
2127     dst->mvd_table[0]             = src->mvd_table[0];
2128     dst->mvd_table[1]             = src->mvd_table[1];
2129     dst->direct_table             = src->direct_table;
/* per-thread scratch buffer — allocated lazily in frame_start() */
2131     dst->s.obmc_scratchpad = NULL;
2132     ff_h264_pred_init(&dst->hpc, src->s.codec_id);
/**
 * Allocate buffers which are not shared amongst multiple threads.
 * top_borders holds one saved row of Y(16)+Cb(8)+Cr(8) samples per
 * macroblock column, two copies for MBAFF pairs.
 * @return 0 on success, -1 on allocation failure.
 * NOTE(review): listing gap — the `return 0` and fail label are on
 * missing lines.
 */
2139 static int context_init(H264Context *h){
2140     CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2141     CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2145     return -1; // free_tables will clean up for us
/**
 * One-time initialization shared by the H.264 and SVQ3 init paths:
 * copies dimensions/codec id from the AVCodecContext, sets up the intra
 * prediction function table, and seeds the scaling matrices with the
 * spec default flat value 16 (used until a PPS overrides them).
 */
2148 static av_cold void common_init(H264Context *h){
2149     MpegEncContext * const s = &h->s;
2151     s->width = s->avctx->width;
2152     s->height = s->avctx->height;
2153     s->codec_id= s->avctx->codec->id;
2155     ff_h264_pred_init(&h->hpc, s->codec_id);
/* -1 == "no PPS applied yet" — forces dequant table build on first PPS */
2157     h->dequant_coeff_pps= -1;
2158     s->unrestricted_mv=1;
2159     s->decode=1; //FIXME
/* flat default scaling lists per the spec (Flat_4x4_16 / Flat_8x8_16) */
2161     memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2162     memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/**
 * AVCodec.init callback for the H.264 decoder: sets MpegEncContext
 * defaults, output/pixel formats, and detects AVCC-style extradata
 * (first byte == 1) for length-prefixed NAL parsing.
 * NOTE(review): listing gap — the common_init() call, the else branch of
 * the pix_fmt selection, the is_avc/nal_length handling and the return
 * statement are on missing lines.
 */
2165 static av_cold int decode_init(AVCodecContext *avctx){
2166     H264Context *h= avctx->priv_data;
2167     MpegEncContext * const s = &h->s;
2169     MPV_decode_defaults(s);
2174     s->out_format = FMT_H264;
2175     s->workaround_bugs= avctx->workaround_bugs;
2178 //    s->decode_mb= ff_h263_decode_mb;
2179     s->quarter_sample = 1;
/* SVQ3 uses full-range (JPEG) chroma */
2182     if(avctx->codec_id == CODEC_ID_SVQ3)
2183         avctx->pix_fmt= PIX_FMT_YUVJ420P;
2185         avctx->pix_fmt= PIX_FMT_YUV420P;
/* leading 0x01 byte => AVCC (length-prefixed) extradata, not Annex B */
2189     if(avctx->extradata_size > 0 && avctx->extradata &&
2190        *(char *)avctx->extradata == 1){
2197     h->thread_context[0] = h;
/* INT_MIN == "nothing output yet" sentinel for output POC ordering */
2198     h->outputed_poc = INT_MIN;
/**
 * Per-frame setup: starts the MPV frame and error resilience, resets the
 * key_frame flag (IDR handling replaces pict_type-based marking),
 * precomputes the per-block destination offsets for frame and field
 * (MBAFF) layouts, lazily allocates the per-thread scratchpads (needs
 * linesize, so it cannot live in alloc_tables()), and resets reference
 * bookkeeping on the current picture.
 * @return 0 on success, negative on MPV_frame_start() failure.
 * NOTE(review): listing gap — declaration of i, the `return -1` for
 * MPV_frame_start failure, the second i-loop header, and the final
 * `return 0` are on missing lines.
 */
2202 static int frame_start(H264Context *h){
2203     MpegEncContext * const s = &h->s;
2206     if(MPV_frame_start(s, s->avctx) < 0)
2208     ff_er_frame_start(s);
2210      * MPV_frame_start uses pict_type to derive key_frame.
2211      * This is incorrect for H.264; IDR markings must be used.
2212      * Zero here; IDR markings per slice in frame or fields are ORed in later.
2213      * See decode_nal_units().
2215     s->current_picture_ptr->key_frame= 0;
2217     assert(s->linesize && s->uvlinesize);
/* block_offset[0..23]: frame layout; [24..47]: field (doubled stride) */
2219     for(i=0; i<16; i++){
2220         h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2221         h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2224         h->block_offset[16+i]=
2225         h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2226         h->block_offset[24+16+i]=
2227         h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2230     /* can't be in alloc_tables because linesize isn't known there.
2231      * FIXME: redo bipred weight to not require extra buffer? */
2232     for(i = 0; i < s->avctx->thread_count; i++)
2233         if(!h->thread_context[i]->s.obmc_scratchpad)
2234             h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2236     /* some macroblocks will be accessed before they're available */
2237     if(FRAME_MBAFF || s->avctx->thread_count > 1)
2238         memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2240 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2242     // We mark the current picture as non-reference after allocating it, so
2243     // that if we break out due to an error it can be released automatically
2244     // in the next MPV_frame_start().
2245     // SVQ3 as well as most other codecs have only last/next/current and thus
2246     // get released even with set reference, besides SVQ3 and others do not
2247     // mark frames as reference later "naturally".
2248     if(s->codec_id != CODEC_ID_SVQ3)
2249         s->current_picture_ptr->reference= 0;
/* POCs are filled in later from the slice headers */
2251     s->current_picture_ptr->field_poc[0]=
2252     s->current_picture_ptr->field_poc[1]= INT_MAX;
2253     assert(s->current_picture_ptr->long_ref==0);
/**
 * Save the right column (left_border for the next MB) and bottom row
 * (top_borders for the MB row below) of the just-decoded macroblock,
 * so intra prediction/deblocking of neighbours can run after this MB is
 * overwritten by the in-place deblocking filter.
 * `simple` skips the gray-only (CODEC_FLAG_GRAY) chroma short-circuit.
 * NOTE(review): listing gap — declaration of i, the src_y adjustment and
 * the chroma i-loop header are on missing lines.
 */
2258 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2259     MpegEncContext * const s = &h->s;
2263     src_cb -= uvlinesize;
2264     src_cr -= uvlinesize;
2266     // There are two lines saved, the line above the top macroblock of a pair,
2267     // and the line above the bottom macroblock
2268     h->left_border[0]= h->top_borders[0][s->mb_x][15];
2269     for(i=1; i<17; i++){
2270         h->left_border[i]= src_y[15+i*  linesize];
2273     *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
2274     *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2276     if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
/* chroma left borders live at offsets 17 (Cb) and 17+9 (Cr) */
2277         h->left_border[17  ]= h->top_borders[0][s->mb_x][16+7];
2278         h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
2280             h->left_border[i+17  ]= src_cb[7+i*uvlinesize];
2281             h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
2283         *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2284         *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/**
 * Swap (xchg=1) or restore (xchg=0) the saved left/top border samples
 * with the picture edges around the current macroblock, so intra
 * prediction sees pre-deblocking neighbour pixels. With
 * deblocking_filter==2 borders are only exchanged within the same slice.
 * NOTE(review): listing gap — declarations (i, mb_xy, temp8/temp64), the
 * else of the slice check, the XCHG macro body, and several if()
 * headers guarding the top exchanges are on missing lines.
 */
2288 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2289     MpegEncContext * const s = &h->s;
/* mode 2: deblock only inside the current slice */
2296     if(h->deblocking_filter == 2) {
2298         deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2299         deblock_top  = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2301         deblock_left = (s->mb_x > 0);
2302         deblock_top =  (s->mb_y > 0);
/* step back one row/column so [1..] indexes the border samples */
2305     src_y  -=   linesize + 1;
2306     src_cb -= uvlinesize + 1;
2307     src_cr -= uvlinesize + 1;
2309 #define XCHG(a,b,t,xchg)\
2316         for(i = !deblock_top; i<17; i++){
2317             XCHG(h->left_border[i     ], src_y [i*  linesize], temp8, xchg);
2322         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2323         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
/* also restore the top-right sample block of the next MB column */
2324         if(s->mb_x+1 < s->mb_width){
2325             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2329     if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2331             for(i = !deblock_top; i<9; i++){
2332                 XCHG(h->left_border[i+17  ], src_cb[i*uvlinesize], temp8, xchg);
2333                 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
2337             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2338             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
/**
 * MBAFF variant of backup_mb_border(): saves borders for a whole
 * macroblock pair — 2x16 luma rows of left border and two bottom rows
 * (top_borders[0] and [1]) for the pair below.
 * NOTE(review): listing gap — declaration of i is on a missing line.
 */
2343 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
2344     MpegEncContext * const s = &h->s;
2347     src_y  -= 2 *   linesize;
2348     src_cb -= 2 * uvlinesize;
2349     src_cr -= 2 * uvlinesize;
2351     // There are two lines saved, the line above the top macroblock of a pair,
2352     // and the line above the bottom macroblock
2353     h->left_border[0]= h->top_borders[0][s->mb_x][15];
2354     h->left_border[1]= h->top_borders[1][s->mb_x][15];
2355     for(i=2; i<34; i++){
2356         h->left_border[i]= src_y[15+i*  linesize];
2359     *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y +  32*linesize);
2360     *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
2361     *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y +  33*linesize);
2362     *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
2364     if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
/* chroma left borders: Cb at offset 34, Cr at 34+18 (two fields each) */
2365         h->left_border[34     ]= h->top_borders[0][s->mb_x][16+7];
2366         h->left_border[34+ 1  ]= h->top_borders[1][s->mb_x][16+7];
2367         h->left_border[34+18  ]= h->top_borders[0][s->mb_x][24+7];
2368         h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
2369         for(i=2; i<18; i++){
2370             h->left_border[i+34   ]= src_cb[7+i*uvlinesize];
2371             h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
2373         *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
2374         *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
2375         *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
2376         *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/**
 * MBAFF variant of xchg_mb_border(): swap (xchg=1) / restore (xchg=0)
 * the saved borders of a macroblock pair with the picture, covering both
 * top_borders rows and 34 left-border luma samples.
 * deblock_top needs mb_y > 1 because a pair occupies two MB rows.
 * NOTE(review): listing gap — declarations (i, temp8/temp64), the XCHG
 * macro body and the guarding if(deblock_left/top) headers are on
 * missing lines.
 */
2380 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
2381     MpegEncContext * const s = &h->s;
2384     int deblock_left = (s->mb_x > 0);
2385     int deblock_top  = (s->mb_y > 1);
2387     tprintf(s->avctx, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
2389     src_y  -= 2 *   linesize + 1;
2390     src_cb -= 2 * uvlinesize + 1;
2391     src_cr -= 2 * uvlinesize + 1;
2393 #define XCHG(a,b,t,xchg)\
2400         for(i = (!deblock_top)<<1; i<34; i++){
2401             XCHG(h->left_border[i     ], src_y [i*  linesize], temp8, xchg);
2406         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2407         XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2408         XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
2409         XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
2410         if(s->mb_x+1 < s->mb_width){
2411             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2412             XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
2416     if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2418             for(i = (!deblock_top) << 1; i<18; i++){
2419                 XCHG(h->left_border[i+34   ], src_cb[i*uvlinesize], temp8, xchg);
2420                 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
2424             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2425             XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2426             XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
2427             XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/**
 * Reconstruct one macroblock: compute destination pointers, handle MBAFF
 * field addressing, pick IDCT functions (bypass / 8x8 / 4x4), run intra
 * prediction or inter motion compensation, add the residual, and finally
 * deblock (with border backup/exchange for MBAFF pairs).
 * `simple`=1 compiles out the uncommon paths (MBAFF, PCM, gray, SVQ3),
 * giving the fast hl_decode_mb_simple() specialization via inlining.
 * NOTE(review): this numbered listing has gaps throughout — several
 * declarations (i, list), else/closing braces, and guard lines are on
 * missing lines; structure below is incomplete as shown.
 */
2432 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2433     MpegEncContext * const s = &h->s;
2434     const int mb_x= s->mb_x;
2435     const int mb_y= s->mb_y;
2436     const int mb_xy= h->mb_xy;
2437     const int mb_type= s->current_picture.mb_type[mb_xy];
2438     uint8_t  *dest_y, *dest_cb, *dest_cr;
2439     int linesize, uvlinesize /*dct_offset*/;
2441     int *block_offset = &h->block_offset[0];
2442     const unsigned int bottom = mb_y & 1;
2443     const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2444     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2445     void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2447     dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
2448     dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2449     dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
/* prefetch next MB's destination rows to hide memory latency */
2451     s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2452     s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* field macroblock: double the strides, use the field block offsets */
2454     if (!simple && MB_FIELD) {
2455         linesize   = h->mb_linesize   = s->linesize * 2;
2456         uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2457         block_offset = &h->block_offset[24];
2458         if(mb_y&1){ //FIXME move out of this function?
2459             dest_y -= s->linesize*15;
2460             dest_cb-= s->uvlinesize*7;
2461             dest_cr-= s->uvlinesize*7;
/* rewrite ref cache so field parity is encoded into the ref index */
2465         for(list=0; list<h->list_count; list++){
2466             if(!USES_LIST(mb_type, list))
2468             if(IS_16X16(mb_type)){
2469                 int8_t *ref = &h->ref_cache[list][scan8[0]];
2470                 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2472                 for(i=0; i<16; i+=4){
2473                     //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
2474                     int ref = h->ref_cache[list][scan8[i]];
2476                         fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2482         linesize   = h->mb_linesize   = s->linesize;
2483         uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2484 //        dct_offset = s->linesize * 16;
/* select residual-add functions: lossless bypass, 8x8 or 4x4 IDCT */
2487     if(transform_bypass){
2489         idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2490     }else if(IS_8x8DCT(mb_type)){
2491         idct_dc_add = s->dsp.h264_idct8_dc_add;
2492         idct_add = s->dsp.h264_idct8_add;
2494         idct_dc_add = s->dsp.h264_idct_dc_add;
2495         idct_add = s->dsp.h264_idct_add;
/* MBAFF intra: temporarily swap in pre-deblock borders of the pair above */
2498     if(!simple && FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
2499        && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
2500         int mbt_y = mb_y&~1;
2501         uint8_t *top_y  = s->current_picture.data[0] + (mbt_y * 16* s->linesize  ) + mb_x * 16;
2502         uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2503         uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2504         xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
/* I_PCM: raw samples were stored in h->mb by the entropy decoder */
2507     if (!simple && IS_INTRA_PCM(mb_type)) {
2508         for (i=0; i<16; i++) {
2509             memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
2511         for (i=0; i<8; i++) {
2512             memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
2513             memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
2516         if(IS_INTRA(mb_type)){
2517             if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2518                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2520             if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2521                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2522                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2525             if(IS_INTRA4x4(mb_type)){
2526                 if(simple || !s->encoding){
2527                     if(IS_8x8DCT(mb_type)){
2528                         for(i=0; i<16; i+=4){
2529                             uint8_t * const ptr= dest_y + block_offset[i];
2530                             const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2531                             const int nnz = h->non_zero_count_cache[ scan8[i] ];
2532                             h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2533                                                    (h->topright_samples_available<<i)&0x4000, linesize);
/* nnz==1 with only the DC coeff set: cheaper DC-only add */
2535                                 if(nnz == 1 && h->mb[i*16])
2536                                     idct_dc_add(ptr, h->mb + i*16, linesize);
2538                                     idct_add(ptr, h->mb + i*16, linesize);
2542                     for(i=0; i<16; i++){
2543                         uint8_t * const ptr= dest_y + block_offset[i];
2545                         const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
/* if top-right is unavailable, replicate the rightmost top sample */
2548                         if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2549                             const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2550                             assert(mb_y || linesize <= block_offset[i]);
2551                             if(!topright_avail){
2552                                 tr= ptr[3 - linesize]*0x01010101;
2553                                 topright= (uint8_t*) &tr;
2555                                 topright= ptr + 4 - linesize;
2559                         h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2560                         nnz = h->non_zero_count_cache[ scan8[i] ];
2563                                 if(nnz == 1 && h->mb[i*16])
2564                                     idct_dc_add(ptr, h->mb + i*16, linesize);
2566                                     idct_add(ptr, h->mb + i*16, linesize);
2568                                 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2573                 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2575                     if(!transform_bypass)
2576                         h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2578                     svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2580             if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2581                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* inter macroblock: motion compensation into dest planes */
2583             hl_motion(h, dest_y, dest_cb, dest_cr,
2584                       s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2585                       s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2586                       s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* add luma residual (intra4x4 already added it during prediction) */
2590         if(!IS_INTRA4x4(mb_type)){
2592                 if(IS_INTRA16x16(mb_type)){
2593                     for(i=0; i<16; i++){
2594                         if(h->non_zero_count_cache[ scan8[i] ])
2595                             idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2596                         else if(h->mb[i*16])
2597                             idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2600                     const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2601                     for(i=0; i<16; i+=di){
2602                         int nnz = h->non_zero_count_cache[ scan8[i] ];
2604                             if(nnz==1 && h->mb[i*16])
2605                                 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2607                                 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2612                 for(i=0; i<16; i++){
2613                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2614                         uint8_t * const ptr= dest_y + block_offset[i];
2615                         svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* chroma residual: blocks 16..23 map to Cb (i&4==0) and Cr (i&4) */
2621         if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2622             uint8_t *dest[2] = {dest_cb, dest_cr};
2623             if(transform_bypass){
2624                 idct_add = idct_dc_add = s->dsp.add_pixels4;
2626                 idct_add = s->dsp.h264_idct_add;
2627                 idct_dc_add = s->dsp.h264_idct_dc_add;
2628                 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2629                 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2632                 for(i=16; i<16+8; i++){
2633                     if(h->non_zero_count_cache[ scan8[i] ])
2634                         idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2635                     else if(h->mb[i*16])
2636                         idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2639                 for(i=16; i<16+8; i++){
2640                     if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2641                         uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2642                         svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
2648     if(h->deblocking_filter) {
2649         if (!simple && FRAME_MBAFF) {
2650             //FIXME try deblocking one mb at a time?
2651             // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
/* shadow mb_y/mb_xy with the pair's top macroblock coordinates */
2652             const int mb_y= s->mb_y - 1;
2653             uint8_t  *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
2654             const int mb_xy= mb_x + mb_y*s->mb_stride;
2655             const int mb_type_top   = s->current_picture.mb_type[mb_xy];
2656             const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
/* wait until both MBs of the pair are decoded, then filter both */
2657             if (!bottom) return;
2658             pair_dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
2659             pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2660             pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2662             if(IS_INTRA(mb_type_top | mb_type_bottom))
2663                 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
2665             backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
2668             s->mb_y--; h->mb_xy -= s->mb_stride;
2669             tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
2670             fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
2671             h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2672             h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2673             filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
2675             s->mb_y++; h->mb_xy += s->mb_stride;
2676             tprintf(h->s.avctx, "call mbaff filter_mb\n");
2677             fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
2678             h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2679             h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2680             filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2682             tprintf(h->s.avctx, "call filter_mb\n");
2683             backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2684             fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2685             h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2686             h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2687             filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
/**
 * Process a macroblock; this case avoids checks for expensive uncommon cases.
 * Fast specialization: simple=1 lets the always-inlined
 * hl_decode_mb_internal() compile out MBAFF/PCM/gray/SVQ3 paths.
 */
2695 static void hl_decode_mb_simple(H264Context *h){
2696     hl_decode_mb_internal(h, 1);
/**
 * Process a macroblock; this handles edge cases, such as interlacing.
 * Slow specialization (simple=0); av_noinline keeps the rarely-taken
 * path out of the hot hl_decode_mb_simple() code.
 */
2702 static void av_noinline hl_decode_mb_complex(H264Context *h){
2703     hl_decode_mb_internal(h, 0);
/**
 * Dispatch macroblock reconstruction to the simple or complex variant
 * based on per-MB and per-stream features (MBAFF, field MB, PCM, non-H264
 * codec, gray decoding, encoding, or size-optimized builds).
 * NOTE(review): listing gap — the `return` for the !s->decode encoder
 * case and the `if(is_complex)` line are on missing lines.
 */
2706 static void hl_decode_mb(H264Context *h){
2707     MpegEncContext * const s = &h->s;
2708     const int mb_xy= h->mb_xy;
2709     const int mb_type= s->current_picture.mb_type[mb_xy];
2710     int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 ||
2711                     (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding) || ENABLE_SMALL;
2713     if(ENABLE_H264_ENCODER && !s->decode)
2717         hl_decode_mb_complex(h);
2718     else hl_decode_mb_simple(h);
/**
 * Convert a frame Picture (in place) into a single-field view: doubles
 * every plane's linesize, offsets the data pointers by one line for the
 * bottom field, sets reference to the field parity and poc to that
 * field's poc.
 * @param parity PICT_TOP_FIELD or PICT_BOTTOM_FIELD
 */
2721 static void pic_as_field(Picture *pic, const int parity){
2723     for (i = 0; i < 4; ++i) {
2724         if (parity == PICT_BOTTOM_FIELD)
2725             pic->data[i] += pic->linesize[i];
2726         pic->reference = parity;
2727         pic->linesize[i] *= 2;
2729     pic->poc= pic->field_poc[parity == PICT_BOTTOM_FIELD];
/**
 * Copy `src` into `dest`, converting to a field picture when `parity`
 * selects a single field, and bumping pic_id by `id_add`.
 * @return 1 if src is a reference for the requested parity (match), else
 *         presumably 0 — the return statement is on a missing line of
 *         this listing; verify against the full source.
 */
2732 static int split_field_copy(Picture *dest, Picture *src,
2733                             int parity, int id_add){
2734     int match = !!(src->reference & parity);
2738         if(parity != PICT_FRAME){
2739             pic_as_field(dest, parity);
2741             dest->pic_id += id_add;
/**
 * Build a default reference list segment from candidate pictures,
 * alternating between fields of parity `sel` and the opposite parity
 * (sel^3), per the field ref-list construction rules (H.264 8.2.4.2.5).
 * i[0]/i[1] scan `in` independently for each parity.
 * @param is_long nonzero for the long-term list (pic_id = index, not
 *        frame_num)
 * @return number of entries written — the `return index;` and the
 *         declarations of i[2]/index are on missing lines of this listing.
 */
2748 static int build_def_list(Picture *def, Picture **in, int len, int is_long, int sel){
2752     while(i[0]<len || i[1]<len){
2753         while(i[0]<len && !(in[ i[0] ] && (in[ i[0] ]->reference & sel)))
2755         while(i[1]<len && !(in[ i[1] ] && (in[ i[1] ]->reference & (sel^3))))
2758             in[ i[0] ]->pic_id= is_long ? i[0] : in[ i[0] ]->frame_num;
2759             split_field_copy(&def[index++], in[ i[0]++ ], sel  , 1);
2762             in[ i[1] ]->pic_id= is_long ? i[1] : in[ i[1] ]->frame_num;
2763             split_field_copy(&def[index++], in[ i[1]++ ], sel^3, 0);
/**
 * Append up to `len` pictures from `src` into `sorted`, ordered by POC
 * relative to `limit`: dir=0 selects POCs below limit in descending
 * order, dir=1 selects POCs above limit in ascending order (B-list
 * construction).
 * @return number of pictures appended — declarations of i/best_poc/out_i
 *         and the `return out_i;` are on missing lines of this listing.
 */
2770 static int add_sorted(Picture **sorted, Picture **src, int len, int limit, int dir){
/* sentinel: "no candidate found yet" for the chosen direction */
2775     best_poc= dir ? INT_MIN : INT_MAX;
2777     for(i=0; i<len; i++){
2778         const int poc= src[i]->poc;
2779         if(((poc > limit) ^ dir) && ((poc < best_poc) ^ dir)){
2781             sorted[out_i]= src[i];
2784         if(best_poc == (dir ? INT_MIN : INT_MAX))
2786         limit= sorted[out_i++]->poc - dir;
/**
 * fills the default_ref_list.
 * P slices: short-term refs (by frame_num order) then long-term refs.
 * B slices: list0 = POC-before-current descending + POC-after ascending;
 * list1 is the mirror; if both lists come out identical (and have >1
 * entry), the first two entries of list1 are swapped per the spec.
 * NOTE(review): listing gaps — declarations (i, list, len/lens, cur_poc),
 * the field/frame `if` around cur_poc, and `return 0;` are on missing
 * lines.
 */
2794 static int fill_default_ref_list(H264Context *h){
2795     MpegEncContext * const s = &h->s;
2798     if(h->slice_type_nos==FF_B_TYPE){
2799         Picture *sorted[32];
/* current POC: the current field's POC when decoding fields */
2804             cur_poc= s->current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
2806             cur_poc= s->current_picture_ptr->poc;
2808         for(list= 0; list<2; list++){
2809             len= add_sorted(sorted    , h->short_ref, h->short_ref_count, cur_poc, 1^list);
2810             len+=add_sorted(sorted+len, h->short_ref, h->short_ref_count, cur_poc, 0^list);
2812             len= build_def_list(h->default_ref_list[list]    , sorted     , len, 0, s->picture_structure);
2813             len+=build_def_list(h->default_ref_list[list]+len, h->long_ref, 16 , 1, s->picture_structure);
2816             if(len < h->ref_count[list])
2817                 memset(&h->default_ref_list[list][len], 0, sizeof(Picture)*(h->ref_count[list] - len));
/* identical lists: spec says swap the first two entries of list1 */
2821         if(lens[0] == lens[1] && lens[1] > 1){
2822             for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0] && i<lens[0]; i++);
2824                 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2827         len = build_def_list(h->default_ref_list[0]    , h->short_ref, h->short_ref_count, 0, s->picture_structure);
2828         len+= build_def_list(h->default_ref_list[0]+len, h-> long_ref, 16                , 1, s->picture_structure);
2830         if(len < h->ref_count[0])
2831             memset(&h->default_ref_list[0][len], 0, sizeof(Picture)*(h->ref_count[0] - len));
2834     for (i=0; i<h->ref_count[0]; i++) {
2835         tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2837     if(h->slice_type_nos==FF_B_TYPE){
2838         for (i=0; i<h->ref_count[1]; i++) {
2839             tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2846 static void print_short_term(H264Context *h);
2847 static void print_long_term(H264Context *h);
/**
 * Extract structure information about the picture described by pic_num in
 * the current decoding context (frame or field). Note that pic_num is
 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
 * @param pic_num picture number for which to extract structure information
 * @param structure one of PICT_XXX describing structure of picture
 *                      with pic_num
 * @return frame number (short term) or long term index of picture
 *         described by pic_num
 * NOTE(review): listing gap — the field-coding parity test on pic_num's
 * low bit and the `return pic_num >> 1` / frame-coding return are on
 * missing lines.
 */
2859 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
2860     MpegEncContext * const s = &h->s;
2862     *structure = s->picture_structure;
/* in field coding, even pic_num = same parity, odd = opposite field */
2865             /* opposite field */
2866             *structure ^= PICT_FRAME;
/**
 * Parse ref_pic_list_reordering() from the slice header and apply it:
 * starts from the default lists, then for each reordering command
 * (idc 0/1 = short-term by abs_diff_pic_num, idc 2 = long-term by index,
 * idc 3 = end) finds the referenced picture, shifts the list and inserts
 * it at the current position. Afterwards any still-empty slots are
 * papered over with the current picture (error concealment), and the
 * direct-mode tables are initialized.
 * @return 0 on success, -1 on bitstream errors.
 * NOTE(review): listing gaps — declarations (i, frame_num, long_idx),
 * several else/break/closing-brace lines, the short-ref "not found"
 * handling, and `return 0;` are on missing lines.
 */
2873 static int decode_ref_pic_list_reordering(H264Context *h){
2874     MpegEncContext * const s = &h->s;
2875     int list, index, pic_structure;
2877     print_short_term(h);
2880     for(list=0; list<h->list_count; list++){
2881         memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
/* ref_pic_list_reordering_flag_l0/l1 */
2883         if(get_bits1(&s->gb)){
2884             int pred= h->curr_pic_num;
2886             for(index=0; ; index++){
2887                 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
2888                 unsigned int pic_id;
2890                 Picture *ref = NULL;
/* idc 3 terminates the reordering loop */
2892                 if(reordering_of_pic_nums_idc==3)
2895                 if(index >= h->ref_count[list]){
2896                     av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
2900                 if(reordering_of_pic_nums_idc<3){
2901                     if(reordering_of_pic_nums_idc<2){
2902                         const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
2905                         if(abs_diff_pic_num > h->max_pic_num){
2906                             av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
/* idc 0 subtracts, idc 1 adds; wrap modulo max_pic_num */
2910                         if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
2911                         else                                pred+= abs_diff_pic_num;
2912                         pred &= h->max_pic_num - 1;
2914                         frame_num = pic_num_extract(h, pred, &pic_structure);
/* search short-term refs, newest first */
2916                         for(i= h->short_ref_count-1; i>=0; i--){
2917                             ref = h->short_ref[i];
2918                             assert(ref->reference);
2919                             assert(!ref->long_ref);
2921                                 ref->frame_num == frame_num &&
2922                                 (ref->reference & pic_structure)
/* idc 2: long-term reference selected by long_term_pic_idx */
2930                         pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
2932                         long_idx= pic_num_extract(h, pic_id, &pic_structure);
2935                             av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
2938                         ref = h->long_ref[long_idx];
2939                         assert(!(ref && !ref->reference));
2940                         if(ref && (ref->reference & pic_structure)){
2941                             ref->pic_id= pic_id;
2942                             assert(ref->long_ref);
2950                         av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
2951                         memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
/* insert: shift entries down to `index`, then place the new ref */
2953                         for(i=index; i+1<h->ref_count[list]; i++){
2954                             if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
2957                         for(; i > index; i--){
2958                             h->ref_list[list][i]= h->ref_list[list][i-1];
2960                         h->ref_list[list][index]= *ref;
2962                             pic_as_field(&h->ref_list[list][index], pic_structure);
2966                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
/* conceal holes: any empty slot gets the current picture */
2972     for(list=0; list<h->list_count; list++){
2973         for(index= 0; index < h->ref_count[list]; index++){
2974             if(!h->ref_list[list][index].data[0]){
2975                 av_log(h->s.avctx, AV_LOG_ERROR, "Missing reference picture\n");
2976                 h->ref_list[list][index]= s->current_picture; //FIXME this is not a sensible solution
2981     if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
2982         direct_dist_scale_factor(h);
2983     direct_ref_list_init(h);
/**
 * Split each frame reference into a top/bottom field pair for MBAFF decoding.
 * Field entries are stored at ref_list indices 16+2*i (top) and 16+2*i+1
 * (bottom): both use a doubled linesize, and the bottom field's data pointers
 * are advanced by one frame line.  The explicit (luma/chroma) and implicit
 * weighted-prediction tables are replicated onto the new field indices.
 * NOTE(review): this extracted chunk is missing interior lines (loop headers,
 * braces); comments describe only the visible code.
 */
2987 static void fill_mbaff_ref_list(H264Context *h){
2989 for(list=0; list<2; list++){ //FIXME try list_count
2990 for(i=0; i<h->ref_count[list]; i++){
2991 Picture *frame = &h->ref_list[list][i];
2992 Picture *field = &h->ref_list[list][16+2*i];
// double the stride so each field sees every other line of the frame
2995 field[0].linesize[j] <<= 1;
2996 field[0].reference = PICT_TOP_FIELD;
2997 field[1] = field[0];
// bottom field: start one (frame) line into the picture data
2999 field[1].data[j] += frame->linesize[j];
3000 field[1].reference = PICT_BOTTOM_FIELD;
// duplicate explicit weights/offsets onto both field entries
3002 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
3003 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
3005 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
3006 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
// implicit weights: copy along both axes (list0 field columns, list1 field rows)
3010 for(j=0; j<h->ref_count[1]; j++){
3011 for(i=0; i<h->ref_count[0]; i++)
3012 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
3013 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
3014 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
/**
 * Parse the explicit weighted-prediction table from the slice header
 * (pred_weight_table() syntax, H.264 spec 7.3.3.2).
 * Reads luma/chroma log2 weight denominators, then per-reference
 * weight/offset pairs guarded by luma_weight_flag / chroma_weight_flag;
 * absent entries fall back to the default weight (1<<denom) and offset 0.
 * Sets h->use_weight / h->use_weight_chroma when any non-default entry occurs.
 * NOTE(review): some interior lines of this chunk are missing (chroma loop
 * header, use_weight assignment, return); comments cover visible code only.
 */
3018 static int pred_weight_table(H264Context *h){
3019 MpegEncContext * const s = &h->s;
3021 int luma_def, chroma_def;
3024 h->use_weight_chroma= 0;
3025 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3026 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
// default weights correspond to "no scaling" (weight == 1<<denom)
3027 luma_def = 1<<h->luma_log2_weight_denom;
3028 chroma_def = 1<<h->chroma_log2_weight_denom;
3030 for(list=0; list<2; list++){
3031 for(i=0; i<h->ref_count[list]; i++){
3032 int luma_weight_flag, chroma_weight_flag;
3034 luma_weight_flag= get_bits1(&s->gb);
3035 if(luma_weight_flag){
3036 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3037 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3038 if( h->luma_weight[list][i] != luma_def
3039 || h->luma_offset[list][i] != 0)
3042 h->luma_weight[list][i]= luma_def;
3043 h->luma_offset[list][i]= 0;
3047 chroma_weight_flag= get_bits1(&s->gb);
3048 if(chroma_weight_flag){
3051 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3052 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3053 if( h->chroma_weight[list][i][j] != chroma_def
3054 || h->chroma_offset[list][i][j] != 0)
3055 h->use_weight_chroma= 1;
3060 h->chroma_weight[list][i][j]= chroma_def;
3061 h->chroma_offset[list][i][j]= 0;
// list 1 only exists for B slices
3066 if(h->slice_type_nos != FF_B_TYPE) break;
3068 h->use_weight= h->use_weight || h->use_weight_chroma;
/**
 * Initialize implicit weighted bi-prediction weights (weighted_bipred_idc==2,
 * H.264 spec 8.4.2.3.2) from POC distances between the current picture and
 * each (list0, list1) reference pair.  Falls back to equal weights (32/32)
 * when the scale factor is out of range or the references are equidistant.
 * NOTE(review): interior lines are missing from this chunk (equidistant
 * special case body, td==0 guard, closing braces); comments cover visible
 * code only.
 */
3072 static void implicit_weight_table(H264Context *h){
3073 MpegEncContext * const s = &h->s;
3075 int cur_poc = s->current_picture_ptr->poc;
// single references equidistant from current picture -> no weighting needed
3077 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3078 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3080 h->use_weight_chroma= 0;
3085 h->use_weight_chroma= 2;
3086 h->luma_log2_weight_denom= 5;
3087 h->chroma_log2_weight_denom= 5;
3089 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3090 int poc0 = h->ref_list[0][ref0].poc;
3091 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3092 int poc1 = h->ref_list[1][ref1].poc;
// td/tb/tx/dist_scale_factor per spec 8.4.2.3.2 (clipped temporal distances)
3093 int td = av_clip(poc1 - poc0, -128, 127);
3095 int tb = av_clip(cur_poc - poc0, -128, 127);
3096 int tx = (16384 + (FFABS(td) >> 1)) / td;
3097 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
// out-of-range scale factor -> default equal weight
3098 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3099 h->implicit_weight[ref0][ref1] = 32;
3101 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3103 h->implicit_weight[ref0][ref1] = 32;
3109 * Mark a picture as no longer needed for reference. The refmask
3110 * argument allows unreferencing of individual fields or the whole frame.
3111 * If the picture becomes entirely unreferenced, but is being held for
3112 * display purposes, it is marked as such.
3113 * @param refmask mask of fields to unreference; the mask is bitwise
3114 * anded with the reference marking of pic
3115 * @return non-zero if pic becomes entirely unreferenced (except possibly
3116 * for display purposes) zero if one of the fields remains in
3119 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
// clear the masked field reference bits; non-zero result means the picture
// is still referenced by at least one field
3121 if (pic->reference &= refmask) {
// fully unreferenced: if still queued for output, keep it alive with the
// special DELAYED_PIC_REF marker (see its definition above)
3124 for(i = 0; h->delayed_pic[i]; i++)
3125 if(pic == h->delayed_pic[i]){
3126 pic->reference=DELAYED_PIC_REF;
3134 * instantaneous decoder refresh.
/**
 * Perform an instantaneous decoder refresh: drop all long-term and
 * short-term references and reset frame-number tracking state.
 */
3136 static void idr(H264Context *h){
// release every long-term slot (mask 0 = unreference entirely)
3139 for(i=0; i<16; i++){
3140 remove_long(h, i, 0);
3142 assert(h->long_ref_count==0);
// release all short-term references
3144 for(i=0; i<h->short_ref_count; i++){
3145 unreference_pic(h, h->short_ref[i], 0);
3146 h->short_ref[i]= NULL;
3148 h->short_ref_count=0;
3149 h->prev_frame_num= 0;
3150 h->prev_frame_num_offset= 0;
3155 /* forget old pics after a seek */
3156 static void flush_dpb(AVCodecContext *avctx){
3157 H264Context *h= avctx->priv_data;
// drop all pictures queued for delayed output
3159 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
3160 if(h->delayed_pic[i])
3161 h->delayed_pic[i]->reference= 0;
3162 h->delayed_pic[i]= NULL;
// reset output ordering so any POC can be emitted after the seek
3164 h->outputed_poc= INT_MIN;
3166 if(h->s.current_picture_ptr)
3167 h->s.current_picture_ptr->reference= 0;
3168 h->s.first_field= 0;
// also flush the generic MPEG picture buffers
3169 ff_mpeg_flush(avctx);
3173 * Find a Picture in the short term reference list by frame number.
3174 * @param frame_num frame number to search for
3175 * @param idx the index into h->short_ref where returned picture is found
3176 * undefined if no picture found.
3177 * @return pointer to the found picture, or NULL if no pic with the provided
3178 * frame number is found
3180 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3181 MpegEncContext * const s = &h->s;
// linear scan of the short-term list for a matching frame_num
3184 for(i=0; i<h->short_ref_count; i++){
3185 Picture *pic= h->short_ref[i];
3186 if(s->avctx->debug&FF_DEBUG_MMCO)
3187 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3188 if(pic->frame_num == frame_num) {
3197 * Remove a picture from the short term reference list by its index in
3198 * that list. This does no checking on the provided index; it is assumed
3199 * to be valid. Other list entries are shifted down.
3200 * @param i index into h->short_ref of picture to remove.
3202 static void remove_short_at_index(H264Context *h, int i){
3203 assert(i >= 0 && i < h->short_ref_count);
3204 h->short_ref[i]= NULL;
// shift the remaining entries down to fill the gap (only if any remain)
3205 if (--h->short_ref_count)
3206 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3211 * @return the removed picture or NULL if an error occurs
/**
 * Unreference (per ref_mask) the short-term picture with the given frame_num
 * and, if it became entirely unreferenced, remove it from the short-term list.
 */
3213 static Picture * remove_short(H264Context *h, int frame_num, int ref_mask){
3214 MpegEncContext * const s = &h->s;
3218 if(s->avctx->debug&FF_DEBUG_MMCO)
3219 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3221 pic = find_short(h, frame_num, &i);
// only drop the list entry once no field of the picture remains referenced
3223 if(unreference_pic(h, pic, ref_mask))
3224 remove_short_at_index(h, i);
3231 * Remove a picture from the long term reference list by its index in
3233 * @return the removed picture or NULL if an error occurs
3235 static Picture * remove_long(H264Context *h, int i, int ref_mask){
3238 pic= h->long_ref[i];
// clear the slot only when the picture becomes entirely unreferenced
3240 if(unreference_pic(h, pic, ref_mask)){
3241 assert(h->long_ref[i]->long_ref == 1);
3242 h->long_ref[i]->long_ref= 0;
3243 h->long_ref[i]= NULL;
3244 h->long_ref_count--;
3252 * print short term list
3254 static void print_short_term(H264Context *h) {
// debug-only dump of the short-term reference list (gated on FF_DEBUG_MMCO)
3256 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3257 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3258 for(i=0; i<h->short_ref_count; i++){
3259 Picture *pic= h->short_ref[i];
3260 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3266 * print long term list
3268 static void print_long_term(H264Context *h) {
// debug-only dump of all 16 long-term slots (gated on FF_DEBUG_MMCO)
3270 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3271 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3272 for(i = 0; i < 16; i++){
3273 Picture *pic= h->long_ref[i];
3275 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3282 * Executes the reference picture marking (memory management control operations).
/**
 * Execute the decoded memory management control operations (MMCOs) against
 * the short/long-term reference lists, then apply the default sliding-window
 * marking for the current picture when no MMCO assigned it a reference slot.
 * See H.264 spec 8.2.5.  Finally enforces sps.ref_frame_count as a hard cap
 * to protect the short_ref/long_ref arrays against corrupt streams.
 * NOTE(review): this extracted chunk is missing interior lines (case labels,
 * breaks, some else branches); comments describe only the visible code.
 */
3284 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3285 MpegEncContext * const s = &h->s;
3287 int current_ref_assigned=0;
3290 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3291 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3293 for(i=0; i<mmco_count; i++){
3294 int structure, frame_num;
3295 if(s->avctx->debug&FF_DEBUG_MMCO)
3296 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
// resolve the (possibly field-coded) pic num to a frame_num + structure
3298 if( mmco[i].opcode == MMCO_SHORT2UNUSED
3299 || mmco[i].opcode == MMCO_SHORT2LONG){
3300 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3301 pic = find_short(h, frame_num, &j);
3303 if(mmco[i].opcode != MMCO_SHORT2LONG || !h->long_ref[mmco[i].long_arg]
3304 || h->long_ref[mmco[i].long_arg]->frame_num != frame_num)
3305 av_log(h->s.avctx, AV_LOG_ERROR, "mmco: unref short failure\n");
3310 switch(mmco[i].opcode){
3311 case MMCO_SHORT2UNUSED:
3312 if(s->avctx->debug&FF_DEBUG_MMCO)
3313 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
// unreference only the opposite-structure fields covered by this MMCO
3314 remove_short(h, frame_num, structure ^ PICT_FRAME);
3316 case MMCO_SHORT2LONG:
// move a short-term picture into a long-term slot, evicting any occupant
3317 if (h->long_ref[mmco[i].long_arg] != pic)
3318 remove_long(h, mmco[i].long_arg, 0);
3320 remove_short_at_index(h, j);
3321 h->long_ref[ mmco[i].long_arg ]= pic;
3322 if (h->long_ref[ mmco[i].long_arg ]){
3323 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3324 h->long_ref_count++;
3327 case MMCO_LONG2UNUSED:
3328 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3329 pic = h->long_ref[j];
3331 remove_long(h, j, structure ^ PICT_FRAME);
3332 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3333 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3336 // Comment below left from previous code as it is an interesting note.
3337 /* First field in pair is in short term list or
3338 * at a different long term index.
3339 * This is not allowed; see 7.4.3.3, notes 2 and 3.
3340 * Report the problem and keep the pair where it is,
3341 * and mark this field valid.
// assign the current picture to a long-term slot (MMCO_LONG)
3344 if (h->long_ref[mmco[i].long_arg] != s->current_picture_ptr) {
3345 remove_long(h, mmco[i].long_arg, 0);
3347 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3348 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3349 h->long_ref_count++;
3352 s->current_picture_ptr->reference |= s->picture_structure;
3353 current_ref_assigned=1;
3355 case MMCO_SET_MAX_LONG:
3356 assert(mmco[i].long_arg <= 16);
3357 // just remove the long term which index is greater than new max
3358 for(j = mmco[i].long_arg; j<16; j++){
3359 remove_long(h, j, 0);
// MMCO_RESET: drop all references and reset POC/frame_num state
3363 while(h->short_ref_count){
3364 remove_short(h, h->short_ref[0]->frame_num, 0);
3366 for(j = 0; j < 16; j++) {
3367 remove_long(h, j, 0);
3369 s->current_picture_ptr->poc=
3370 s->current_picture_ptr->field_poc[0]=
3371 s->current_picture_ptr->field_poc[1]=
3375 s->current_picture_ptr->frame_num= 0;
3381 if (!current_ref_assigned) {
3382 /* Second field of complementary field pair; the first field of
3383 * which is already referenced. If short referenced, it
3384 * should be first entry in short_ref. If not, it must exist
3385 * in long_ref; trying to put it on the short list here is an
3386 * error in the encoded bit stream (ref: 7.4.3.3, NOTE 2 and 3).
3388 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3389 /* Just mark the second field valid */
3390 s->current_picture_ptr->reference = PICT_FRAME;
3391 } else if (s->current_picture_ptr->long_ref) {
3392 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3393 "assignment for second field "
3394 "in complementary field pair "
3395 "(first field is long term)\n");
// default sliding-window: prepend current picture to the short-term list
3397 pic= remove_short(h, s->current_picture_ptr->frame_num, 0);
3399 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3402 if(h->short_ref_count)
3403 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3405 h->short_ref[0]= s->current_picture_ptr;
3406 h->short_ref_count++;
3407 s->current_picture_ptr->reference |= s->picture_structure;
3411 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3413 /* We have too many reference frames, probably due to corrupted
3414 * stream. Need to discard one frame. Prevents overrun of the
3415 * short_ref and long_ref buffers.
3417 av_log(h->s.avctx, AV_LOG_ERROR,
3418 "number of reference frames exceeds max (probably "
3419 "corrupt input), discarding one\n");
// prefer dropping a long-term ref only when no short-term refs exist
3421 if (h->long_ref_count && !h->short_ref_count) {
3422 for (i = 0; i < 16; ++i)
3427 remove_long(h, i, 0);
// otherwise drop the oldest short-term reference
3429 pic = h->short_ref[h->short_ref_count - 1];
3430 remove_short(h, pic->frame_num, 0);
3434 print_short_term(h);
/**
 * Parse dec_ref_pic_marking() from the slice header (H.264 spec 7.3.3.3)
 * into h->mmco[].  For IDR slices reads no_output_of_prior_pics /
 * long_term_reference_flag; otherwise reads the adaptive MMCO list or
 * synthesizes sliding-window SHORT2UNUSED ops when the reference buffer
 * is full.  NOTE(review): interior lines are missing from this chunk
 * (IDR else-branch, error returns, mmco count bookkeeping); comments
 * cover visible code only.
 */
3439 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3440 MpegEncContext * const s = &h->s;
3444 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
3445 s->broken_link= get_bits1(gb) -1;
// long_term_reference_flag set: mark the IDR picture long-term at index 0
3447 h->mmco[0].opcode= MMCO_LONG;
3448 h->mmco[0].long_arg= 0;
3452 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3453 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3454 MMCOOpcode opcode= get_ue_golomb(gb);
3456 h->mmco[i].opcode= opcode;
3457 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
// difference_of_pic_nums_minus1 -> absolute pic num (mod max_pic_num)
3458 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3459 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3460 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3464 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3465 unsigned int long_arg= get_ue_golomb(gb);
// long indices are 0..15 per frame; field pic nums may reach 31 for LONG2UNUSED
3466 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3467 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3470 h->mmco[i].long_arg= long_arg;
3473 if(opcode > (unsigned)MMCO_LONG){
3474 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3477 if(opcode == MMCO_END)
3482 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
// sliding window: buffer full -> synthesize removal of the oldest short ref
3484 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3485 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3486 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3487 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
// field pictures: remove both fields (pic nums 2n and 2n+1)
3489 if (FIELD_PICTURE) {
3490 h->mmco[0].short_pic_num *= 2;
3491 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3492 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
/**
 * Compute the picture order count (POC) for the current picture, handling
 * all three poc_type modes of H.264 spec 8.2.1:
 *  - type 0: poc_msb/poc_lsb wrap-around arithmetic,
 *  - type 1: expected POC from the SPS offset_for_ref_frame cycle,
 *  - type 2: POC derived directly from frame_num.
 * Writes field_poc[0]/[1] for the fields this picture covers and sets
 * cur->poc to the minimum of the two.
 * NOTE(review): interior lines are missing from this chunk (field_poc[0]
 * assignment for type 0, poc_type==2 branch body, return); comments cover
 * visible code only.
 */
3502 static int init_poc(H264Context *h){
3503 MpegEncContext * const s = &h->s;
3504 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3506 Picture *cur = s->current_picture_ptr;
// frame_num wrapped since the previous picture -> advance the offset
3508 h->frame_num_offset= h->prev_frame_num_offset;
3509 if(h->frame_num < h->prev_frame_num)
3510 h->frame_num_offset += max_frame_num;
3512 if(h->sps.poc_type==0){
3513 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
// spec 8.2.1.1: detect lsb wrap in either direction to adjust poc_msb
3515 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3516 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3517 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3518 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3520 h->poc_msb = h->prev_poc_msb;
3521 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3523 field_poc[1] = h->poc_msb + h->poc_lsb;
3524 if(s->picture_structure == PICT_FRAME)
3525 field_poc[1] += h->delta_poc_bottom;
3526 }else if(h->sps.poc_type==1){
3527 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3530 if(h->sps.poc_cycle_length != 0)
3531 abs_frame_num = h->frame_num_offset + h->frame_num;
3535 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3538 expected_delta_per_poc_cycle = 0;
3539 for(i=0; i < h->sps.poc_cycle_length; i++)
3540 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
3542 if(abs_frame_num > 0){
3543 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3544 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3546 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3547 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3548 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3552 if(h->nal_ref_idc == 0)
3553 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3555 field_poc[0] = expectedpoc + h->delta_poc[0];
3556 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3558 if(s->picture_structure == PICT_FRAME)
3559 field_poc[1] += h->delta_poc[1];
// poc_type==2: POC follows decoding order directly
3561 int poc= 2*(h->frame_num_offset + h->frame_num);
// only store the POC of fields this picture actually contains
3570 if(s->picture_structure != PICT_BOTTOM_FIELD)
3571 s->current_picture_ptr->field_poc[0]= field_poc[0];
3572 if(s->picture_structure != PICT_TOP_FIELD)
3573 s->current_picture_ptr->field_poc[1]= field_poc[1];
3574 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3581 * initialize scan tables
/**
 * Build the per-context coefficient scan tables.  When the C IDCT is in use
 * the canonical tables are copied verbatim; otherwise the indices are
 * permuted (macro T) to match the element order expected by the optimized
 * IDCT implementations.  The *_q0 pointers select the unpermuted tables for
 * lossless (transform-bypass) blocks.
 */
3583 static void init_scan_tables(H264Context *h){
3584 MpegEncContext * const s = &h->s;
// 4x4 scans: C IDCT keeps natural order, SIMD IDCTs need permuted indices
3586 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3587 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3588 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3590 for(i=0; i<16; i++){
3591 #define T(x) (x>>2) | ((x<<2) & 0xF)
3592 h->zigzag_scan[i] = T(zigzag_scan[i]);
3593 h-> field_scan[i] = T( field_scan[i]);
// 8x8 scans, same idea with a different permutation
3597 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3598 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3599 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3600 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3601 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3603 for(i=0; i<64; i++){
3604 #define T(x) (x>>3) | ((x&7)<<3)
3605 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3606 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3607 h->field_scan8x8[i] = T(field_scan8x8[i]);
3608 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
// transform-bypass blocks skip the IDCT, so they need the unpermuted tables
3612 if(h->sps.transform_bypass){ //FIXME same ugly
3613 h->zigzag_scan_q0 = zigzag_scan;
3614 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3615 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3616 h->field_scan_q0 = field_scan;
3617 h->field_scan8x8_q0 = field_scan8x8;
3618 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3620 h->zigzag_scan_q0 = h->zigzag_scan;
3621 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3622 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3623 h->field_scan_q0 = h->field_scan;
3624 h->field_scan8x8_q0 = h->field_scan8x8;
3625 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3630 * Replicates H264 "master" context to thread contexts.
3632 static void clone_slice(H264Context *dst, H264Context *src)
// copy the per-frame state a worker thread needs; reference lists and
// dequant tables are copied by value so each thread can read them freely
3634 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3635 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3636 dst->s.current_picture = src->s.current_picture;
3637 dst->s.linesize = src->s.linesize;
3638 dst->s.uvlinesize = src->s.uvlinesize;
3639 dst->s.first_field = src->s.first_field;
3641 dst->prev_poc_msb = src->prev_poc_msb;
3642 dst->prev_poc_lsb = src->prev_poc_lsb;
3643 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3644 dst->prev_frame_num = src->prev_frame_num;
3645 dst->short_ref_count = src->short_ref_count;
3647 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3648 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3649 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3650 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3652 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3653 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3657 * decodes a slice header.
3658 * This will also call MPV_common_init() and frame_start() as needed.
3660 * @param h h264context
3661 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3663 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
/**
 * Parse the slice header (H.264 spec 7.3.3) and set up all per-slice state:
 * PPS/SPS selection, picture dimensions, field/frame structure, frame_num
 * gap concealment, POC, reference counts and list reordering, weighted
 * prediction, ref pic marking, QP, and deblocking parameters.
 * NOTE(review): this extracted chunk is missing many interior lines
 * (else branches, returns, closing braces); comments describe only the
 * visible code.
 */
3665 static int decode_slice_header(H264Context *h, H264Context *h0){
3666 MpegEncContext * const s = &h->s;
3667 MpegEncContext * const s0 = &h0->s;
3668 unsigned int first_mb_in_slice;
3669 unsigned int pps_id;
3670 int num_ref_idx_active_override_flag;
3671 static const uint8_t slice_type_map[5]= {FF_P_TYPE, FF_B_TYPE, FF_I_TYPE, FF_SP_TYPE, FF_SI_TYPE};
3672 unsigned int slice_type, tmp, i, j;
3673 int default_ref_list_done = 0;
3674 int last_pic_structure;
// non-reference slices are droppable
3676 s->dropable= h->nal_ref_idc == 0;
// FAST flag: use cheaper 2-tap qpel filters for non-reference pictures
3678 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3679 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3680 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3682 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3683 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3686 first_mb_in_slice= get_ue_golomb(&s->gb);
3688 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3689 h0->current_slice = 0;
3690 if (!s0->first_field)
3691 s->current_picture_ptr= NULL;
3694 slice_type= get_ue_golomb(&s->gb);
// NOTE(review): this logs h->slice_type (the previous slice's type) rather
// than the just-read, out-of-range value — looks intentional-but-stale
3696 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3701 h->slice_type_fixed=1;
3703 h->slice_type_fixed=0;
3705 slice_type= slice_type_map[ slice_type ];
// I slices and repeats of the previous slice type can reuse the default list
3706 if (slice_type == FF_I_TYPE
3707 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3708 default_ref_list_done = 1;
3710 h->slice_type= slice_type;
3711 h->slice_type_nos= slice_type & 3;
3713 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
3714 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3715 av_log(h->s.avctx, AV_LOG_ERROR,
3716 "B picture before any references, skipping\n");
// activate the referenced PPS and its SPS
3720 pps_id= get_ue_golomb(&s->gb);
3721 if(pps_id>=MAX_PPS_COUNT){
3722 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3725 if(!h0->pps_buffers[pps_id]) {
3726 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS referenced\n");
3729 h->pps= *h0->pps_buffers[pps_id];
3731 if(!h0->sps_buffers[h->pps.sps_id]) {
3732 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS referenced\n");
3735 h->sps = *h0->sps_buffers[h->pps.sps_id];
3737 if(h == h0 && h->dequant_coeff_pps != pps_id){
3738 h->dequant_coeff_pps = pps_id;
3739 init_dequant_tables(h);
// derive picture dimensions from the SPS, applying cropping
3742 s->mb_width= h->sps.mb_width;
3743 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3745 h->b_stride= s->mb_width*4;
3746 h->b8_stride= s->mb_width*2;
3748 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3749 if(h->sps.frame_mbs_only_flag)
3750 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3752 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3754 if (s->context_initialized
3755 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3757 return -1; // width / height changed during parallelized decoding
3761 if (!s->context_initialized) {
3763 return -1; // we cant (re-)initialize context during parallel decoding
3764 if (MPV_common_init(s) < 0)
3768 init_scan_tables(h);
// set up per-thread H264 contexts for sliced decoding
3771 for(i = 1; i < s->avctx->thread_count; i++) {
3773 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3774 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3775 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3778 init_scan_tables(c);
3782 for(i = 0; i < s->avctx->thread_count; i++)
3783 if(context_init(h->thread_context[i]) < 0)
3786 s->avctx->width = s->width;
3787 s->avctx->height = s->height;
3788 s->avctx->sample_aspect_ratio= h->sps.sar;
3789 if(!s->avctx->sample_aspect_ratio.den)
3790 s->avctx->sample_aspect_ratio.den = 1;
3792 if(h->sps.timing_info_present_flag){
3793 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
// old x264 builds (<44) wrote half the timescale; compensate
3794 if(h->x264_build > 0 && h->x264_build < 44)
3795 s->avctx->time_base.den *= 2;
3796 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3797 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3801 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
// determine picture structure (frame / top field / bottom field / MBAFF)
3804 h->mb_aff_frame = 0;
3805 last_pic_structure = s0->picture_structure;
3806 if(h->sps.frame_mbs_only_flag){
3807 s->picture_structure= PICT_FRAME;
3809 if(get_bits1(&s->gb)) { //field_pic_flag
3810 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
3812 s->picture_structure= PICT_FRAME;
3813 h->mb_aff_frame = h->sps.mb_aff;
3816 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
3818 if(h0->current_slice == 0){
// conceal frame_num gaps by synthesizing intermediate reference frames
3819 while(h->frame_num != h->prev_frame_num &&
3820 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
3821 av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
3823 h->prev_frame_num++;
3824 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
3825 s->current_picture_ptr->frame_num= h->prev_frame_num;
3826 execute_ref_pic_marking(h, NULL, 0);
3829 /* See if we have a decoded first field looking for a pair... */
3830 if (s0->first_field) {
3831 assert(s0->current_picture_ptr);
3832 assert(s0->current_picture_ptr->data[0]);
3833 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
3835 /* figure out if we have a complementary field pair */
3836 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
3838 * Previous field is unmatched. Don't display it, but let it
3839 * remain for reference if marked as such.
3841 s0->current_picture_ptr = NULL;
3842 s0->first_field = FIELD_PICTURE;
3845 if (h->nal_ref_idc &&
3846 s0->current_picture_ptr->reference &&
3847 s0->current_picture_ptr->frame_num != h->frame_num) {
3849 * This and previous field were reference, but had
3850 * different frame_nums. Consider this field first in
3851 * pair. Throw away previous field except for reference
3854 s0->first_field = 1;
3855 s0->current_picture_ptr = NULL;
3858 /* Second field in complementary pair */
3859 s0->first_field = 0;
3864 /* Frame or first field in a potentially complementary pair */
3865 assert(!s0->current_picture_ptr);
3866 s0->first_field = FIELD_PICTURE;
3869 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
3870 s0->first_field = 0;
3877 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
// validate and apply the slice's starting macroblock position
3879 assert(s->mb_num == s->mb_width * s->mb_height);
3880 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
3881 first_mb_in_slice >= s->mb_num){
3882 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
3885 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
3886 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
3887 if (s->picture_structure == PICT_BOTTOM_FIELD)
3888 s->resync_mb_y = s->mb_y = s->mb_y + 1;
3889 assert(s->mb_y < s->mb_height);
// pic-num space is doubled (+1 for current) when coding individual fields
3891 if(s->picture_structure==PICT_FRAME){
3892 h->curr_pic_num= h->frame_num;
3893 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
3895 h->curr_pic_num= 2*h->frame_num + 1;
3896 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3899 if(h->nal_unit_type == NAL_IDR_SLICE){
3900 get_ue_golomb(&s->gb); /* idr_pic_id */
// POC-related syntax elements, per sps.poc_type
3903 if(h->sps.poc_type==0){
3904 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3906 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3907 h->delta_poc_bottom= get_se_golomb(&s->gb);
3911 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3912 h->delta_poc[0]= get_se_golomb(&s->gb);
3914 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3915 h->delta_poc[1]= get_se_golomb(&s->gb);
3920 if(h->pps.redundant_pic_cnt_present){
3921 h->redundant_pic_count= get_ue_golomb(&s->gb);
3924 //set defaults, might be overridden a few lines later
3925 h->ref_count[0]= h->pps.ref_count[0];
3926 h->ref_count[1]= h->pps.ref_count[1];
3928 if(h->slice_type_nos != FF_I_TYPE){
3929 if(h->slice_type_nos == FF_B_TYPE){
3930 h->direct_spatial_mv_pred= get_bits1(&s->gb);
3932 num_ref_idx_active_override_flag= get_bits1(&s->gb);
3934 if(num_ref_idx_active_override_flag){
3935 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3936 if(h->slice_type_nos==FF_B_TYPE)
3937 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
3939 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
3940 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3941 h->ref_count[0]= h->ref_count[1]= 1;
3945 if(h->slice_type_nos == FF_B_TYPE)
// build/reorder reference lists and weighted-prediction tables
3952 if(!default_ref_list_done){
3953 fill_default_ref_list(h);
3956 if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0)
3959 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
3960 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
3961 pred_weight_table(h);
3962 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
3963 implicit_weight_table(h);
3968 decode_ref_pic_marking(h0, &s->gb);
3971 fill_mbaff_ref_list(h);
3973 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
3974 tmp = get_ue_golomb(&s->gb);
3976 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
3979 h->cabac_init_idc= tmp;
3982 h->last_qscale_diff = 0;
3983 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
3985 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
3989 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
3990 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
3991 //FIXME qscale / qp ... stuff
3992 if(h->slice_type == FF_SP_TYPE){
3993 get_bits1(&s->gb); /* sp_for_switch_flag */
3995 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
3996 get_se_golomb(&s->gb); /* slice_qs_delta */
// deblocking-filter parameters: bitstream idc 0/1 is remapped so that the
// internal value 1 means "filter on" and 0 means "off"
3999 h->deblocking_filter = 1;
4000 h->slice_alpha_c0_offset = 0;
4001 h->slice_beta_offset = 0;
4002 if( h->pps.deblocking_filter_parameters_present ) {
4003 tmp= get_ue_golomb(&s->gb);
4005 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4008 h->deblocking_filter= tmp;
4009 if(h->deblocking_filter < 2)
4010 h->deblocking_filter^= 1; // 1<->0
4012 if( h->deblocking_filter ) {
4013 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4014 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4018 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4019 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
4020 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
4021 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4022 h->deblocking_filter= 0;
// cross-slice deblocking cannot be parallelized; either weaken it (FAST)
// or force single-context sequential decoding
4024 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4025 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4026 /* Cheat slightly for speed:
4027 Do not bother to deblock across slices. */
4028 h->deblocking_filter = 2;
4030 h0->max_contexts = 1;
4031 if(!h0->single_decode_warning) {
4032 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4033 h0->single_decode_warning = 1;
4036 return 1; // deblocking switched inside frame
// NOTE(review): the "?" below is a placeholder, not valid C — in upstream
// FFmpeg this line is disabled (slice groups / FMO are unimplemented);
// presumably the surrounding #if 0 was lost in extraction — verify.
4041 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4042 slice_group_change_cycle= get_bits(&s->gb, ?);
4045 h0->last_slice_type = slice_type;
4046 h->slice_num = ++h0->current_slice;
// cache ref_list index -> (frame_num, reference) mapping for this slice
4049 int *ref2frm= h->ref2frm[h->slice_num&15][j];
4053 ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
4054 +(h->ref_list[j][i].reference&3);
4057 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4058 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4060 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4061 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4063 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4065 av_get_pict_type_char(h->slice_type),
4066 pps_id, h->frame_num,
4067 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4068 h->ref_count[0], h->ref_count[1],
4070 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4072 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4073 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
4083 static inline int get_level_prefix(GetBitContext *gb){
4087 OPEN_READER(re, gb);
4088 UPDATE_CACHE(re, gb);
4089 buf=GET_CACHE(re, gb);
4091 log= 32 - av_log2(buf);
4093 print_bin(buf>>(32-log), log);
4094 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4097 LAST_SKIP_BITS(re, gb, log);
4098 CLOSE_READER(re, gb);
4103 static inline int get_dct8x8_allowed(H264Context *h){
4106 if(!IS_SUB_8X8(h->sub_mb_type[i])
4107 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4114 * decodes a residual block.
4115 * @param n block index
4116 * @param scantable scantable
4117 * @param max_coeff number of coefficients in the block
4118 * @return <0 if an error occurred
/* CAVLC residual decoding: reads coeff_token, trailing-one signs, level
 * prefix/suffix codes, total_zeros and run_before, then scatters the levels
 * into `block` in scan order (with optional dequantization via `qmul`).
 * NOTE(review): this listing appears fragmentary — several interior lines
 * (level[] declaration, some braces/else branches, returns) are missing;
 * comments below annotate only what is visible. */
4120 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4121 MpegEncContext * const s = &h->s;
4122 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4124 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4126 //FIXME put trailing_onex into the context
/* coeff_token selection: chroma DC has its own VLC; luma DC and AC pick a
 * table indexed by the predicted non-zero count of neighbours */
4128 if(n == CHROMA_DC_BLOCK_INDEX){
4129 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4130 total_coeff= coeff_token>>2;
4132 if(n == LUMA_DC_BLOCK_INDEX){
4133 total_coeff= pred_non_zero_count(h, 0);
4134 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4135 total_coeff= coeff_token>>2;
4137 total_coeff= pred_non_zero_count(h, n);
4138 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4139 total_coeff= coeff_token>>2;
4140 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4144 //FIXME set last_non_zero?
/* sanity check against corrupted bitstreams */
4148 if(total_coeff > (unsigned)max_coeff) {
4149 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
/* low 2 bits of coeff_token are the trailing-ones count (|level|==1) */
4153 trailing_ones= coeff_token&3;
4154 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4155 assert(total_coeff<=16);
/* trailing ones: one sign bit each, 0 -> +1, 1 -> -1 */
4157 for(i=0; i<trailing_ones; i++){
4158 level[i]= 1 - 2*get_bits1(gb);
4162 int level_code, mask;
4163 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4164 int prefix= get_level_prefix(gb);
4166 //first coefficient has suffix_length equal to 0 or 1
4167 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4169 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4171 level_code= (prefix<<suffix_length); //part
4172 }else if(prefix==14){
4174 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4176 level_code= prefix + get_bits(gb, 4); //part
4178 level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
4179 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4181 level_code += (1<<(prefix-3))-4096;
4184 if(trailing_ones < 3) level_code += 2;
/* map unsigned level_code to signed level: even -> positive, odd -> negative */
4189 mask= -(level_code&1);
4190 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4193 //remaining coefficients have suffix_length > 0
4194 for(;i<total_coeff;i++) {
4195 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4196 prefix = get_level_prefix(gb);
4198 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4200 level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
4202 level_code += (1<<(prefix-3))-4096;
4204 mask= -(level_code&1);
4205 level[i]= (((2+level_code)>>1) ^ mask) - mask;
/* adapt suffix_length upward once the magnitude exceeds the table limit */
4206 if(level_code > suffix_limit[suffix_length])
/* total_zeros: skipped entirely when the block is already full */
4211 if(total_coeff == max_coeff)
4214 if(n == CHROMA_DC_BLOCK_INDEX)
4215 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4217 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
/* scatter levels into scan positions; two copies of the loop:
 * first (qmul==NULL path) stores raw levels ... */
4220 coeff_num = zeros_left + total_coeff - 1;
4221 j = scantable[coeff_num];
4223 block[j] = level[0];
4224 for(i=1;i<total_coeff;i++) {
4227 else if(zeros_left < 7){
4228 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4230 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4232 zeros_left -= run_before;
4233 coeff_num -= 1 + run_before;
4234 j= scantable[ coeff_num ];
/* ... second copy dequantizes with qmul while storing */
4239 block[j] = (level[0] * qmul[j] + 32)>>6;
4240 for(i=1;i<total_coeff;i++) {
4243 else if(zeros_left < 7){
4244 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4246 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4248 zeros_left -= run_before;
4249 coeff_num -= 1 + run_before;
4250 j= scantable[ coeff_num ];
4252 block[j]= (level[i] * qmul[j] + 32)>>6;
/* zeros_left going negative means the bitstream was inconsistent */
4257 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4264 static void predict_field_decoding_flag(H264Context *h){
4265 MpegEncContext * const s = &h->s;
4266 const int mb_xy= h->mb_xy;
4267 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4268 ? s->current_picture.mb_type[mb_xy-1]
4269 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4270 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4272 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4276 * decodes a P_SKIP or B_SKIP macroblock
4278 static void decode_mb_skip(H264Context *h){
4279 MpegEncContext * const s = &h->s;
4280 const int mb_xy= h->mb_xy;
4283 memset(h->non_zero_count[mb_xy], 0, 16);
4284 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4287 mb_type|= MB_TYPE_INTERLACED;
4289 if( h->slice_type_nos == FF_B_TYPE )
4291 // just for fill_caches. pred_direct_motion will set the real mb_type
4292 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4294 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4295 pred_direct_motion(h, &mb_type);
4296 mb_type|= MB_TYPE_SKIP;
4301 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4303 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4304 pred_pskip_motion(h, &mx, &my);
4305 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4306 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4309 write_back_motion(h, mb_type);
4310 s->current_picture.mb_type[mb_xy]= mb_type;
4311 s->current_picture.qscale_table[mb_xy]= s->qscale;
4312 h->slice_table[ mb_xy ]= h->slice_num;
4313 h->prev_mb_skipped= 1;
4317 * decodes a macroblock
4318 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* CAVLC macroblock decoding: skip-run handling, mb_type parsing (I/P/B),
 * PCM passthrough, intra prediction mode parsing, inter motion parsing
 * (8x8 sub-partitions, direct, 16x16/16x8/8x16), CBP, dquant and residual
 * decoding. NOTE(review): this listing appears fragmentary — many interior
 * lines (declarations, else branches, closing braces, returns) are missing;
 * comments below annotate only what is visible. */
4320 static int decode_mb_cavlc(H264Context *h){
4321 MpegEncContext * const s = &h->s;
4323 int partition_count;
4324 unsigned int mb_type, cbp;
4325 int dct8x8_allowed= h->pps.transform_8x8_mode;
4327 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4329 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4331 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4332 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
/* --- skip-run handling (P/B slices only) --- */
4334 if(h->slice_type_nos != FF_I_TYPE){
4335 if(s->mb_skip_run==-1)
4336 s->mb_skip_run= get_ue_golomb(&s->gb);
4338 if (s->mb_skip_run--) {
4339 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4340 if(s->mb_skip_run==0)
4341 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4343 predict_field_decoding_flag(h);
4350 if( (s->mb_y&1) == 0 )
4351 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4354 h->prev_mb_skipped= 0;
/* --- mb_type parsing; B and P remap through their info tables, large
 * values fall through to intra decoding --- */
4356 mb_type= get_ue_golomb(&s->gb);
4357 if(h->slice_type_nos == FF_B_TYPE){
4359 partition_count= b_mb_type_info[mb_type].partition_count;
4360 mb_type= b_mb_type_info[mb_type].type;
4363 goto decode_intra_mb;
4365 }else if(h->slice_type_nos == FF_P_TYPE){
4367 partition_count= p_mb_type_info[mb_type].partition_count;
4368 mb_type= p_mb_type_info[mb_type].type;
4371 goto decode_intra_mb;
4374 assert(h->slice_type_nos == FF_I_TYPE);
4375 if(h->slice_type == FF_SI_TYPE && mb_type)
4379 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4383 cbp= i_mb_type_info[mb_type].cbp;
4384 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4385 mb_type= i_mb_type_info[mb_type].type;
4389 mb_type |= MB_TYPE_INTERLACED;
4391 h->slice_table[ mb_xy ]= h->slice_num;
/* --- IPCM: raw byte copy of the samples --- */
4393 if(IS_INTRA_PCM(mb_type)){
4396 // We assume these blocks are very rare so we do not optimize it.
4397 align_get_bits(&s->gb);
4399 // The pixels are stored in the same order as levels in h->mb array.
4400 for(x=0; x < (CHROMA ? 384 : 256); x++){
4401 ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
4404 // In deblocking, the quantizer is 0
4405 s->current_picture.qscale_table[mb_xy]= 0;
4406 // All coeffs are present
4407 memset(h->non_zero_count[mb_xy], 16, 16);
4409 s->current_picture.mb_type[mb_xy]= mb_type;
/* ref counts doubled for MBAFF field pairs, halved again at the end */
4414 h->ref_count[0] <<= 1;
4415 h->ref_count[1] <<= 1;
4418 fill_caches(h, mb_type, 0);
/* --- intra prediction mode parsing --- */
4421 if(IS_INTRA(mb_type)){
4423 // init_top_left_availability(h);
4424 if(IS_INTRA4x4(mb_type)){
4427 if(dct8x8_allowed && get_bits1(&s->gb)){
4428 mb_type |= MB_TYPE_8x8DCT;
4432 // fill_intra4x4_pred_table(h);
4433 for(i=0; i<16; i+=di){
4434 int mode= pred_intra_mode(h, i);
4436 if(!get_bits1(&s->gb)){
4437 const int rem_mode= get_bits(&s->gb, 3);
4438 mode = rem_mode + (rem_mode >= mode);
4442 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4444 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4446 write_back_intra_pred_mode(h);
4447 if( check_intra4x4_pred_mode(h) < 0)
4450 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4451 if(h->intra16x16_pred_mode < 0)
4455 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4458 h->chroma_pred_mode= pred_mode;
/* --- inter with 8x8 sub-partitions --- */
4460 }else if(partition_count==4){
4461 int i, j, sub_partition_count[4], list, ref[2][4];
4463 if(h->slice_type_nos == FF_B_TYPE){
4465 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4466 if(h->sub_mb_type[i] >=13){
4467 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4470 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4471 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4473 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4474 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4475 pred_direct_motion(h, &mb_type);
4476 h->ref_cache[0][scan8[4]] =
4477 h->ref_cache[1][scan8[4]] =
4478 h->ref_cache[0][scan8[12]] =
4479 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4482 assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
4484 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4485 if(h->sub_mb_type[i] >=4){
4486 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4489 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4490 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* reference indices per list and sub-block */
4494 for(list=0; list<h->list_count; list++){
4495 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4497 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4498 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4499 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4501 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4513 dct8x8_allowed = get_dct8x8_allowed(h);
/* motion vector deltas per sub-partition */
4515 for(list=0; list<h->list_count; list++){
4517 if(IS_DIRECT(h->sub_mb_type[i])) {
4518 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4521 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4522 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4524 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4525 const int sub_mb_type= h->sub_mb_type[i];
4526 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4527 for(j=0; j<sub_partition_count[i]; j++){
4529 const int index= 4*i + block_width*j;
4530 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4531 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4532 mx += get_se_golomb(&s->gb);
4533 my += get_se_golomb(&s->gb);
4534 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
/* replicate the MV into the cache cells covered by the sub-block shape */
4536 if(IS_SUB_8X8(sub_mb_type)){
4538 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4540 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4541 }else if(IS_SUB_8X4(sub_mb_type)){
4542 mv_cache[ 1 ][0]= mx;
4543 mv_cache[ 1 ][1]= my;
4544 }else if(IS_SUB_4X8(sub_mb_type)){
4545 mv_cache[ 8 ][0]= mx;
4546 mv_cache[ 8 ][1]= my;
4548 mv_cache[ 0 ][0]= mx;
4549 mv_cache[ 0 ][1]= my;
4552 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
/* --- direct 16x16 --- */
4558 }else if(IS_DIRECT(mb_type)){
4559 pred_direct_motion(h, &mb_type);
4560 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
4562 int list, mx, my, i;
4563 //FIXME we should set ref_idx_l? to 0 if we use that later ...
/* --- 16x16 partition --- */
4564 if(IS_16X16(mb_type)){
4565 for(list=0; list<h->list_count; list++){
4567 if(IS_DIR(mb_type, 0, list)){
4568 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4569 if(val >= h->ref_count[list]){
4570 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4574 val= LIST_NOT_USED&0xFF;
4575 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4577 for(list=0; list<h->list_count; list++){
4579 if(IS_DIR(mb_type, 0, list)){
4580 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4581 mx += get_se_golomb(&s->gb);
4582 my += get_se_golomb(&s->gb);
4583 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4585 val= pack16to32(mx,my);
4588 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
/* --- 16x8 partitions --- */
4591 else if(IS_16X8(mb_type)){
4592 for(list=0; list<h->list_count; list++){
4595 if(IS_DIR(mb_type, i, list)){
4596 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4597 if(val >= h->ref_count[list]){
4598 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4602 val= LIST_NOT_USED&0xFF;
4603 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4606 for(list=0; list<h->list_count; list++){
4609 if(IS_DIR(mb_type, i, list)){
4610 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4611 mx += get_se_golomb(&s->gb);
4612 my += get_se_golomb(&s->gb);
4613 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4615 val= pack16to32(mx,my);
4618 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
/* --- 8x16 partitions --- */
4622 assert(IS_8X16(mb_type));
4623 for(list=0; list<h->list_count; list++){
4626 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4627 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4628 if(val >= h->ref_count[list]){
4629 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4633 val= LIST_NOT_USED&0xFF;
4634 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4637 for(list=0; list<h->list_count; list++){
4640 if(IS_DIR(mb_type, i, list)){
4641 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4642 mx += get_se_golomb(&s->gb);
4643 my += get_se_golomb(&s->gb);
4644 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4646 val= pack16to32(mx,my);
4649 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4655 if(IS_INTER(mb_type))
4656 write_back_motion(h, mb_type);
/* --- coded block pattern --- */
4658 if(!IS_INTRA16x16(mb_type)){
4659 cbp= get_ue_golomb(&s->gb);
4661 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4666 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
4667 else cbp= golomb_to_inter_cbp [cbp];
4669 if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
4670 else cbp= golomb_to_inter_cbp_gray[cbp];
4675 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4676 if(get_bits1(&s->gb)){
4677 mb_type |= MB_TYPE_8x8DCT;
4678 h->cbp_table[mb_xy]= cbp;
4681 s->current_picture.mb_type[mb_xy]= mb_type;
/* --- residuals: scan tables, dquant, then luma + chroma blocks --- */
4683 if(cbp || IS_INTRA16x16(mb_type)){
4684 int i8x8, i4x4, chroma_idx;
4686 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4687 const uint8_t *scan, *scan8x8, *dc_scan;
4689 // fill_non_zero_count_cache(h);
4691 if(IS_INTERLACED(mb_type)){
4692 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4693 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4694 dc_scan= luma_dc_field_scan;
4696 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4697 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4698 dc_scan= luma_dc_zigzag_scan;
4701 dquant= get_se_golomb(&s->gb);
4703 if( dquant > 25 || dquant < -26 ){
4704 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
/* qscale wraps modulo 52 */
4708 s->qscale += dquant;
4709 if(((unsigned)s->qscale) > 51){
4710 if(s->qscale<0) s->qscale+= 52;
4711 else s->qscale-= 52;
4714 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4715 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4716 if(IS_INTRA16x16(mb_type)){
4717 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4718 return -1; //FIXME continue if partitioned and other return -1 too
4721 assert((cbp&15) == 0 || (cbp&15) == 15);
4724 for(i8x8=0; i8x8<4; i8x8++){
4725 for(i4x4=0; i4x4<4; i4x4++){
4726 const int index= i4x4 + 4*i8x8;
4727 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4733 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4736 for(i8x8=0; i8x8<4; i8x8++){
4737 if(cbp & (1<<i8x8)){
4738 if(IS_8x8DCT(mb_type)){
4739 DCTELEM *buf = &h->mb[64*i8x8];
4741 for(i4x4=0; i4x4<4; i4x4++){
4742 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4743 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4746 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4747 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4749 for(i4x4=0; i4x4<4; i4x4++){
4750 const int index= i4x4 + 4*i8x8;
4752 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4758 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4759 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* chroma DC then chroma AC */
4765 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4766 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4772 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4773 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4774 for(i4x4=0; i4x4<4; i4x4++){
4775 const int index= 16 + 4*chroma_idx + i4x4;
4776 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4782 uint8_t * const nnz= &h->non_zero_count_cache[0];
4783 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4784 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4787 uint8_t * const nnz= &h->non_zero_count_cache[0];
4788 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4789 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4790 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4792 s->current_picture.qscale_table[mb_xy]= s->qscale;
4793 write_back_non_zero_count(h);
4796 h->ref_count[0] >>= 1;
4797 h->ref_count[1] >>= 1;
4803 static int decode_cabac_field_decoding_flag(H264Context *h) {
4804 MpegEncContext * const s = &h->s;
4805 const int mb_x = s->mb_x;
4806 const int mb_y = s->mb_y & ~1;
4807 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4808 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
4810 unsigned int ctx = 0;
4812 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
4815 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
4819 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
4822 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
4823 uint8_t *state= &h->cabac_state[ctx_base];
4827 MpegEncContext * const s = &h->s;
4828 const int mba_xy = h->left_mb_xy[0];
4829 const int mbb_xy = h->top_mb_xy;
4831 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
4833 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
4835 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
4836 return 0; /* I4x4 */
4839 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
4840 return 0; /* I4x4 */
4843 if( get_cabac_terminate( &h->cabac ) )
4844 return 25; /* PCM */
4846 mb_type = 1; /* I16x16 */
4847 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
4848 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
4849 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
4850 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
4851 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
4855 static int decode_cabac_mb_type( H264Context *h ) {
4856 MpegEncContext * const s = &h->s;
4858 if( h->slice_type_nos == FF_I_TYPE ) {
4859 return decode_cabac_intra_mb_type(h, 3, 1);
4860 } else if( h->slice_type_nos == FF_P_TYPE ) {
4861 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
4863 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
4864 /* P_L0_D16x16, P_8x8 */
4865 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
4867 /* P_L0_D8x16, P_L0_D16x8 */
4868 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
4871 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
4873 } else if( h->slice_type_nos == FF_B_TYPE ) {
4874 const int mba_xy = h->left_mb_xy[0];
4875 const int mbb_xy = h->top_mb_xy;
4879 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
4881 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
4884 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
4885 return 0; /* B_Direct_16x16 */
4887 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
4888 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
4891 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
4892 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
4893 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
4894 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4896 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
4897 else if( bits == 13 ) {
4898 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
4899 } else if( bits == 14 )
4900 return 11; /* B_L1_L0_8x16 */
4901 else if( bits == 15 )
4902 return 22; /* B_8x8 */
4904 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
4905 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
4907 /* TODO SI/SP frames? */
4912 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
4913 MpegEncContext * const s = &h->s;
4917 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
4918 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
4921 && h->slice_table[mba_xy] == h->slice_num
4922 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
4923 mba_xy += s->mb_stride;
4925 mbb_xy = mb_xy - s->mb_stride;
4927 && h->slice_table[mbb_xy] == h->slice_num
4928 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
4929 mbb_xy -= s->mb_stride;
4931 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
4933 int mb_xy = h->mb_xy;
4935 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
4938 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
4940 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
4943 if( h->slice_type_nos == FF_B_TYPE )
4945 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
4948 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
4951 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
4954 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
4955 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
4956 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
4958 if( mode >= pred_mode )
4964 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
4965 const int mba_xy = h->left_mb_xy[0];
4966 const int mbb_xy = h->top_mb_xy;
4970 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
4971 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
4974 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
4977 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
4980 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4982 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
4988 static int decode_cabac_mb_cbp_luma( H264Context *h) {
4989 int cbp_b, cbp_a, ctx, cbp = 0;
4991 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
4992 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
4994 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
4995 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
4996 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
4997 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
4998 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
4999 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5000 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5001 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
5004 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5008 cbp_a = (h->left_cbp>>4)&0x03;
5009 cbp_b = (h-> top_cbp>>4)&0x03;
5012 if( cbp_a > 0 ) ctx++;
5013 if( cbp_b > 0 ) ctx += 2;
5014 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5018 if( cbp_a == 2 ) ctx++;
5019 if( cbp_b == 2 ) ctx += 2;
5020 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
5022 static int decode_cabac_mb_dqp( H264Context *h) {
5026 if( h->last_qscale_diff != 0 )
5029 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5035 if(val > 102) //prevent infinite loop
5042 return -(val + 1)/2;
5044 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5045 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5047 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5049 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
5053 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5055 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5056 return 0; /* B_Direct_8x8 */
5057 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5058 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5060 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5061 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5062 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5065 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5066 type += get_cabac( &h->cabac, &h->cabac_state[39] );
5070 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5071 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
5074 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5075 int refa = h->ref_cache[list][scan8[n] - 1];
5076 int refb = h->ref_cache[list][scan8[n] - 8];
5080 if( h->slice_type_nos == FF_B_TYPE) {
5081 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5083 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5092 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5098 if(ref >= 32 /*h->ref_list[list]*/){
5099 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5100 return 0; //FIXME we should return -1 and check the return everywhere
5106 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5107 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5108 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5109 int ctxbase = (l == 0) ? 40 : 47;
5114 else if( amvd > 32 )
5119 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5124 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5132 while( get_cabac_bypass( &h->cabac ) ) {
5136 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5141 if( get_cabac_bypass( &h->cabac ) )
5145 return get_cabac_bypass_sign( &h->cabac, -mvd );
5148 static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
5154 nza = h->left_cbp&0x100;
5155 nzb = h-> top_cbp&0x100;
5157 nza = (h->left_cbp>>(6+idx))&0x01;
5158 nzb = (h-> top_cbp>>(6+idx))&0x01;
5162 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5163 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5165 assert(cat == 1 || cat == 2);
5166 nza = h->non_zero_count_cache[scan8[idx] - 1];
5167 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5177 return ctx + 4 * cat;
5180 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5181 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5182 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5183 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5184 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5187 static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
5188 static const int significant_coeff_flag_offset[2][6] = {
5189 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5190 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5192 static const int last_coeff_flag_offset[2][6] = {
5193 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5194 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5196 static const int coeff_abs_level_m1_offset[6] = {
5197 227+0, 227+10, 227+20, 227+30, 227+39, 426
5199 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5200 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5201 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5202 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5203 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5204 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5205 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5206 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5207 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5209 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5210 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5211 * map node ctx => cabac ctx for level=1 */
5212 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5213 /* map node ctx => cabac ctx for level>1 */
5214 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5215 static const uint8_t coeff_abs_level_transition[2][8] = {
5216 /* update node ctx after decoding a level=1 */
5217 { 1, 2, 3, 3, 4, 5, 6, 7 },
5218 /* update node ctx after decoding a level>1 */
5219 { 4, 4, 4, 4, 5, 6, 7, 7 }
5225 int coeff_count = 0;
5228 uint8_t *significant_coeff_ctx_base;
5229 uint8_t *last_coeff_ctx_base;
5230 uint8_t *abs_level_m1_ctx_base;
5233 #define CABAC_ON_STACK
5235 #ifdef CABAC_ON_STACK
5238 cc.range = h->cabac.range;
5239 cc.low = h->cabac.low;
5240 cc.bytestream= h->cabac.bytestream;
5242 #define CC &h->cabac
5246 /* cat: 0-> DC 16x16 n = 0
5247 * 1-> AC 16x16 n = luma4x4idx
5248 * 2-> Luma4x4 n = luma4x4idx
5249 * 3-> DC Chroma n = iCbCr
5250 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5251 * 5-> Luma8x8 n = 4 * luma8x8idx
5254 /* read coded block flag */
5255 if( is_dc || cat != 5 ) {
5256 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
5259 h->non_zero_count_cache[scan8[16+n]] = 0;
5261 h->non_zero_count_cache[scan8[n]] = 0;
5264 #ifdef CABAC_ON_STACK
5265 h->cabac.range = cc.range ;
5266 h->cabac.low = cc.low ;
5267 h->cabac.bytestream= cc.bytestream;
5273 significant_coeff_ctx_base = h->cabac_state
5274 + significant_coeff_flag_offset[MB_FIELD][cat];
5275 last_coeff_ctx_base = h->cabac_state
5276 + last_coeff_flag_offset[MB_FIELD][cat];
5277 abs_level_m1_ctx_base = h->cabac_state
5278 + coeff_abs_level_m1_offset[cat];
5280 if( !is_dc && cat == 5 ) {
5281 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5282 for(last= 0; last < coefs; last++) { \
5283 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5284 if( get_cabac( CC, sig_ctx )) { \
5285 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5286 index[coeff_count++] = last; \
5287 if( get_cabac( CC, last_ctx ) ) { \
5293 if( last == max_coeff -1 ) {\
5294 index[coeff_count++] = last;\
5296 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5297 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5298 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5300 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5302 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5304 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5307 assert(coeff_count > 0);
5311 h->cbp_table[h->mb_xy] |= 0x100;
5313 h->cbp_table[h->mb_xy] |= 0x40 << n;
5316 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
5318 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5320 assert( cat == 1 || cat == 2 );
5321 h->non_zero_count_cache[scan8[n]] = coeff_count;
5326 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5328 int j= scantable[index[--coeff_count]];
5330 if( get_cabac( CC, ctx ) == 0 ) {
5331 node_ctx = coeff_abs_level_transition[0][node_ctx];
5333 block[j] = get_cabac_bypass_sign( CC, -1);
5335 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5339 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5340 node_ctx = coeff_abs_level_transition[1][node_ctx];
5342 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5346 if( coeff_abs >= 15 ) {
5348 while( get_cabac_bypass( CC ) ) {
5354 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5360 block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
5362 block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
5365 } while( coeff_count );
5366 #ifdef CABAC_ON_STACK
5367 h->cabac.range = cc.range ;
5368 h->cabac.low = cc.low ;
5369 h->cabac.bytestream= cc.bytestream;
/* Thin wrappers around decode_cabac_residual_internal. When CONFIG_SMALL is
 * NOT defined, two specializations are emitted with is_dc pinned to a
 * constant (1 for DC, 0 otherwise) so the compiler can fold the is_dc
 * branches; decode_cabac_residual then dispatches on cat (0 = luma DC,
 * 3 = chroma DC). NOTE(review): lines are elided in this extract -- the
 * closing braces / #else..#endif structure is not fully visible. */
5374 #ifndef CONFIG_SMALL
5375 static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5376 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 1);
5379 static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
5380 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
5384 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
/* CONFIG_SMALL path: single call, is_dc computed at run time. */
5386 decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, cat == 0 || cat == 3);
5388 if( cat == 0 || cat == 3 ) decode_cabac_residual_dc(h, block, cat, n, scantable, qmul, max_coeff);
5389 else decode_cabac_residual_nondc(h, block, cat, n, scantable, qmul, max_coeff);
/* Computes h->top_mb_xy and h->left_mb_xy[0] for the current macroblock.
 * The simple frame-coded case is mb_xy - mb_stride / mb_xy - 1; the code
 * below then adjusts for MBAFF macroblock pairs, where a field-coded MB's
 * neighbors depend on the frame/field flags of the adjacent pairs.
 * NOTE(review): several lines (incl. the MBAFF condition opener and some
 * closing braces) are elided in this extract. */
5393 static inline void compute_mb_neighbors(H264Context *h)
5395 MpegEncContext * const s = &h->s;
5396 const int mb_xy = h->mb_xy;
5397 h->top_mb_xy = mb_xy - s->mb_stride;
5398 h->left_mb_xy[0] = mb_xy - 1;
/* MBAFF: pair_xy addresses the top MB of the current MB pair; the pair
 * above is one stride up. */
5400 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5401 const int top_pair_xy = pair_xy - s->mb_stride;
5402 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5403 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5404 const int curr_mb_frame_flag = !MB_FIELD;
5405 const int bottom = (s->mb_y & 1);
5407 ? !curr_mb_frame_flag // bottom macroblock
5408 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5410 h->top_mb_xy -= s->mb_stride;
/* Left neighbor comes from the pair's top MB when the left pair's
 * frame/field mode differs from ours. */
5412 if (left_mb_frame_flag != curr_mb_frame_flag) {
5413 h->left_mb_xy[0] = pair_xy - 1;
5415 } else if (FIELD_PICTURE) {
5416 h->top_mb_xy -= s->mb_stride;
5422 * decodes a macroblock
5423 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
/* NOTE(review): this extract elides many lines (else-branches, closing
 * braces, some declarations); visible code is kept verbatim. */
5425 static int decode_mb_cabac(H264Context *h) {
5426 MpegEncContext * const s = &h->s;
5428 int mb_type, partition_count, cbp = 0;
5429 int dct8x8_allowed= h->pps.transform_8x8_mode;
5431 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5433 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5435 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
/* --- mb_skip_flag handling (P/B slices only) --- */
5436 if( h->slice_type_nos != FF_I_TYPE ) {
5438 /* a skipped mb needs the aff flag from the following mb */
5439 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5440 predict_field_decoding_flag(h);
5441 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5442 skip = h->next_mb_skipped;
5444 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5445 /* read skip flags */
5447 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5448 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5449 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5450 if(h->next_mb_skipped)
5451 predict_field_decoding_flag(h);
5453 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
/* Skipped MB: clear per-MB side info and return via the elided skip path. */
5458 h->cbp_table[mb_xy] = 0;
5459 h->chroma_pred_mode_table[mb_xy] = 0;
5460 h->last_qscale_diff = 0;
5467 if( (s->mb_y&1) == 0 )
5469 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5472 h->prev_mb_skipped = 0;
/* --- mb_type decoding --- */
5474 compute_mb_neighbors(h);
5475 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5476 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
/* Translate the slice-type-specific mb_type index into the generic
 * MB_TYPE_* flags + partition count via the b/p/i_mb_type_info tables. */
5480 if( h->slice_type_nos == FF_B_TYPE ) {
5482 partition_count= b_mb_type_info[mb_type].partition_count;
5483 mb_type= b_mb_type_info[mb_type].type;
5486 goto decode_intra_mb;
5488 } else if( h->slice_type_nos == FF_P_TYPE ) {
5490 partition_count= p_mb_type_info[mb_type].partition_count;
5491 mb_type= p_mb_type_info[mb_type].type;
5494 goto decode_intra_mb;
5497 if(h->slice_type == FF_SI_TYPE && mb_type)
5499 assert(h->slice_type_nos == FF_I_TYPE);
5501 partition_count = 0;
5502 cbp= i_mb_type_info[mb_type].cbp;
5503 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5504 mb_type= i_mb_type_info[mb_type].type;
5507 mb_type |= MB_TYPE_INTERLACED;
5509 h->slice_table[ mb_xy ]= h->slice_num;
/* --- I_PCM: raw samples follow; bypass the arithmetic coder --- */
5511 if(IS_INTRA_PCM(mb_type)) {
5514 // We assume these blocks are very rare so we do not optimize it.
5515 // FIXME The two following lines get the bitstream position in the cabac
5516 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5517 ptr= h->cabac.bytestream;
5518 if(h->cabac.low&0x1) ptr--;
5520 if(h->cabac.low&0x1FF) ptr--;
5523 // The pixels are stored in the same order as levels in h->mb array.
5524 memcpy(h->mb, ptr, 256); ptr+=256;
5526 memcpy(h->mb+128, ptr, 128); ptr+=128;
/* Re-initialize the CABAC decoder after the raw PCM bytes. */
5529 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5531 // All blocks are present
5532 h->cbp_table[mb_xy] = 0x1ef;
5533 h->chroma_pred_mode_table[mb_xy] = 0;
5534 // In deblocking, the quantizer is 0
5535 s->current_picture.qscale_table[mb_xy]= 0;
5536 // All coeffs are present
5537 memset(h->non_zero_count[mb_xy], 16, 16);
5538 s->current_picture.mb_type[mb_xy]= mb_type;
5539 h->last_qscale_diff = 0;
/* MBAFF field MBs use doubled reference lists; undone near the end. */
5544 h->ref_count[0] <<= 1;
5545 h->ref_count[1] <<= 1;
5548 fill_caches(h, mb_type, 0);
/* --- intra prediction mode decoding --- */
5550 if( IS_INTRA( mb_type ) ) {
5552 if( IS_INTRA4x4( mb_type ) ) {
5553 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5554 mb_type |= MB_TYPE_8x8DCT;
5555 for( i = 0; i < 16; i+=4 ) {
5556 int pred = pred_intra_mode( h, i );
5557 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5558 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5561 for( i = 0; i < 16; i++ ) {
5562 int pred = pred_intra_mode( h, i );
5563 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5565 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5568 write_back_intra_pred_mode(h);
5569 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5571 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5572 if( h->intra16x16_pred_mode < 0 ) return -1;
5575 h->chroma_pred_mode_table[mb_xy] =
5576 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5578 pred_mode= check_intra_pred_mode( h, pred_mode );
5579 if( pred_mode < 0 ) return -1;
5580 h->chroma_pred_mode= pred_mode;
/* --- 8x8 sub-partition decoding (P_8x8 / B_8x8) --- */
5582 } else if( partition_count == 4 ) {
5583 int i, j, sub_partition_count[4], list, ref[2][4];
5585 if( h->slice_type_nos == FF_B_TYPE ) {
5586 for( i = 0; i < 4; i++ ) {
5587 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5588 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5589 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5591 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5592 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5593 pred_direct_motion(h, &mb_type);
5594 h->ref_cache[0][scan8[4]] =
5595 h->ref_cache[1][scan8[4]] =
5596 h->ref_cache[0][scan8[12]] =
5597 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5598 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5599 for( i = 0; i < 4; i++ )
5600 if( IS_DIRECT(h->sub_mb_type[i]) )
5601 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5605 for( i = 0; i < 4; i++ ) {
5606 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5607 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5608 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
/* Reference indices per 8x8 block, then motion vectors per partition. */
5612 for( list = 0; list < h->list_count; list++ ) {
5613 for( i = 0; i < 4; i++ ) {
5614 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5615 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5616 if( h->ref_count[list] > 1 )
5617 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5623 h->ref_cache[list][ scan8[4*i]+1 ]=
5624 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5629 dct8x8_allowed = get_dct8x8_allowed(h);
5631 for(list=0; list<h->list_count; list++){
5633 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5634 if(IS_DIRECT(h->sub_mb_type[i])){
5635 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5639 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5640 const int sub_mb_type= h->sub_mb_type[i];
5641 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5642 for(j=0; j<sub_partition_count[i]; j++){
5645 const int index= 4*i + block_width*j;
5646 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5647 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
/* MV = predictor + CABAC-coded delta; both mv and mvd caches are filled
 * in the shape of the sub-partition (8x8/8x4/4x8/4x4). */
5648 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5650 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5651 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5652 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5654 if(IS_SUB_8X8(sub_mb_type)){
5656 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5658 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5661 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5663 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5664 }else if(IS_SUB_8X4(sub_mb_type)){
5665 mv_cache[ 1 ][0]= mx;
5666 mv_cache[ 1 ][1]= my;
5668 mvd_cache[ 1 ][0]= mx - mpx;
5669 mvd_cache[ 1 ][1]= my - mpy;
5670 }else if(IS_SUB_4X8(sub_mb_type)){
5671 mv_cache[ 8 ][0]= mx;
5672 mv_cache[ 8 ][1]= my;
5674 mvd_cache[ 8 ][0]= mx - mpx;
5675 mvd_cache[ 8 ][1]= my - mpy;
5677 mv_cache[ 0 ][0]= mx;
5678 mv_cache[ 0 ][1]= my;
5680 mvd_cache[ 0 ][0]= mx - mpx;
5681 mvd_cache[ 0 ][1]= my - mpy;
/* Partition not predicted from this list: zero the mv/mvd 8x8 block. */
5684 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5685 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5686 p[0] = p[1] = p[8] = p[9] = 0;
5687 pd[0]= pd[1]= pd[8]= pd[9]= 0;
/* --- B_DIRECT_16x16 --- */
5691 } else if( IS_DIRECT(mb_type) ) {
5692 pred_direct_motion(h, &mb_type);
5693 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5694 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5695 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
/* --- 16x16 / 16x8 / 8x16 inter partitions --- */
5697 int list, mx, my, i, mpx, mpy;
5698 if(IS_16X16(mb_type)){
5699 for(list=0; list<h->list_count; list++){
5700 if(IS_DIR(mb_type, 0, list)){
5701 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5702 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5704 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5706 for(list=0; list<h->list_count; list++){
5707 if(IS_DIR(mb_type, 0, list)){
5708 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5710 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5711 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5712 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5714 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5715 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5717 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5720 else if(IS_16X8(mb_type)){
5721 for(list=0; list<h->list_count; list++){
5723 if(IS_DIR(mb_type, i, list)){
5724 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5725 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5727 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5730 for(list=0; list<h->list_count; list++){
5732 if(IS_DIR(mb_type, i, list)){
5733 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5734 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5735 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5736 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5738 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5739 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5741 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5742 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5747 assert(IS_8X16(mb_type));
5748 for(list=0; list<h->list_count; list++){
5750 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5751 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5752 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5754 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5757 for(list=0; list<h->list_count; list++){
5759 if(IS_DIR(mb_type, i, list)){
5760 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5761 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5762 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5764 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5765 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5766 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5768 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5769 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5776 if( IS_INTER( mb_type ) ) {
5777 h->chroma_pred_mode_table[mb_xy] = 0;
5778 write_back_motion( h, mb_type );
/* --- coded_block_pattern (explicit unless intra16x16, where cbp comes
 * from the mb_type table) --- */
5781 if( !IS_INTRA16x16( mb_type ) ) {
5782 cbp = decode_cabac_mb_cbp_luma( h );
5784 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5787 h->cbp_table[mb_xy] = h->cbp = cbp;
5789 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5790 if( decode_cabac_mb_transform_size( h ) )
5791 mb_type |= MB_TYPE_8x8DCT;
5793 s->current_picture.mb_type[mb_xy]= mb_type;
/* --- residual decoding --- */
5795 if( cbp || IS_INTRA16x16( mb_type ) ) {
5796 const uint8_t *scan, *scan8x8, *dc_scan;
5797 const uint32_t *qmul;
5800 if(IS_INTERLACED(mb_type)){
5801 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5802 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5803 dc_scan= luma_dc_field_scan;
5805 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5806 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5807 dc_scan= luma_dc_zigzag_scan;
/* mb_qp_delta, with wraparound of qscale into [0,51]. */
5810 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5811 if( dqp == INT_MIN ){
5812 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
5816 if(((unsigned)s->qscale) > 51){
5817 if(s->qscale<0) s->qscale+= 52;
5818 else s->qscale-= 52;
5820 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5821 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
5823 if( IS_INTRA16x16( mb_type ) ) {
5825 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
5826 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
5829 qmul = h->dequant4_coeff[0][s->qscale];
5830 for( i = 0; i < 16; i++ ) {
5831 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
5832 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
5835 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
5839 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
5840 if( cbp & (1<<i8x8) ) {
5841 if( IS_8x8DCT(mb_type) ) {
5842 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
5843 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
5845 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
5846 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
5847 const int index = 4*i8x8 + i4x4;
5848 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
5850 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
5851 //STOP_TIMER("decode_residual")
5855 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
5856 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
/* Chroma DC then chroma AC blocks. */
5863 for( c = 0; c < 2; c++ ) {
5864 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
5865 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
5871 for( c = 0; c < 2; c++ ) {
5872 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
5873 for( i = 0; i < 4; i++ ) {
5874 const int index = 16 + 4 * c + i;
5875 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
5876 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
5880 uint8_t * const nnz= &h->non_zero_count_cache[0];
5881 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5882 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
/* No coded residual at all: clear the whole nnz cache. */
5885 uint8_t * const nnz= &h->non_zero_count_cache[0];
5886 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
5887 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
5888 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
5889 h->last_qscale_diff = 0;
5892 s->current_picture.qscale_table[mb_xy]= s->qscale;
5893 write_back_non_zero_count(h);
/* Undo the ref_count doubling done for MBAFF field MBs above. */
5896 h->ref_count[0] >>= 1;
5897 h->ref_count[1] >>= 1;
/* Deblocks one vertical luma edge (16 pixels tall). bS<4 edges go through
 * the DSP h_loop_filter_luma with per-4px tc0 values; the bS==4 (intra)
 * case runs the strong-filter C code below. NOTE(review): branch openers
 * and closing braces are elided in this extract. */
5904 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5906 const int index_a = qp + h->slice_alpha_c0_offset;
5907 const int alpha = (alpha_table+52)[index_a];
5908 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5913 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
5914 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
5916 /* 16px edge length, because bS=4 is triggered by being at
5917 * the edge of an intra MB, so all 4 bS are the same */
5918 for( d = 0; d < 16; d++ ) {
5919 const int p0 = pix[-1];
5920 const int p1 = pix[-2];
5921 const int p2 = pix[-3];
5923 const int q0 = pix[0];
5924 const int q1 = pix[1];
5925 const int q2 = pix[2];
/* Filter only where the edge looks like a blocking artifact rather than
 * a real image edge (alpha/beta thresholds). */
5927 if( FFABS( p0 - q0 ) < alpha &&
5928 FFABS( p1 - p0 ) < beta &&
5929 FFABS( q1 - q0 ) < beta ) {
5931 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
5932 if( FFABS( p2 - p0 ) < beta)
5934 const int p3 = pix[-4];
/* Strong filter, p side: 3 taps rewritten. */
5936 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
5937 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
5938 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
5941 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5943 if( FFABS( q2 - q0 ) < beta)
5945 const int q3 = pix[3];
/* Strong filter, q side. */
5947 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
5948 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
5949 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
5952 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
/* Weak fallback: only p0/q0 are modified. */
5956 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
5957 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
5959 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
/* Deblocks one vertical chroma edge: bS<4 uses the normal chroma DSP filter
 * with tc0+1 clipping values (0 disables a line), bS==4 the intra variant. */
5965 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
5967 const int index_a = qp + h->slice_alpha_c0_offset;
5968 const int alpha = (alpha_table+52)[index_a];
5969 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
5974 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
5975 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
5977 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* MBAFF variant of the vertical luma edge filter: processes the 16 rows one
 * at a time because bS, qp, alpha and beta can differ per row (qp[2] holds
 * the two per-field quantizers). NOTE(review): lines are elided in this
 * extract; visible code kept verbatim. */
5981 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
5983 for( i = 0; i < 16; i++, pix += stride) {
/* bS has 8 entries here; the index derivation depends on field mode
 * (partially elided). */
5989 int bS_index = (i >> 1);
5992 bS_index |= (i & 1);
5995 if( bS[bS_index] == 0 ) {
/* Per-row parameter selection: field MBs take qp by half (i>>3),
 * frame MBs alternate per row parity. */
5999 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6000 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6001 alpha = (alpha_table+52)[index_a];
6002 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6004 if( bS[bS_index] < 4 ) {
/* Normal filter: clip the delta to +-tc (tc0 extended per side below). */
6005 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6006 const int p0 = pix[-1];
6007 const int p1 = pix[-2];
6008 const int p2 = pix[-3];
6009 const int q0 = pix[0];
6010 const int q1 = pix[1];
6011 const int q2 = pix[2];
6013 if( FFABS( p0 - q0 ) < alpha &&
6014 FFABS( p1 - p0 ) < beta &&
6015 FFABS( q1 - q0 ) < beta ) {
6019 if( FFABS( p2 - p0 ) < beta ) {
6020 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6023 if( FFABS( q2 - q0 ) < beta ) {
6024 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6028 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6029 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6030 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6031 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS==4 strong-filter branch, same taps as filter_mb_edgev. */
6034 const int p0 = pix[-1];
6035 const int p1 = pix[-2];
6036 const int p2 = pix[-3];
6038 const int q0 = pix[0];
6039 const int q1 = pix[1];
6040 const int q2 = pix[2];
6042 if( FFABS( p0 - q0 ) < alpha &&
6043 FFABS( p1 - p0 ) < beta &&
6044 FFABS( q1 - q0 ) < beta ) {
6046 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6047 if( FFABS( p2 - p0 ) < beta)
6049 const int p3 = pix[-4];
6051 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6052 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6053 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6056 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6058 if( FFABS( q2 - q0 ) < beta)
6060 const int q3 = pix[3];
6062 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6063 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6064 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6067 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6071 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6072 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6074 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* MBAFF variant of the vertical chroma edge filter: 8 rows, per-row bS/qp
 * selection, only p0/q0 are modified (chroma filter). NOTE(review): the
 * bS_index derivation lines are elided in this extract. */
6079 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6081 for( i = 0; i < 8; i++, pix += stride) {
6089 if( bS[bS_index] == 0 ) {
6093 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6094 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6095 alpha = (alpha_table+52)[index_a];
6096 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6098 if( bS[bS_index] < 4 ) {
/* Chroma normal filter uses tc0+1 as the clip bound. */
6099 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6100 const int p0 = pix[-1];
6101 const int p1 = pix[-2];
6102 const int q0 = pix[0];
6103 const int q1 = pix[1];
6105 if( FFABS( p0 - q0 ) < alpha &&
6106 FFABS( p1 - p0 ) < beta &&
6107 FFABS( q1 - q0 ) < beta ) {
6108 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6110 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6111 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6112 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
/* bS==4: chroma strong filter (3-tap averages). */
6115 const int p0 = pix[-1];
6116 const int p1 = pix[-2];
6117 const int q0 = pix[0];
6118 const int q1 = pix[1];
6120 if( FFABS( p0 - q0 ) < alpha &&
6121 FFABS( p1 - p0 ) < beta &&
6122 FFABS( q1 - q0 ) < beta ) {
6124 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6125 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6126 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
/* Horizontal counterpart of filter_mb_edgev: same alpha/beta/tc logic, but
 * samples are addressed across rows via pix_next (= stride) instead of
 * adjacent columns. NOTE(review): lines elided in this extract. */
6132 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6134 const int index_a = qp + h->slice_alpha_c0_offset;
6135 const int alpha = (alpha_table+52)[index_a];
6136 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6137 const int pix_next = stride;
6142 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6143 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6145 /* 16px edge length, see filter_mb_edgev */
6146 for( d = 0; d < 16; d++ ) {
6147 const int p0 = pix[-1*pix_next];
6148 const int p1 = pix[-2*pix_next];
6149 const int p2 = pix[-3*pix_next];
6150 const int q0 = pix[0];
6151 const int q1 = pix[1*pix_next];
6152 const int q2 = pix[2*pix_next];
6154 if( FFABS( p0 - q0 ) < alpha &&
6155 FFABS( p1 - p0 ) < beta &&
6156 FFABS( q1 - q0 ) < beta ) {
6158 const int p3 = pix[-4*pix_next];
6159 const int q3 = pix[ 3*pix_next];
6161 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6162 if( FFABS( p2 - p0 ) < beta) {
/* Strong filter, p side. */
6164 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6165 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6166 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6169 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6171 if( FFABS( q2 - q0 ) < beta) {
/* Strong filter, q side. */
6173 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6174 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6175 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6178 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6182 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6183 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6185 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
/* Horizontal chroma edge filter; mirrors filter_mb_edgecv using the
 * vertical-direction DSP entry points. */
6192 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6194 const int index_a = qp + h->slice_alpha_c0_offset;
6195 const int alpha = (alpha_table+52)[index_a];
6196 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6201 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6202 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6204 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
/* Fast-path deblocking for one macroblock: handles the common cases (same
 * slice, no per-MB chroma qp differences, not the first row/column) and
 * falls back to the full filter_mb() otherwise. NOTE(review): lines are
 * elided in this extract -- several branch bodies and the trailing
 * FILTER() invocations are not visible. */
6208 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6209 MpegEncContext * const s = &h->s;
6210 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6212 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
/* Fall back to the general filter for border MBs, missing DSP support,
 * chroma-qp offsets, or cross-slice edges in deblock mode 2. */
6216 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6218 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6219 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6220 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6223 assert(!FRAME_MBAFF);
/* Average the current qp with the left/top neighbors' for the MB-boundary
 * edges; skip filtering entirely if everything is below qp_thresh. */
6225 mb_type = s->current_picture.mb_type[mb_xy];
6226 qp = s->current_picture.qscale_table[mb_xy];
6227 qp0 = s->current_picture.qscale_table[mb_xy-1];
6228 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6229 qpc = get_chroma_qp( h, 0, qp );
6230 qpc0 = get_chroma_qp( h, 0, qp0 );
6231 qpc1 = get_chroma_qp( h, 0, qp1 );
6232 qp0 = (qp + qp0 + 1) >> 1;
6233 qp1 = (qp + qp1 + 1) >> 1;
6234 qpc0 = (qpc + qpc0 + 1) >> 1;
6235 qpc1 = (qpc + qpc1 + 1) >> 1;
6236 qp_thresh = 15 - h->slice_alpha_c0_offset;
6237 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6238 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
/* Intra MB: all edge strengths are known constants (4 on the MB boundary,
 * 3 inside / on top in field pictures), so filter directly. */
6241 if( IS_INTRA(mb_type) ) {
6242 int16_t bS4[4] = {4,4,4,4};
6243 int16_t bS3[4] = {3,3,3,3};
6244 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6245 if( IS_8x8DCT(mb_type) ) {
6246 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6247 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6248 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6249 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6251 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6252 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6253 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6254 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6255 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6256 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6257 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6258 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6260 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6261 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6262 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6263 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6264 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6265 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6266 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6267 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
/* Inter MB: compute bS per edge (DSP helper), with shortcuts for 8x8DCT
 * with all luma cbp bits set, and force bS=4/3 against intra neighbors. */
6270 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6271 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6273 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6275 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6277 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6278 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6279 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6280 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6282 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6283 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6284 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6285 (h->slice_type_nos == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE);
6287 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6288 bSv[0][0] = 0x0004000400040004ULL;
6289 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6290 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
/* FILTER(hv,dir,edge): apply the matching luma+chroma edge filters when
 * any bS in the edge is nonzero; edge 0 uses the averaged boundary qp. */
6292 #define FILTER(hv,dir,edge)\
6293 if(bSv[dir][edge]) {\
6294 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6296 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6297 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6303 } else if( IS_8x8DCT(mb_type) ) {
/**
 * Full (non-fast) in-loop deblocking filter for one macroblock.
 * Computes boundary strengths (bS) for the vertical and horizontal edges of
 * the MB at (mb_x, mb_y) and applies the luma/chroma edge filters, including
 * the MBAFF special cases for the first vertical and horizontal edges.
 * NOTE(review): this excerpt has elided lines (embedded numbering is sparse);
 * comments describe only what the visible code shows.
 */
6322 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6323 MpegEncContext * const s = &h->s;
6324 const int mb_xy= mb_x + mb_y*s->mb_stride;
6325 const int mb_type = s->current_picture.mb_type[mb_xy];
// interlaced MBs use a smaller vertical-MV threshold when deciding bS
6326 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6327 int first_vertical_edge_done = 0;
6330 //for sufficiently low qp, filtering wouldn't do anything
6331 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6333 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6334 int qp = s->current_picture.qscale_table[mb_xy];
// early-exit when this MB and both neighbours average below the threshold
// (the enclosing condition's first clauses are elided in this excerpt)
6336 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6337 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6342 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
6343 if(!h->pps.cabac && h->pps.transform_8x8_mode){
6344 int top_type, left_type[2];
6345 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
6346 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
6347 left_type[1] = s->current_picture.mb_type[h->left_mb_xy[1]];
// rebuild the nnz cache from cbp bits for 8x8-transform neighbours
6349 if(IS_8x8DCT(top_type)){
6350 h->non_zero_count_cache[4+8*0]=
6351 h->non_zero_count_cache[5+8*0]= h->cbp_table[h->top_mb_xy] & 4;
6352 h->non_zero_count_cache[6+8*0]=
6353 h->non_zero_count_cache[7+8*0]= h->cbp_table[h->top_mb_xy] & 8;
6355 if(IS_8x8DCT(left_type[0])){
6356 h->non_zero_count_cache[3+8*1]=
6357 h->non_zero_count_cache[3+8*2]= h->cbp_table[h->left_mb_xy[0]]&2; //FIXME check MBAFF
6359 if(IS_8x8DCT(left_type[1])){
6360 h->non_zero_count_cache[3+8*3]=
6361 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF
// for the current MB, each 8x8 quadrant's nnz comes from one cbp bit
6364 if(IS_8x8DCT(mb_type)){
6365 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
6366 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp_table[mb_xy] & 1;
6368 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
6369 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp_table[mb_xy] & 2;
6371 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
6372 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp_table[mb_xy] & 4;
6374 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
6375 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp_table[mb_xy] & 8;
// MBAFF: the first vertical edge needs special handling when the current
// and left MB pairs differ in frame/field coding
6380 // left mb is in picture
6381 && h->slice_table[mb_xy-1] != 255
6382 // and current and left pair do not have the same interlaced type
6383 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6384 // and left mb is in the same slice if deblocking_filter == 2
6385 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6386 /* First vertical edge is different in MBAFF frames
6387 * There are 8 different bS to compute and 2 different Qp
6389 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6390 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6395 int mb_qp, mbn0_qp, mbn1_qp;
6397 first_vertical_edge_done = 1;
6399 if( IS_INTRA(mb_type) )
6400 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6402 for( i = 0; i < 8; i++ ) {
6403 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6405 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6407 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6408 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6409 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
// average this MB's QP with each left neighbour's QP (luma and both chroma)
6416 mb_qp = s->current_picture.qscale_table[mb_xy];
6417 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6418 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6419 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6420 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6421 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6422 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6423 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6424 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6425 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6426 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6427 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6428 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6431 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6432 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6433 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6434 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6435 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6437 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6438 for( dir = 0; dir < 2; dir++ )
// mbm_xy/mbm_type: the neighbour MB across the first edge in this direction
6441 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6442 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6443 int (*ref2frm) [48+2] = h->ref2frm[ h->slice_num &15 ];
6444 int (*ref2frmm)[48+2] = h->ref2frm[ h->slice_table[mbm_xy]&15 ];
// if the neighbour is outside any slice, skip edge 0
6445 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6447 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6448 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6449 // how often to recheck mv-based bS when iterating between edges
6450 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6451 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6452 // how often to recheck mv-based bS when iterating along each edge
6453 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6455 if (first_vertical_edge_done) {
6457 first_vertical_edge_done = 0;
6460 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6463 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6464 && !IS_INTERLACED(mb_type)
6465 && IS_INTERLACED(mbm_type)
6467 // This is a special case in the norm where the filtering must
6468 // be done twice (one each of the field) even if we are in a
6469 // frame macroblock.
6471 static const int nnz_idx[4] = {4,5,6,3};
6472 unsigned int tmp_linesize = 2 * linesize;
6473 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6474 int mbn_xy = mb_xy - 2 * s->mb_stride;
// filter the top edge once per field of the interlaced neighbour pair
6479 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6480 if( IS_INTRA(mb_type) ||
6481 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6482 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6484 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6485 for( i = 0; i < 4; i++ ) {
6486 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6487 mbn_nnz[nnz_idx[i]] != 0 )
6493 // Do not use s->qscale as luma quantizer because it has not the same
6494 // value in IPCM macroblocks.
6495 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6496 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6497 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6498 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6499 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6500 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6501 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6502 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
// main per-edge loop: edge 0 borders the neighbour MB, edges 1..3 are internal
6509 for( edge = start; edge < edges; edge++ ) {
6510 /* mbn_xy: neighbor macroblock */
6511 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6512 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6513 int (*ref2frmn)[48+2] = edge > 0 ? ref2frm : ref2frmm;
// 8x8 transform: odd internal edges are not filtered
6517 if( (edge&1) && IS_8x8DCT(mb_type) )
6520 if( IS_INTRA(mb_type) ||
6521 IS_INTRA(mbn_type) ) {
6524 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6525 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6534 bS[0] = bS[1] = bS[2] = bS[3] = value;
// inter MBs: bS derived from nnz, reference frames and MV differences
6539 if( edge & mask_edge ) {
6540 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6543 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6544 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6547 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6548 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6549 int bn_idx= b_idx - (dir ? 8:1);
6552 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6553 v |= ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[l][h->ref_cache[l][bn_idx]+2] ||
6554 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6555 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
// B slices: also compare against the other list's refs/MVs before setting bS
6558 if(h->slice_type_nos == FF_B_TYPE && v){
6560 for( l = 0; !v && l < 2; l++ ) {
6562 v |= ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[ln][h->ref_cache[ln][bn_idx]+2] ||
6563 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6564 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
6568 bS[0] = bS[1] = bS[2] = bS[3] = v;
// general case: compute bS per 4x4 sub-edge
6574 for( i = 0; i < 4; i++ ) {
6575 int x = dir == 0 ? edge : i;
6576 int y = dir == 0 ? i : edge;
6577 int b_idx= 8 + 4 + x + 8*y;
6578 int bn_idx= b_idx - (dir ? 8:1);
6580 if( h->non_zero_count_cache[b_idx] != 0 ||
6581 h->non_zero_count_cache[bn_idx] != 0 ) {
6587 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
6588 if( ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[l][h->ref_cache[l][bn_idx]+2] ||
6589 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6590 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6596 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
6598 for( l = 0; l < 2; l++ ) {
6600 if( ref2frm[l][h->ref_cache[l][b_idx]+2] != ref2frmn[ln][h->ref_cache[ln][bn_idx]+2] ||
6601 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
6602 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
// all-zero bS: nothing to filter on this edge
6611 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6616 // Do not use s->qscale as luma quantizer because it has not the same
6617 // value in IPCM macroblocks.
6618 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6619 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6620 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6621 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
// apply the edge filters; chroma only on even edges (half resolution)
6623 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6624 if( (edge&1) == 0 ) {
6625 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6626 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6627 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6628 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6631 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6632 if( (edge&1) == 0 ) {
6633 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6634 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6635 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6636 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/**
 * Decode all macroblocks of one slice.
 * Dispatches to the CABAC or CAVLC macroblock decoder depending on
 * pps.cabac, advances the MB position, reports decoded regions to the
 * error-resilience layer, and draws horizontal bands as rows complete.
 * @return 0 on normal slice end, -1 on decode error (as visible here)
 * NOTE(review): this excerpt has elided lines (embedded numbering is sparse);
 * fix below is limited to the mojibake-corrupted condition at line 6801,
 * restored to match the identical check at line 6759/6789.
 */
6643 static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
6644 MpegEncContext * const s = &h->s;
6645 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6649 if( h->pps.cabac ) {
// byte-align, then hand the remaining buffer to the CABAC decoder
6653 align_get_bits( &s->gb );
6656 ff_init_cabac_states( &h->cabac);
6657 ff_init_cabac_decoder( &h->cabac,
6658 s->gb.buffer + get_bits_count(&s->gb)/8,
6659 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6660 /* calculate pre-state */
6661 for( i= 0; i < 460; i++ ) {
6663 if( h->slice_type_nos == FF_I_TYPE )
6664 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6666 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6669 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6671 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
// CABAC macroblock loop
6676 int ret = decode_mb_cabac(h);
6678 //STOP_TIMER("decode_mb_cabac")
6680 if(ret>=0) hl_decode_mb(h);
6682 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6685 if(ret>=0) ret = decode_mb_cabac(h);
6687 if(ret>=0) hl_decode_mb(h);
6690 eos = get_cabac_terminate( &h->cabac );
// bytestream overrun of more than 2 bytes indicates corrupt input
6692 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6693 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6694 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6698 if( ++s->mb_x >= s->mb_width ) {
6700 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6702 if(FIELD_OR_MBAFF_PICTURE) {
6707 if( eos || s->mb_y >= s->mb_height ) {
6708 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6709 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
// CAVLC macroblock loop
6716 int ret = decode_mb_cavlc(h);
6718 if(ret>=0) hl_decode_mb(h);
6720 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6722 ret = decode_mb_cavlc(h);
6724 if(ret>=0) hl_decode_mb(h);
6729 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6730 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6735 if(++s->mb_x >= s->mb_width){
6737 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6739 if(FIELD_OR_MBAFF_PICTURE) {
6742 if(s->mb_y >= s->mb_height){
6743 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
// exact bit consumption distinguishes a clean slice end from an error
6745 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6746 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6750 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6757 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6758 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6759 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6760 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6764 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
// data-partitioned path: plain raster scan over the remaining MBs
6773 for(;s->mb_y < s->mb_height; s->mb_y++){
6774 for(;s->mb_x < s->mb_width; s->mb_x++){
6775 int ret= decode_mb(h);
6780 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6781 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6786 if(++s->mb_x >= s->mb_width){
6788 if(++s->mb_y >= s->mb_height){
6789 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6790 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6794 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6801 if(get_bits_count(s->gb) >= s->gb.size_in_bits){
6802 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6803 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6807 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6814 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6817 return -1; //not reached
/**
 * Parse an SEI "user data unregistered" payload.
 * Reads up to sizeof(user_data)-1 bytes, scans the text for an x264 version
 * string to record h->x264_build (used elsewhere for bug workarounds), and
 * skips any remaining payload bytes.
 * NOTE(review): excerpt has elided lines; comments cover visible code only.
 */
6820 static int decode_unregistered_user_data(H264Context *h, int size){
6821 MpegEncContext * const s = &h->s;
6822 uint8_t user_data[16+256];
6828 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6829 user_data[i]= get_bits(&s->gb, 8);
// the payload text starts after the 16-byte UUID; detect the x264 core build
6833 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6834 if(e==1 && build>=0)
6835 h->x264_build= build;
6837 if(s->avctx->debug & FF_DEBUG_BUGS)
6838 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
// consume any payload bytes that did not fit in the local buffer
6841 skip_bits(&s->gb, 8);
/**
 * Parse the SEI messages of a NAL unit.
 * Each message carries a type and a size, both coded as sequences of 0xFF
 * bytes plus a terminator byte (ff_byte accumulation per the H.264 spec);
 * recognized payloads are dispatched, others are skipped.
 */
6846 static int decode_sei(H264Context *h){
6847 MpegEncContext * const s = &h->s;
// stop when fewer than 16 bits remain (smallest possible SEI message)
6849 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
// accumulate payloadType: each 0xFF byte adds 255, terminator byte ends it
6854 type+= show_bits(&s->gb, 8);
6855 }while(get_bits(&s->gb, 8) == 255);
// accumulate payloadSize the same way
6859 size+= show_bits(&s->gb, 8);
6860 }while(get_bits(&s->gb, 8) == 255);
6864 if(decode_unregistered_user_data(h, size) < 0)
// unrecognized payload types are skipped wholesale
6868 skip_bits(&s->gb, 8*size);
6871 //FIXME check bits here
6872 align_get_bits(&s->gb);
/**
 * Parse (and discard) the HRD parameters of a VUI block.
 * All fields are read to keep the bitstream position correct; none of the
 * values are stored, as the visible code shows.
 */
6878 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
6879 MpegEncContext * const s = &h->s;
6881 cpb_count = get_ue_golomb(&s->gb) + 1;
6882 get_bits(&s->gb, 4); /* bit_rate_scale */
6883 get_bits(&s->gb, 4); /* cpb_size_scale */
// per-CPB parameters
6884 for(i=0; i<cpb_count; i++){
6885 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
6886 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
6887 get_bits1(&s->gb); /* cbr_flag */
6889 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
6890 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
6891 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
6892 get_bits(&s->gb, 5); /* time_offset_length */
/**
 * Parse the VUI (Video Usability Information) block of an SPS.
 * Stores sample aspect ratio, timing info, HRD presence and bitstream
 * restrictions into *sps; fields the decoder does not use are read and
 * discarded to keep the bit position correct.
 * NOTE(review): excerpt has elided lines; comments cover visible code only.
 */
6895 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
6896 MpegEncContext * const s = &h->s;
6897 int aspect_ratio_info_present_flag;
6898 unsigned int aspect_ratio_idc;
6899 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
6901 aspect_ratio_info_present_flag= get_bits1(&s->gb);
6903 if( aspect_ratio_info_present_flag ) {
6904 aspect_ratio_idc= get_bits(&s->gb, 8);
// EXTENDED_SAR carries an explicit num/den pair; else use the fixed table
6905 if( aspect_ratio_idc == EXTENDED_SAR ) {
6906 sps->sar.num= get_bits(&s->gb, 16);
6907 sps->sar.den= get_bits(&s->gb, 16);
6908 }else if(aspect_ratio_idc < sizeof(pixel_aspect)/sizeof(*pixel_aspect)){
6909 sps->sar= pixel_aspect[aspect_ratio_idc];
6911 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
6918 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6920 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
6921 get_bits1(&s->gb); /* overscan_appropriate_flag */
6924 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
6925 get_bits(&s->gb, 3); /* video_format */
6926 get_bits1(&s->gb); /* video_full_range_flag */
6927 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
6928 get_bits(&s->gb, 8); /* colour_primaries */
6929 get_bits(&s->gb, 8); /* transfer_characteristics */
6930 get_bits(&s->gb, 8); /* matrix_coefficients */
6934 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
6935 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
6936 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
// timing info feeds the frame-rate computation elsewhere
6939 sps->timing_info_present_flag = get_bits1(&s->gb);
6940 if(sps->timing_info_present_flag){
6941 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
6942 sps->time_scale = get_bits_long(&s->gb, 32);
6943 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
6946 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
6947 if(nal_hrd_parameters_present_flag)
6948 decode_hrd_parameters(h, sps);
6949 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
6950 if(vcl_hrd_parameters_present_flag)
6951 decode_hrd_parameters(h, sps);
6952 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
6953 get_bits1(&s->gb); /* low_delay_hrd_flag */
6954 get_bits1(&s->gb); /* pic_struct_present_flag */
6956 sps->bitstream_restriction_flag = get_bits1(&s->gb);
6957 if(sps->bitstream_restriction_flag){
6958 unsigned int num_reorder_frames;
6959 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
6960 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
6961 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
6962 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
6963 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
6964 num_reorder_frames= get_ue_golomb(&s->gb);
6965 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
// cap to the DPB maximum to avoid out-of-range reorder buffering
6967 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
6968 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
6972 sps->num_reorder_frames= num_reorder_frames;
/**
 * Parse one scaling list (16 or 64 entries) from the bitstream.
 * If the list is absent, copy fallback_list; if the first delta yields zero,
 * copy the JVT default list; otherwise decode delta-coded values along the
 * zigzag scan, repeating the last value once deltas stop.
 */
6978 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
6979 const uint8_t *jvt_list, const uint8_t *fallback_list){
6980 MpegEncContext * const s = &h->s;
6981 int i, last = 8, next = 8;
6982 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
6983 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
6984 memcpy(factors, fallback_list, size*sizeof(uint8_t));
6986 for(i=0;i<size;i++){
// delta_scale is signed; wrap into 0..255 per the spec
6988 next = (last + get_se_golomb(&s->gb)) & 0xff;
6989 if(!i && !next){ /* matrix not written, we use the preset one */
6990 memcpy(factors, jvt_list, size*sizeof(uint8_t));
// next==0 means "repeat last value" for the rest of the list
6993 last = factors[scan[i]] = next ? next : last;
/**
 * Parse the full set of scaling matrices for an SPS or PPS.
 * Chooses fallbacks per the spec: for a PPS, fall back to the SPS matrices
 * when present, else to the JVT defaults; each intra/inter chroma list also
 * falls back to the previously decoded list of the same kind.
 */
6997 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
6998 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
6999 MpegEncContext * const s = &h->s;
7000 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7001 const uint8_t *fallback[4] = {
7002 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7003 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7004 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7005 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7007 if(get_bits1(&s->gb)){
7008 sps->scaling_matrix_present |= is_sps;
7009 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7010 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7011 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7012 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7013 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7014 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
// 8x8 lists exist only when the 8x8 transform can be used
7015 if(is_sps || pps->transform_8x8_mode){
7016 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7017 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
// no matrices in this PPS: inherit the SPS matrices wholesale
7019 } else if(fallback_sps) {
7020 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7021 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
7026 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
// (return-type line elided in this excerpt; 'name' is used only for error logs)
7029 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
7030 const size_t size, const char *name)
// reject ids outside [0, max)
7033 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
// lazily allocate the slot; zero-filled so absent fields read as 0
7038 vec[id] = av_mallocz(size);
7040 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
/**
 * Parse a Sequence Parameter Set NAL unit into h->sps_buffers[sps_id].
 * Validates POC type, reference-frame count, picture dimensions and cropping;
 * optionally parses scaling matrices (high profile) and VUI parameters.
 * NOTE(review): excerpt has elided lines; comments cover visible code only.
 */
7045 static inline int decode_seq_parameter_set(H264Context *h){
7046 MpegEncContext * const s = &h->s;
7047 int profile_idc, level_idc;
7048 unsigned int sps_id, tmp, mb_width, mb_height;
7052 profile_idc= get_bits(&s->gb, 8);
7053 get_bits1(&s->gb); //constraint_set0_flag
7054 get_bits1(&s->gb); //constraint_set1_flag
7055 get_bits1(&s->gb); //constraint_set2_flag
7056 get_bits1(&s->gb); //constraint_set3_flag
7057 get_bits(&s->gb, 4); // reserved
7058 level_idc= get_bits(&s->gb, 8);
7059 sps_id= get_ue_golomb(&s->gb);
7061 sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
7065 sps->profile_idc= profile_idc;
7066 sps->level_idc= level_idc;
7068 if(sps->profile_idc >= 100){ //high profile
7069 sps->chroma_format_idc= get_ue_golomb(&s->gb);
7070 if(sps->chroma_format_idc == 3)
7071 get_bits1(&s->gb); //residual_color_transform_flag
7072 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7073 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7074 sps->transform_bypass = get_bits1(&s->gb);
7075 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
// baseline/main: 4:2:0 implied, no scaling matrices in the SPS
7077 sps->scaling_matrix_present = 0;
7078 sps->chroma_format_idc= 1;
7081 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7082 sps->poc_type= get_ue_golomb(&s->gb);
7084 if(sps->poc_type == 0){ //FIXME #define
7085 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7086 } else if(sps->poc_type == 1){//FIXME #define
7087 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7088 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7089 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7090 tmp= get_ue_golomb(&s->gb);
// bound the cycle length by the fixed offset_for_ref_frame array size
7092 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7093 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7096 sps->poc_cycle_length= tmp;
7098 for(i=0; i<sps->poc_cycle_length; i++)
7099 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7100 }else if(sps->poc_type != 2){
7101 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7105 tmp= get_ue_golomb(&s->gb);
7106 if(tmp > MAX_PICTURE_COUNT-2 || tmp >= 32){
7107 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7110 sps->ref_frame_count= tmp;
7111 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7112 mb_width= get_ue_golomb(&s->gb) + 1;
7113 mb_height= get_ue_golomb(&s->gb) + 1;
// guard 16*mb_* against integer overflow before the generic dimension check
7114 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7115 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7116 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7119 sps->mb_width = mb_width;
7120 sps->mb_height= mb_height;
7122 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7123 if(!sps->frame_mbs_only_flag)
7124 sps->mb_aff= get_bits1(&s->gb);
7128 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7130 #ifndef ALLOW_INTERLACE
7132 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7134 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7135 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7137 sps->crop= get_bits1(&s->gb);
7139 sps->crop_left = get_ue_golomb(&s->gb);
7140 sps->crop_right = get_ue_golomb(&s->gb);
7141 sps->crop_top = get_ue_golomb(&s->gb);
7142 sps->crop_bottom= get_ue_golomb(&s->gb);
7143 if(sps->crop_left || sps->crop_top){
7144 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7146 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !h->sps.frame_mbs_only_flag)){
7147 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7153 sps->crop_bottom= 0;
7156 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7157 if( sps->vui_parameters_present_flag )
7158 decode_vui_parameters(h, sps);
7160 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7161 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s %s\n",
7162 sps_id, sps->profile_idc, sps->level_idc,
7164 sps->ref_frame_count,
7165 sps->mb_width, sps->mb_height,
7166 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7167 sps->direct_8x8_inference_flag ? "8B8" : "",
7168 sps->crop_left, sps->crop_right,
7169 sps->crop_top, sps->crop_bottom,
7170 sps->vui_parameters_present_flag ? "VUI" : "",
7171 ((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
// Build chroma-QP lookup table t (0=Cb, 1=Cr) by applying the chroma QP
// index offset to each luma QP 0..51 and mapping through the spec's
// chroma_qp table. (return-type line elided in this excerpt)
build_qp_table(PPS *pps, int t, int index)
7181 for(i = 0; i < 52; i++)
7182 pps->chroma_qp_table[t][i] = chroma_qp[av_clip(i + index, 0, 51)];
/**
 * Parse a Picture Parameter Set NAL unit into h->pps_buffers[pps_id].
 * Validates the referenced SPS id and reference counts, parses the optional
 * trailing extension (8x8 transform mode, scaling matrices, second chroma QP
 * offset), and precomputes the chroma QP lookup tables.
 * FMO (slice_group_count > 1) is parsed but not supported, as logged below.
 * NOTE(review): excerpt has elided lines; comments cover visible code only.
 */
7185 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7186 MpegEncContext * const s = &h->s;
7187 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7190 pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
7194 tmp= get_ue_golomb(&s->gb);
7195 if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
7196 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7201 pps->cabac= get_bits1(&s->gb);
7202 pps->pic_order_present= get_bits1(&s->gb);
7203 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7204 if(pps->slice_group_count > 1 ){
7205 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7206 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
// the spec's syntax tables below document what a full FMO parser would read
7207 switch(pps->mb_slice_group_map_type){
7210 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7211 | run_length[ i ] |1 |ue(v) |
7216 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7218 | top_left_mb[ i ] |1 |ue(v) |
7219 | bottom_right_mb[ i ] |1 |ue(v) |
7227 | slice_group_change_direction_flag |1 |u(1) |
7228 | slice_group_change_rate_minus1 |1 |ue(v) |
7233 | slice_group_id_cnt_minus1 |1 |ue(v) |
7234 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7236 | slice_group_id[ i ] |1 |u(v) |
7241 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7242 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7243 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7244 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7245 pps->ref_count[0]= pps->ref_count[1]= 1;
7249 pps->weighted_pred= get_bits1(&s->gb);
7250 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7251 pps->init_qp= get_se_golomb(&s->gb) + 26;
7252 pps->init_qs= get_se_golomb(&s->gb) + 26;
7253 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7254 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7255 pps->constrained_intra_pred= get_bits1(&s->gb);
7256 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7258 pps->transform_8x8_mode= 0;
7259 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
// flat (all 16) matrices are the default until the extension overrides them
7260 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7261 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
// optional trailing extension: present iff bits remain before bit_length
7263 if(get_bits_count(&s->gb) < bit_length){
7264 pps->transform_8x8_mode= get_bits1(&s->gb);
7265 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7266 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7268 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7271 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7272 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7273 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1])
7274 h->pps.chroma_qp_diff= 1;
7276 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7277 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7278 pps_id, pps->sps_id,
7279 pps->cabac ? "CABAC" : "CAVLC",
7280 pps->slice_group_count,
7281 pps->ref_count[0], pps->ref_count[1],
7282 pps->weighted_pred ? "weighted" : "",
7283 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7284 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7285 pps->constrained_intra_pred ? "CONSTR" : "",
7286 pps->redundant_pic_cnt_present ? "REDU" : "",
7287 pps->transform_8x8_mode ? "8x8DCT" : ""
7295 * Call decode_slice() for each context.
7297 * @param h h264 master context
7298 * @param context_count number of contexts to execute
// NOTE(review): this file is a line-numbered fragment dump; source lines are
// missing between the numbered lines below, so comments describe only what is
// visible here.
7300 static void execute_decode_slices(H264Context *h, int context_count){
7301 MpegEncContext * const s = &h->s;
7302 AVCodecContext * const avctx= s->avctx;
// Fast path: with a single context, decode the slice directly on this thread.
7306 if(context_count == 1) {
7307 decode_slice(avctx, h);
// (else branch; lines missing from dump) Prepare each worker context before
// handing the slice set to avctx->execute() for parallel decoding.
7309 for(i = 1; i < context_count; i++) {
7310 hx = h->thread_context[i];
7311 hx->s.error_resilience = avctx->error_resilience;
7312 hx->s.error_count = 0;
7315 avctx->execute(avctx, (void *)decode_slice,
7316 (void **)h->thread_context, NULL, context_count);
7318 /* pull back stuff from slices to master context */
// The last thread context holds the final decode position/state; copy it back
// so the master context reflects where decoding stopped.
7319 hx = h->thread_context[context_count - 1];
7320 s->mb_x = hx->s.mb_x;
7321 s->mb_y = hx->s.mb_y;
7322 s->dropable = hx->s.dropable;
7323 s->picture_structure = hx->s.picture_structure;
// Aggregate per-thread error counts into the master context for concealment.
7324 for(i = 1; i < context_count; i++)
7325 h->s.error_count += h->thread_context[i]->s.error_count;
// NOTE(review): fragment dump — interior lines (loop header, case labels,
// error paths) are missing; comments below cover only visible statements.
//
// Splits the input buffer into NAL units (either AVC length-prefixed or
// Annex-B start-code delimited, per h->is_avc), unescapes each via
// decode_nal(), and dispatches on nal_unit_type to the slice / SPS / PPS /
// partition parsers.  Slices are batched into up to h->max_contexts thread
// contexts and flushed through execute_decode_slices().
7330 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7331 MpegEncContext * const s = &h->s;
7332 AVCodecContext * const avctx= s->avctx;
7334 H264Context *hx; ///< thread context
7335 int context_count = 0;
7337 h->max_contexts = avctx->thread_count;
// Debug hex dump of the first 50 input bytes (presumably inside a disabled
// debug conditional — surrounding lines are missing; verify against upstream).
7340 for(i=0; i<50; i++){
7341 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
// Unless decoding in CHUNKS mode, each call starts a fresh access unit.
7344 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7345 h->current_slice = 0;
7346 if (!s->first_field)
7347 s->current_picture_ptr= NULL;
7359 if(buf_index >= buf_size) break;
// AVC (MP4-style) input: read the big-endian NAL size prefix
// (h->nal_length_size bytes) and bound-check it against the buffer.
7361 for(i = 0; i < h->nal_length_size; i++)
7362 nalsize = (nalsize << 8) | buf[buf_index++];
7363 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7368 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7373 // start code prefix search
7374 for(; buf_index + 3 < buf_size; buf_index++){
7375 // This should always succeed in the first iteration.
7376 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7380 if(buf_index+3 >= buf_size) break;
// Parse the next NAL into the current thread context's rbsp buffer.
7385 hx = h->thread_context[context_count];
7387 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7388 if (ptr==NULL || dst_length < 0){
// Strip trailing zero bytes, then compute the payload length in bits minus
// the rbsp_stop_one_bit trailing pattern.
7391 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7393 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7395 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7396 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7399 if (h->is_avc && (nalsize != consumed)){
7400 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7404 buf_index += consumed;
// Skip non-reference NALs entirely when hurry_up / skip_frame says so.
7406 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7407 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7412 switch(hx->nal_unit_type){
// (IDR slice case — label line missing) An IDR must not mix with non-IDR
// slices in the same access unit.
7414 if (h->nal_unit_type != NAL_IDR_SLICE) {
7415 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7418 idr(h); //FIXME ensure we don't loose some frames if there is reordering
// Regular slice: single bitstream, no data partitioning.
7420 init_get_bits(&hx->s.gb, ptr, bit_length);
7422 hx->inter_gb_ptr= &hx->s.gb;
7423 hx->s.data_partitioning = 0;
7425 if((err = decode_slice_header(hx, h)))
7428 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
// Only schedule the slice for decoding if it passes all skip filters.
7429 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7430 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7431 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7432 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7433 && avctx->skip_frame < AVDISCARD_ALL)
// Data partition A: slice header + partition A bits.
7437 init_get_bits(&hx->s.gb, ptr, bit_length);
7439 hx->inter_gb_ptr= NULL;
7440 hx->s.data_partitioning = 1;
7442 err = decode_slice_header(hx, h);
// Data partition B: intra coefficient bitstream.
7445 init_get_bits(&hx->intra_gb, ptr, bit_length);
7446 hx->intra_gb_ptr= &hx->intra_gb;
// Data partition C: inter coefficient bitstream; once present the slice can
// be decoded if the same skip filters pass.
7449 init_get_bits(&hx->inter_gb, ptr, bit_length);
7450 hx->inter_gb_ptr= &hx->inter_gb;
7452 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7453 && s->context_initialized
7455 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7456 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
7457 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
7458 && avctx->skip_frame < AVDISCARD_ALL)
// (SEI case — label missing from dump)
7462 init_get_bits(&s->gb, ptr, bit_length);
// (SPS case — label missing) Parse the sequence parameter set and derive the
// low-delay / has_b_frames settings from it.
7466 init_get_bits(&s->gb, ptr, bit_length);
7467 decode_seq_parameter_set(h);
7469 if(s->flags& CODEC_FLAG_LOW_DELAY)
7472 if(avctx->has_b_frames < 2)
7473 avctx->has_b_frames= !s->low_delay;
// (PPS case — label missing) Parse the picture parameter set.
7476 init_get_bits(&s->gb, ptr, bit_length);
7478 decode_picture_parameter_set(h, bit_length);
// NAL types that are recognized but intentionally ignored here.
7482 case NAL_END_SEQUENCE:
7483 case NAL_END_STREAM:
7484 case NAL_FILLER_DATA:
7486 case NAL_AUXILIARY_SLICE:
7489 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
// Flush a full batch of slice contexts to the worker threads.
7492 if(context_count == h->max_contexts) {
7493 execute_decode_slices(h, context_count);
7498 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7500 /* Slice could not be decoded in parallel mode, copy down
7501 * NAL unit stuff to context 0 and restart. Note that
7502 * rbsp_buffer is not transferred, but since we no longer
7503 * run in parallel mode this should not be an issue. */
7504 h->nal_unit_type = hx->nal_unit_type;
7505 h->nal_ref_idc = hx->nal_ref_idc;
// Drain any slices still queued after the NAL loop ends.
7511 execute_decode_slices(h, context_count);
7516 * returns the number of bytes consumed for building the current frame
// Clamps the consumed-byte position to sane bounds before it is returned to
// the caller (return statement missing from this fragment dump).
7518 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7519 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
// If within 10 bytes of the end, report the whole buffer as consumed.
7520 if(pos+10>buf_size) pos=buf_size; // oops ;)
// NOTE(review): fragment dump — interior lines are missing; comments cover
// only the visible statements.
//
// Top-level decode entry point: handles end-of-stream flushing of the
// delayed-picture queue, one-time avcC extradata parsing, per-packet NAL
// decoding, and reordering of decoded pictures into display order.
7525 static int decode_frame(AVCodecContext *avctx,
7526 void *data, int *data_size,
7527 const uint8_t *buf, int buf_size)
7529 H264Context *h = avctx->priv_data;
7530 MpegEncContext *s = &h->s;
7531 AVFrame *pict = data;
7534 s->flags= avctx->flags;
7535 s->flags2= avctx->flags2;
7537 /* end of stream, output what is still in the buffers */
7538 if (buf_size == 0) {
7542 //FIXME factorize this with the output code below
// Pick the delayed picture with the smallest POC (earliest display order).
7543 out = h->delayed_pic[0];
7545 for(i=1; h->delayed_pic[i] && h->delayed_pic[i]->poc; i++)
7546 if(h->delayed_pic[i]->poc < out->poc){
7547 out = h->delayed_pic[i];
// Compact the delayed-picture array over the slot being output.
7551 for(i=out_idx; h->delayed_pic[i]; i++)
7552 h->delayed_pic[i] = h->delayed_pic[i+1];
7555 *data_size = sizeof(AVFrame);
7556 *pict= *(AVFrame*)out;
// One-time parse of avcC-style extradata (AVCDecoderConfigurationRecord).
7562 if(h->is_avc && !h->got_avcC) {
7563 int i, cnt, nalsize;
7564 unsigned char *p = avctx->extradata;
7565 if(avctx->extradata_size < 7) {
7566 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7570 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7573 /* sps and pps in the avcC always have length coded with 2 bytes,
7574 so put a fake nal_length_size = 2 while parsing them */
7575 h->nal_length_size = 2;
7576 // Decode sps from avcC
7577 cnt = *(p+5) & 0x1f; // Number of sps
7579 for (i = 0; i < cnt; i++) {
7580 nalsize = AV_RB16(p) + 2;
7581 if(decode_nal_units(h, p, nalsize) < 0) {
7582 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7587 // Decode pps from avcC
7588 cnt = *(p++); // Number of pps
7589 for (i = 0; i < cnt; i++) {
7590 nalsize = AV_RB16(p) + 2;
7591 if(decode_nal_units(h, p, nalsize) != nalsize) {
7592 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7597 // Now store right nal length size, that will be use to parse all other nals
7598 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7599 // Do not reparse avcC
// Annex-B extradata (non-avcC) is decoded once on the first frame.
7603 if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
7604 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7608 buf_index=decode_nal_units(h, buf, buf_size);
7612 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7613 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7614 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
// Picture complete (or non-CHUNKS mode): finish it and run output reordering.
7618 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7619 Picture *out = s->current_picture_ptr;
7620 Picture *cur = s->current_picture_ptr;
7621 int i, pics, cross_idr, out_of_order, out_idx;
7625 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7626 s->current_picture_ptr->pict_type= s->pict_type;
// Apply memory-management control operations and roll POC state forward.
7629 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7630 h->prev_poc_msb= h->poc_msb;
7631 h->prev_poc_lsb= h->poc_lsb;
7633 h->prev_frame_num_offset= h->frame_num_offset;
7634 h->prev_frame_num= h->frame_num;
7637 * FIXME: Error handling code does not seem to support interlaced
7638 * when slices span multiple rows
7639 * The ff_er_add_slice calls don't work right for bottom
7640 * fields; they cause massive erroneous error concealing
7641 * Error marking covers both fields (top and bottom).
7642 * This causes a mismatched s->error_count
7643 * and a bad error table. Further, the error count goes to
7644 * INT_MAX when called for bottom field, because mb_y is
7645 * past end by one (callers fault) and resync_mb_y != 0
7646 * causes problems for the first MB line, too.
7653 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
7654 /* Wait for second field. */
7658 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7659 /* Derive top_field_first from field pocs. */
7660 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7662 //FIXME do something with unavailable reference frames
7664 /* Sort B-frames into display order */
7666 if(h->sps.bitstream_restriction_flag
7667 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7668 s->avctx->has_b_frames = h->sps.num_reorder_frames;
// Without bitstream restrictions, strict compliance forces maximum delay.
7672 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
7673 && !h->sps.bitstream_restriction_flag){
7674 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
7679 while(h->delayed_pic[pics]) pics++;
7681 assert(pics <= MAX_DELAYED_PIC_COUNT);
// Queue the current picture; keep it referenced while it awaits output.
7683 h->delayed_pic[pics++] = cur;
7684 if(cur->reference == 0)
7685 cur->reference = DELAYED_PIC_REF;
// Select the lowest-POC queued picture as the output candidate.
7687 out = h->delayed_pic[0];
7689 for(i=1; h->delayed_pic[i] && h->delayed_pic[i]->poc; i++)
7690 if(h->delayed_pic[i]->poc < out->poc){
7691 out = h->delayed_pic[i];
7694 cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i];
7696 out_of_order = !cross_idr && out->poc < h->outputed_poc;
// Grow has_b_frames heuristically when out-of-order output is detected.
7698 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7700 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
7702 ((!cross_idr && out->poc > h->outputed_poc + 2)
7703 || cur->pict_type == FF_B_TYPE)))
7706 s->avctx->has_b_frames++;
// Emit (or drop, if out of order) the candidate and compact the queue.
7709 if(out_of_order || pics > s->avctx->has_b_frames){
7710 out->reference &= ~DELAYED_PIC_REF;
7711 for(i=out_idx; h->delayed_pic[i]; i++)
7712 h->delayed_pic[i] = h->delayed_pic[i+1];
7714 if(!out_of_order && pics > s->avctx->has_b_frames){
7715 *data_size = sizeof(AVFrame);
7717 h->outputed_poc = out->poc;
7718 *pict= *(AVFrame*)out;
7720 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7725 assert(pict->data[0] || !*data_size);
7726 ff_print_debug_info(s, pict);
7727 //printf("out %d\n", (int)pict->data[0]);
7730 /* Return the Picture timestamp as the frame number */
7731 /* we subtract 1 because it is added on utils.c */
7732 avctx->frame_number = s->picture_number - 1;
7734 return get_consumed_bytes(s, buf_index, buf_size);
// Fill h->mb_avail[] with neighbour-availability flags for the current
// macroblock: a neighbour counts as available only if it belongs to the same
// slice (same slice_num in slice_table).  Indices 0..2 are the top-left, top
// and top-right neighbours; 3 is the left neighbour.  (Closing lines are
// missing from this fragment dump.)
7737 static inline void fill_mb_avail(H264Context *h){
7738 MpegEncContext * const s = &h->s;
7739 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
// Top row of neighbours (presumably guarded by an s->mb_y check on a missing
// line — verify against upstream).
7742 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
7743 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
7744 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
7750 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
7751 h->mb_avail[4]= 1; //FIXME move out
7752 h->mb_avail[5]= 0; //FIXME move out
// NOTE(review): the defining line of this block is missing from the dump;
// from its content this appears to be the self-test routine (presumably a
// main() under an #ifdef TEST guard — confirm against upstream h264.c).
// It exercises the Exp-Golomb coders, the 4x4 (I)DCT round trip, the
// quantizer, and the NAL escape/unescape layer.
7760 #define SIZE (COUNT*40)
7766 // int int_temp[10000];
7768 AVCodecContext avctx;
7770 dsputil_init(&dsp, &avctx);
// Round-trip test: write i as unsigned Exp-Golomb, then read it back.
7772 init_put_bits(&pb, temp, SIZE);
7773 printf("testing unsigned exp golomb\n");
7774 for(i=0; i<COUNT; i++){
7776 set_ue_golomb(&pb, i);
7777 STOP_TIMER("set_ue_golomb");
7779 flush_put_bits(&pb);
7781 init_get_bits(&gb, temp, 8*SIZE);
7782 for(i=0; i<COUNT; i++){
7785 s= show_bits(&gb, 24);
7788 j= get_ue_golomb(&gb);
7790 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7793 STOP_TIMER("get_ue_golomb");
// Same round trip for signed Exp-Golomb, centred around zero.
7797 init_put_bits(&pb, temp, SIZE);
7798 printf("testing signed exp golomb\n");
7799 for(i=0; i<COUNT; i++){
7801 set_se_golomb(&pb, i - COUNT/2);
7802 STOP_TIMER("set_se_golomb");
7804 flush_put_bits(&pb);
7806 init_get_bits(&gb, temp, 8*SIZE);
7807 for(i=0; i<COUNT; i++){
7810 s= show_bits(&gb, 24);
7813 j= get_se_golomb(&gb);
7814 if(j != i - COUNT/2){
7815 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
7818 STOP_TIMER("get_se_golomb");
// DCT/IDCT round trip on random 4x4 blocks, accumulating the error.
7822 printf("testing 4x4 (I)DCT\n");
7825 uint8_t src[16], ref[16];
7826 uint64_t error= 0, max_error=0;
7828 for(i=0; i<COUNT; i++){
7830 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7831 for(j=0; j<16; j++){
7832 ref[j]= random()%255;
7833 src[j]= random()%255;
7836 h264_diff_dct_c(block, src, ref, 4);
// Approximate forward/inverse scaling of the coefficients (the *4 then
// (x*4+2)/5 pattern) before running the IDCT back onto ref.
7839 for(j=0; j<16; j++){
7840 // printf("%d ", block[j]);
7841 block[j]= block[j]*4;
7842 if(j&1) block[j]= (block[j]*4 + 2)/5;
7843 if(j&4) block[j]= (block[j]*4 + 2)/5;
7847 s->dsp.h264_idct_add(ref, block, 4);
7848 /* for(j=0; j<16; j++){
7849 printf("%d ", ref[j]);
7853 for(j=0; j<16; j++){
7854 int diff= FFABS(src[j] - ref[j]);
7857 max_error= FFMAX(max_error, diff);
7860 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
7861 printf("testing quantizer\n");
7862 for(qp=0; qp<52; qp++){
7864 src1_block[i]= src2_block[i]= random()%255;
// NAL layer test: build a random bitstream, inject zero runs, escape it with
// encode_nal(), unescape with decode_nal(), and compare.
7867 printf("Testing NAL layer\n");
7869 uint8_t bitstream[COUNT];
7870 uint8_t nal[COUNT*2];
7872 memset(&h, 0, sizeof(H264Context));
7874 for(i=0; i<COUNT; i++){
7882 for(j=0; j<COUNT; j++){
7883 bitstream[j]= (random() % 255) + 1;
7886 for(j=0; j<zeros; j++){
7887 int pos= random() % COUNT;
7888 while(bitstream[pos] == 0){
7897 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
7899 printf("encoding failed\n");
7903 out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
7907 if(out_length != COUNT){
7908 printf("incorrect length %d %d\n", out_length, COUNT);
7912 if(consumed != nal_length){
7913 printf("incorrect consumed length %d %d\n", nal_length, consumed);
7917 if(memcmp(bitstream, out, COUNT)){
7918 printf("mismatch\n");
7924 printf("Testing RBSP\n");
// Codec close callback: release the per-context RBSP unescape buffers and
// the decoder lookup tables.  (Return statement missing from this fragment
// dump; presumably also closes the MpegEncContext — confirm upstream.)
7932 static av_cold int decode_end(AVCodecContext *avctx)
7934 H264Context *h = avctx->priv_data;
7935 MpegEncContext *s = &h->s;
7937 av_freep(&h->rbsp_buffer[0]);
7938 av_freep(&h->rbsp_buffer[1]);
7939 free_tables(h); //FIXME cleanup init stuff perhaps
7942 // memset(h, 0, sizeof(H264Context));
// Public codec registration table for the H.264 decoder (most initializer
// lines are missing from this fragment dump; visible fields are the private
// context size, the capability flags, and the long name).
7948 AVCodec h264_decoder = {
7952 sizeof(H264Context),
// DR1 = decoder supports direct rendering; DELAY = output lags input
// because of B-frame reordering.
7957 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
7959 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),