/*
 * H.26L/H.264/AVC/JVT/14496-10/... direct mb/block decoding
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * H.264 / AVC / MPEG4 part10 direct mb/block decoding.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */
31 #include "mpegutils.h"
32 #include "rectangle.h"
static int get_scale_factor(H264Context *const h, int poc, int poc1, int i)
{
    int poc0 = h->ref_list[0][i].poc;
    int td   = av_clip(poc1 - poc0, -128, 127);
    if (td == 0 || h->ref_list[0][i].long_ref) {
        return 256;
    } else {
        int tb = av_clip(poc - poc0, -128, 127);
        int tx = (16384 + (FFABS(td) >> 1)) / td;
        return av_clip((tb * tx + 32) >> 6, -1024, 1023);
    }
}
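
/*
 * Precompute dist_scale_factor[] for every list 0 reference of the current
 * slice. For MBAFF frames the field variants (dist_scale_factor_field[])
 * are filled as well, using the per-field POCs and the field halves of the
 * reference list (hence the i + 16 offset into ref_list).
 */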
void ff_h264_direct_dist_scale_factor(H264Context *const h)
{
    const int poc  = h->cur_pic_ptr->field_poc[h->picture_structure == PICT_BOTTOM_FIELD];
    const int poc1 = h->ref_list[1][0].poc;
    int i, field;

    if (FRAME_MBAFF(h))
        for (field = 0; field < 2; field++) {
            const int poc  = h->cur_pic_ptr->field_poc[field];
            const int poc1 = h->ref_list[1][0].field_poc[field];
            for (i = 0; i < 2 * h->ref_count[0]; i++)
                h->dist_scale_factor_field[field][i ^ field] =
                    get_scale_factor(h, poc, poc1, i + 16);
        }

    for (i = 0; i < h->ref_count[0]; i++)
        h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
}
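
/*
 * Build a mapping from the reference indices of the colocated (list 1)
 * picture to the current slice's list 0 indices, as needed by temporal
 * direct prediction. References are matched through the packed value
 * 4 * frame_num + (reference & 3), which encodes both the frame number and
 * the field parity; entries 16..47 of the map hold the field (MBAFF)
 * variants.
 */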
static void fill_colmap(H264Context *h, int map[2][16 + 32], int list,
                        int field, int colfield, int mbafi)
{
    H264Picture *const ref1 = &h->ref_list[1][0];
    int j, old_ref, rfield;
    int start  = mbafi ? 16                       : 0;
    int end    = mbafi ? 16 + 2 * h->ref_count[0] : h->ref_count[0];
    int interl = mbafi || h->picture_structure != PICT_FRAME;

    /* bogus; fills in for missing frames */
    memset(map[list], 0, sizeof(map[list]));

    for (rfield = 0; rfield < 2; rfield++) {
        for (old_ref = 0; old_ref < ref1->ref_count[colfield][list]; old_ref++) {
            int poc = ref1->ref_poc[colfield][list][old_ref];

            if (!interl)
                poc |= 3;
            // FIXME: store all MBAFF references so this is not needed
            else if (interl && (poc & 3) == 3)
                poc = (poc & ~3) + rfield + 1;

            for (j = start; j < end; j++) {
                if (4 * h->ref_list[0][j].frame_num +
                    (h->ref_list[0][j].reference & 3) == poc) {
                    int cur_ref = mbafi ? (j - 16) ^ field : j;
                    if (ref1->mbaff)
                        map[list][2 * old_ref + (rfield ^ field) + 16] = cur_ref;
                    if (rfield == field || !interl)
                        map[list][old_ref] = cur_ref;
                    break;
                }
            }
        }
    }
}
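
/*
 * Per-slice setup for direct-mode prediction: record the packed POCs and
 * reference counts of the current picture, decide which field of the
 * colocated picture to use (col_parity / col_fieldoff) and, for temporal
 * direct B-slices, build the col-to-list0 maps. Spatial direct mode does
 * not need the maps, hence the early return.
 */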
void ff_h264_direct_ref_list_init(H264Context *const h)
{
    H264Picture *const ref1 = &h->ref_list[1][0];
    H264Picture *const cur  = h->cur_pic_ptr;
    int list, j, field;
    int sidx     = (h->picture_structure & 1) ^ 1;
    int ref1sidx = (ref1->reference      & 1) ^ 1;

    for (list = 0; list < 2; list++) {
        cur->ref_count[sidx][list] = h->ref_count[list];
        for (j = 0; j < h->ref_count[list]; j++)
            cur->ref_poc[sidx][list][j] = 4 * h->ref_list[list][j].frame_num +
                                          (h->ref_list[list][j].reference & 3);
    }

    if (h->picture_structure == PICT_FRAME) {
        memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
        memcpy(cur->ref_poc[1],   cur->ref_poc[0],   sizeof(cur->ref_poc[0]));
    }

    cur->mbaff = FRAME_MBAFF(h);

    h->col_fieldoff = 0;
    if (h->picture_structure == PICT_FRAME) {
        int cur_poc  = h->cur_pic_ptr->poc;
        int *col_poc = h->ref_list[1]->field_poc;
        h->col_parity = (FFABS(col_poc[0] - cur_poc) >=
                         FFABS(col_poc[1] - cur_poc));
        ref1sidx =
        sidx     = h->col_parity;
        // FL -> FL & differ parity
    } else if (!(h->picture_structure & h->ref_list[1][0].reference) &&
               !h->ref_list[1][0].mbaff) {
        h->col_fieldoff = 2 * h->ref_list[1][0].reference - 3;
    }

    if (h->slice_type_nos != AV_PICTURE_TYPE_B || h->direct_spatial_mv_pred)
        return;

    for (list = 0; list < 2; list++) {
        fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
        if (FRAME_MBAFF(h))
            for (field = 0; field < 2; field++)
                fill_colmap(h, h->map_col_to_list0_field[field], list, field,
                            ref1sidx, 1);
    }
}
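
/*
 * With frame-threaded decoding the colocated picture may still be being
 * decoded; block until enough macroblock rows of it are available before
 * reading its motion data.
 */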
static void await_reference_mb_row(H264Context *const h, H264Picture *ref,
                                   int mb_y)
{
    int ref_field         = ref->reference - 1;
    int ref_field_picture = ref->field_picture;
    int ref_height        = 16 * h->mb_height >> ref_field_picture;

    if (!HAVE_THREADS || !(h->avctx->active_thread_type & FF_THREAD_FRAME))
        return;

    /* FIXME: It can be safe to access mb stuff
     * even if pixels aren't deblocked yet. */
    ff_thread_await_progress(&ref->tf,
                             FFMIN(16 * mb_y >> ref_field_picture,
                                   ref_height - 1),
                             ref_field_picture && ref_field);
}
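
/*
 * Spatial direct prediction (H.264 spec 8.4.1.2.2): the reference indices
 * and motion vectors are derived from the spatial neighbours of the current
 * macroblock, and individual (sub)blocks are forced to zero motion where
 * the colocated block is effectively stationary (colZeroFlag).
 */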
static void pred_spatial_direct_motion(H264Context *const h, int *mb_type)
{
    int b8_stride = 2;
    int b4_stride = h->b_stride;
    int mb_xy = h->mb_xy, mb_y = h->mb_y;
    int mb_type_col[2];
    const int16_t (*l1mv0)[2], (*l1mv1)[2];
    const int8_t *l1ref0, *l1ref1;
    const int is_b8x8 = IS_8X8(*mb_type);
    unsigned int sub_mb_type = MB_TYPE_L0L1;
    int i8, i4;
    int ref[2];
    int mv[2];
    int list;

    assert(h->ref_list[1][0].reference & 3);

    await_reference_mb_row(h, &h->ref_list[1][0],
                           h->mb_y + !!IS_INTERLACED(*mb_type));

#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16 | MB_TYPE_INTRA4x4 | \
                                MB_TYPE_INTRA16x16 | MB_TYPE_INTRA_PCM)
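
    /*
     * Per the spec, the spatial direct reference index for each list is the
     * minimum of the non-negative neighbour indices. The unsigned casts make
     * negative (unavailable) indices compare as huge values, so they are
     * picked only when no neighbour has a valid reference.
     */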
    /* ref = min(neighbors) */
    for (list = 0; list < 2; list++) {
        int left_ref     = h->ref_cache[list][scan8[0] - 1];
        int top_ref      = h->ref_cache[list][scan8[0] - 8];
        int refc         = h->ref_cache[list][scan8[0] - 8 + 4];
        const int16_t *C = h->mv_cache[list][scan8[0] - 8 + 4];
        if (refc == PART_NOT_AVAILABLE) {
            refc = h->ref_cache[list][scan8[0] - 8 - 1];
            C    = h->mv_cache[list][scan8[0] - 8 - 1];
        }
        ref[list] = FFMIN3((unsigned)left_ref,
                           (unsigned)top_ref,
                           (unsigned)refc);
        if (ref[list] >= 0) {
            /* This is just pred_motion() but with the cases removed that
             * cannot happen for direct blocks. */
            const int16_t *const A = h->mv_cache[list][scan8[0] - 1];
            const int16_t *const B = h->mv_cache[list][scan8[0] - 8];

            int match_count = (left_ref == ref[list]) +
                              (top_ref  == ref[list]) +
                              (refc     == ref[list]);

            if (match_count > 1) { // most common
                mv[list] = pack16to32(mid_pred(A[0], B[0], C[0]),
                                      mid_pred(A[1], B[1], C[1]));
            } else {
                assert(match_count == 1);
                if (left_ref == ref[list])
                    mv[list] = AV_RN32A(A);
                else if (top_ref == ref[list])
                    mv[list] = AV_RN32A(B);
                else
                    mv[list] = AV_RN32A(C);
            }
        } else {
            int mask = ~(MB_TYPE_L0 << (2 * list));
            mv[list]  = 0;
            ref[list] = -1;
            if (!is_b8x8)
                *mb_type &= mask;
            sub_mb_type &= mask;
        }
    }
    if (ref[0] < 0 && ref[1] < 0) {
        ref[0] = ref[1] = 0;
        if (!is_b8x8)
            *mb_type |= MB_TYPE_L0L1;
        sub_mb_type |= MB_TYPE_L0L1;
    }

    if (!(is_b8x8 | mv[0] | mv[1])) {
        fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
        fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
        fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
        fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
        *mb_type = (*mb_type & ~(MB_TYPE_8x8 | MB_TYPE_16x8 | MB_TYPE_8x16 |
                                 MB_TYPE_P1L0 | MB_TYPE_P1L1)) |
                   MB_TYPE_16x16 | MB_TYPE_DIRECT2;
        return;
    }
    if (IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])) { // AFL/AFR/FR/FL -> AFL/FL
        if (!IS_INTERLACED(*mb_type)) {                    //     AFR/FR    -> AFL/FL
            mb_y  = (h->mb_y & ~1) + h->col_parity;
            mb_xy = h->mb_x +
                    ((h->mb_y & ~1) + h->col_parity) * h->mb_stride;
            b8_stride = 0;
        } else {
            mb_y  += h->col_fieldoff;
            mb_xy += h->mb_stride * h->col_fieldoff; // non-zero for FL -> FL & differ parity
        }
        goto single_col;
    } else {                                        // AFL/AFR/FR/FL -> AFR/FR
        if (IS_INTERLACED(*mb_type)) {              // AFL       /FL -> AFR/FR
            mb_y           = h->mb_y & ~1;
            mb_xy          = (h->mb_y & ~1) * h->mb_stride + h->mb_x;
            mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
            mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + h->mb_stride];
            b8_stride      = 2 + 4 * h->mb_stride;
            b4_stride     *= 6;
            if (IS_INTERLACED(mb_type_col[0]) !=
                IS_INTERLACED(mb_type_col[1])) {
                mb_type_col[0] &= ~MB_TYPE_INTERLACED;
                mb_type_col[1] &= ~MB_TYPE_INTERLACED;
            }

            sub_mb_type |= MB_TYPE_16x16 | MB_TYPE_DIRECT2; /* B_SUB_8x8 */
            if ((mb_type_col[0] & MB_TYPE_16x16_OR_INTRA) &&
                (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA) &&
                !is_b8x8) {
                *mb_type |= MB_TYPE_16x8 | MB_TYPE_DIRECT2; /* B_16x8 */
            } else {
                *mb_type |= MB_TYPE_8x8;
            }
        } else {                                    //     AFR/FR    -> AFR/FR
single_col:
            mb_type_col[0] =
            mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];

            sub_mb_type |= MB_TYPE_16x16 | MB_TYPE_DIRECT2; /* B_SUB_8x8 */
            if (!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)) {
                *mb_type |= MB_TYPE_16x16 | MB_TYPE_DIRECT2; /* B_16x16 */
            } else if (!is_b8x8 &&
                       (mb_type_col[0] & (MB_TYPE_16x8 | MB_TYPE_8x16))) {
                *mb_type |= MB_TYPE_DIRECT2 |
                            (mb_type_col[0] & (MB_TYPE_16x8 | MB_TYPE_8x16));
            } else {
                if (!h->sps.direct_8x8_inference_flag) {
                    /* FIXME: Save sub mb types from previous frames (or derive
                     * from MVs) so we know exactly what block size to use. */
                    sub_mb_type += (MB_TYPE_8x8 - MB_TYPE_16x16); /* B_SUB_4x4 */
                }
                *mb_type |= MB_TYPE_8x8;
            }
        }
    }

    await_reference_mb_row(h, &h->ref_list[1][0], mb_y);
    l1mv0  = &h->ref_list[1][0].motion_val[0][h->mb2b_xy[mb_xy]];
    l1mv1  = &h->ref_list[1][0].motion_val[1][h->mb2b_xy[mb_xy]];
    l1ref0 = &h->ref_list[1][0].ref_index[0][4 * mb_xy];
    l1ref1 = &h->ref_list[1][0].ref_index[1][4 * mb_xy];
    if (!b8_stride) {
        if (h->mb_y & 1) {
            l1ref0 += 2;
            l1ref1 += 2;
            l1mv0  += 2 * b4_stride;
            l1mv1  += 2 * b4_stride;
        }
    }
    if (IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])) {
        int n = 0;
        for (i8 = 0; i8 < 4; i8++) {
            int x8  = i8 & 1;
            int y8  = i8 >> 1;
            int xy8 = x8     + y8 * b8_stride;
            int xy4 = x8 * 3 + y8 * b4_stride;
            int a, b;

            if (is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                continue;
            h->sub_mb_type[i8] = sub_mb_type;

            fill_rectangle(&h->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
                           (uint8_t)ref[0], 1);
            fill_rectangle(&h->ref_cache[1][scan8[i8 * 4]], 2, 2, 8,
                           (uint8_t)ref[1], 1);
            if (!IS_INTRA(mb_type_col[y8]) && !h->ref_list[1][0].long_ref &&
                ((l1ref0[xy8] == 0 &&
                  FFABS(l1mv0[xy4][0]) <= 1 &&
                  FFABS(l1mv0[xy4][1]) <= 1) ||
                 (l1ref0[xy8] < 0 &&
                  l1ref1[xy8] == 0 &&
                  FFABS(l1mv1[xy4][0]) <= 1 &&
                  FFABS(l1mv1[xy4][1]) <= 1))) {
                a =
                b = 0;
                if (ref[0] > 0)
                    a = mv[0];
                if (ref[1] > 0)
                    b = mv[1];
                n++;
            } else {
                a = mv[0];
                b = mv[1];
            }
            fill_rectangle(&h->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, a, 4);
            fill_rectangle(&h->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, b, 4);
        }
        if (!is_b8x8 && !(n & 3))
            *mb_type = (*mb_type & ~(MB_TYPE_8x8 | MB_TYPE_16x8 | MB_TYPE_8x16 |
                                     MB_TYPE_P1L0 | MB_TYPE_P1L1)) |
                       MB_TYPE_16x16 | MB_TYPE_DIRECT2;
    } else if (IS_16X16(*mb_type)) {
        int a, b;

        fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
        fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
        if (!IS_INTRA(mb_type_col[0]) && !h->ref_list[1][0].long_ref &&
            ((l1ref0[0] == 0 &&
              FFABS(l1mv0[0][0]) <= 1 &&
              FFABS(l1mv0[0][1]) <= 1) ||
             (l1ref0[0] < 0 && !l1ref1[0] &&
              FFABS(l1mv1[0][0]) <= 1 &&
              FFABS(l1mv1[0][1]) <= 1 &&
              h->x264_build > 33U))) {
            a = b = 0;
            if (ref[0] > 0)
                a = mv[0];
            if (ref[1] > 0)
                b = mv[1];
        } else {
            a = mv[0];
            b = mv[1];
        }
        fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
        fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
    } else {
        int n = 0;
        for (i8 = 0; i8 < 4; i8++) {
            const int x8 = i8 & 1;
            const int y8 = i8 >> 1;

            if (is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                continue;
            h->sub_mb_type[i8] = sub_mb_type;

            fill_rectangle(&h->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, mv[0], 4);
            fill_rectangle(&h->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, mv[1], 4);
            fill_rectangle(&h->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
                           (uint8_t)ref[0], 1);
            fill_rectangle(&h->ref_cache[1][scan8[i8 * 4]], 2, 2, 8,
                           (uint8_t)ref[1], 1);

            assert(b8_stride == 2);
            /* col_zero_flag */
            if (!IS_INTRA(mb_type_col[0]) && !h->ref_list[1][0].long_ref &&
                (l1ref0[i8] == 0 ||
                 (l1ref0[i8] < 0 &&
                  l1ref1[i8] == 0 &&
                  h->x264_build > 33U))) {
                const int16_t (*l1mv)[2] = l1ref0[i8] == 0 ? l1mv0 : l1mv1;
                if (IS_SUB_8X8(sub_mb_type)) {
                    const int16_t *mv_col = l1mv[x8 * 3 + y8 * 3 * b4_stride];
                    if (FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1) {
                        if (ref[0] == 0)
                            fill_rectangle(&h->mv_cache[0][scan8[i8 * 4]], 2, 2,
                                           8, 0, 4);
                        if (ref[1] == 0)
                            fill_rectangle(&h->mv_cache[1][scan8[i8 * 4]], 2, 2,
                                           8, 0, 4);
                        n += 4;
                    }
                } else {
                    int m = 0;
                    for (i4 = 0; i4 < 4; i4++) {
                        const int16_t *mv_col = l1mv[x8 * 2 + (i4 & 1) +
                                                     (y8 * 2 + (i4 >> 1)) * b4_stride];
                        if (FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1) {
                            if (ref[0] == 0)
                                AV_ZERO32(h->mv_cache[0][scan8[i8 * 4 + i4]]);
                            if (ref[1] == 0)
                                AV_ZERO32(h->mv_cache[1][scan8[i8 * 4 + i4]]);
                            m++;
                        }
                    }
                    if (m == 4) /* all four 4x4 blocks zeroed; merge to 8x8 */
                        h->sub_mb_type[i8] += MB_TYPE_16x16 - MB_TYPE_8x8;
                    n += m;
                }
            }
        }
        if (!is_b8x8 && !(n & 15))
            *mb_type = (*mb_type & ~(MB_TYPE_8x8 | MB_TYPE_16x8 | MB_TYPE_8x16 |
                                     MB_TYPE_P1L0 | MB_TYPE_P1L1)) |
                       MB_TYPE_16x16 | MB_TYPE_DIRECT2;
    }
}
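
/*
 * Temporal direct prediction (H.264 spec 8.4.1.2.3): the list 0 motion
 * vector is the colocated picture's motion vector scaled by the POC-based
 * distance factor, and the list 1 vector is the remainder:
 *   mvL0 = (DistScaleFactor * mvCol + 128) >> 8
 *   mvL1 = mvL0 - mvCol
 */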
static void pred_temp_direct_motion(H264Context *const h, int *mb_type)
{
    int b8_stride = 2;
    int b4_stride = h->b_stride;
    int mb_xy = h->mb_xy, mb_y = h->mb_y;
    int mb_type_col[2];
    const int16_t (*l1mv0)[2], (*l1mv1)[2];
    const int8_t *l1ref0, *l1ref1;
    const int is_b8x8 = IS_8X8(*mb_type);
    unsigned int sub_mb_type;
    int i8, i4;

    assert(h->ref_list[1][0].reference & 3);

    await_reference_mb_row(h, &h->ref_list[1][0],
                           h->mb_y + !!IS_INTERLACED(*mb_type));
    if (IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])) { // AFL/AFR/FR/FL -> AFL/FL
        if (!IS_INTERLACED(*mb_type)) {                    //     AFR/FR    -> AFL/FL
            mb_y  = (h->mb_y & ~1) + h->col_parity;
            mb_xy = h->mb_x +
                    ((h->mb_y & ~1) + h->col_parity) * h->mb_stride;
            b8_stride = 0;
        } else {
            mb_y  += h->col_fieldoff;
            mb_xy += h->mb_stride * h->col_fieldoff; // non-zero for FL -> FL & differ parity
        }
        goto single_col;
    } else {                                        // AFL/AFR/FR/FL -> AFR/FR
        if (IS_INTERLACED(*mb_type)) {              // AFL       /FL -> AFR/FR
            mb_y           = h->mb_y & ~1;
            mb_xy          = h->mb_x + (h->mb_y & ~1) * h->mb_stride;
            mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
            mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + h->mb_stride];
            b8_stride      = 2 + 4 * h->mb_stride;
            b4_stride     *= 6;
            if (IS_INTERLACED(mb_type_col[0]) !=
                IS_INTERLACED(mb_type_col[1])) {
                mb_type_col[0] &= ~MB_TYPE_INTERLACED;
                mb_type_col[1] &= ~MB_TYPE_INTERLACED;
            }

            sub_mb_type = MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 |
                          MB_TYPE_DIRECT2;                  /* B_SUB_8x8 */

            if ((mb_type_col[0] & MB_TYPE_16x16_OR_INTRA) &&
                (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA) &&
                !is_b8x8) {
                *mb_type |= MB_TYPE_16x8 | MB_TYPE_L0L1 |
                            MB_TYPE_DIRECT2;                /* B_16x8 */
            } else {
                *mb_type |= MB_TYPE_8x8 | MB_TYPE_L0L1;
            }
        } else {                                    //     AFR/FR    -> AFR/FR
single_col:
            mb_type_col[0] =
            mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];

            sub_mb_type = MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 |
                          MB_TYPE_DIRECT2;                  /* B_SUB_8x8 */
            if (!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)) {
                *mb_type |= MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 |
                            MB_TYPE_DIRECT2;                /* B_16x16 */
            } else if (!is_b8x8 &&
                       (mb_type_col[0] & (MB_TYPE_16x8 | MB_TYPE_8x16))) {
                *mb_type |= MB_TYPE_L0L1 | MB_TYPE_DIRECT2 |
                            (mb_type_col[0] & (MB_TYPE_16x8 | MB_TYPE_8x16));
            } else {
                if (!h->sps.direct_8x8_inference_flag) {
                    /* FIXME: save sub mb types from previous frames (or derive
                     * from MVs) so we know exactly what block size to use */
                    sub_mb_type = MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 |
                                  MB_TYPE_DIRECT2;          /* B_SUB_4x4 */
                }
                *mb_type |= MB_TYPE_8x8 | MB_TYPE_L0L1;
            }
        }
    }

    await_reference_mb_row(h, &h->ref_list[1][0], mb_y);
    l1mv0  = &h->ref_list[1][0].motion_val[0][h->mb2b_xy[mb_xy]];
    l1mv1  = &h->ref_list[1][0].motion_val[1][h->mb2b_xy[mb_xy]];
    l1ref0 = &h->ref_list[1][0].ref_index[0][4 * mb_xy];
    l1ref1 = &h->ref_list[1][0].ref_index[1][4 * mb_xy];
    if (!b8_stride) {
        if (h->mb_y & 1) {
            l1ref0 += 2;
            l1ref1 += 2;
            l1mv0  += 2 * b4_stride;
            l1mv1  += 2 * b4_stride;
        }
    }
    {
        const int *map_col_to_list0[2] = { h->map_col_to_list0[0],
                                           h->map_col_to_list0[1] };
        const int *dist_scale_factor = h->dist_scale_factor;
        int ref_offset;

        if (FRAME_MBAFF(h) && IS_INTERLACED(*mb_type)) {
            map_col_to_list0[0] = h->map_col_to_list0_field[h->mb_y & 1][0];
            map_col_to_list0[1] = h->map_col_to_list0_field[h->mb_y & 1][1];
            dist_scale_factor   = h->dist_scale_factor_field[h->mb_y & 1];
        }
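
        /*
         * ref_offset selects the field half (entries 16..) of the colmap:
         * it evaluates to 16 when the colocated picture is MBAFF coded and
         * the colocated macroblock is field coded (MB_TYPE_INTERLACED
         * shifted down to bit 4), and to 0 otherwise.
         */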
        ref_offset = (h->ref_list[1][0].mbaff << 4) & (mb_type_col[0] >> 3);
        if (IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])) {
            int y_shift = 2 * !IS_INTERLACED(*mb_type);
            assert(h->sps.direct_8x8_inference_flag);

            for (i8 = 0; i8 < 4; i8++) {
                const int x8 = i8 & 1;
                const int y8 = i8 >> 1;
                int ref0, scale;
                const int16_t (*l1mv)[2] = l1mv0;

                if (is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                    continue;
                h->sub_mb_type[i8] = sub_mb_type;

                fill_rectangle(&h->ref_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 1);
                if (IS_INTRA(mb_type_col[y8])) {
                    fill_rectangle(&h->ref_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 1);
                    fill_rectangle(&h->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 4);
                    fill_rectangle(&h->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 4);
                    continue;
                }

                ref0 = l1ref0[x8 + y8 * b8_stride];
                if (ref0 >= 0)
                    ref0 = map_col_to_list0[0][ref0 + ref_offset];
                else {
                    ref0 = map_col_to_list0[1][l1ref1[x8 + y8 * b8_stride] +
                                               ref_offset];
                    l1mv = l1mv1;
                }
                scale = dist_scale_factor[ref0];
                fill_rectangle(&h->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
                               ref0, 1);

                {
                    const int16_t *mv_col = l1mv[x8 * 3 + y8 * b4_stride];
                    int my_col            = (mv_col[1] << y_shift) / 2;
                    int mx                = (scale * mv_col[0] + 128) >> 8;
                    int my                = (scale * my_col    + 128) >> 8;
                    fill_rectangle(&h->mv_cache[0][scan8[i8 * 4]], 2, 2, 8,
                                   pack16to32(mx, my), 4);
                    fill_rectangle(&h->mv_cache[1][scan8[i8 * 4]], 2, 2, 8,
                                   pack16to32(mx - mv_col[0], my - my_col), 4);
                }
            }
            return;
        }
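
        /*
         * Worked example of the scaling below: scale = 128 corresponds to
         * the current picture lying midway between the two references
         * (tb = td / 2). With mvCol = (8, -4):
         *   mvL0 = ((128 * 8 + 128) >> 8, (128 * -4 + 128) >> 8) = (4, -2)
         *   mvL1 = mvL0 - mvCol = (-4, 2)
         */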
        /* one-to-one mv scaling */

        if (IS_16X16(*mb_type)) {
            int ref, mv0, mv1;

            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
            if (IS_INTRA(mb_type_col[0])) {
                ref = mv0 = mv1 = 0;
            } else {
                const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
                                                : map_col_to_list0[1][l1ref1[0] + ref_offset];
                const int scale = dist_scale_factor[ref0];
                const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
                int mv_l0[2];
                mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
                mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
                ref      = ref0;
                mv0      = pack16to32(mv_l0[0], mv_l0[1]);
                mv1      = pack16to32(mv_l0[0] - mv_col[0], mv_l0[1] - mv_col[1]);
            }
            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
            fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
            fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
        } else {
            for (i8 = 0; i8 < 4; i8++) {
                const int x8 = i8 & 1;
                const int y8 = i8 >> 1;
                int ref0, scale;
                const int16_t (*l1mv)[2] = l1mv0;

                if (is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                    continue;
                h->sub_mb_type[i8] = sub_mb_type;
                fill_rectangle(&h->ref_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 1);
                if (IS_INTRA(mb_type_col[0])) {
                    fill_rectangle(&h->ref_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 1);
                    fill_rectangle(&h->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 4);
                    fill_rectangle(&h->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 4);
                    continue;
                }

                assert(b8_stride == 2);
                ref0 = l1ref0[i8];
                if (ref0 >= 0)
                    ref0 = map_col_to_list0[0][ref0 + ref_offset];
                else {
                    ref0 = map_col_to_list0[1][l1ref1[i8] + ref_offset];
                    l1mv = l1mv1;
                }
                scale = dist_scale_factor[ref0];

                fill_rectangle(&h->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
                               ref0, 1);
                if (IS_SUB_8X8(sub_mb_type)) {
                    const int16_t *mv_col = l1mv[x8 * 3 + y8 * 3 * b4_stride];
                    int mx                = (scale * mv_col[0] + 128) >> 8;
                    int my                = (scale * mv_col[1] + 128) >> 8;
                    fill_rectangle(&h->mv_cache[0][scan8[i8 * 4]], 2, 2, 8,
                                   pack16to32(mx, my), 4);
                    fill_rectangle(&h->mv_cache[1][scan8[i8 * 4]], 2, 2, 8,
                                   pack16to32(mx - mv_col[0], my - mv_col[1]), 4);
                } else {
                    for (i4 = 0; i4 < 4; i4++) {
                        const int16_t *mv_col = l1mv[x8 * 2 + (i4 & 1) +
                                                     (y8 * 2 + (i4 >> 1)) * b4_stride];
                        int16_t *mv_l0 = h->mv_cache[0][scan8[i8 * 4 + i4]];
                        mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
                        mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
                        AV_WN32A(h->mv_cache[1][scan8[i8 * 4 + i4]],
                                 pack16to32(mv_l0[0] - mv_col[0],
                                            mv_l0[1] - mv_col[1]));
                    }
                }
            }
        }
    }
}
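
/*
 * Public entry point: dispatch to spatial or temporal direct prediction
 * according to the slice's direct_spatial_mv_pred flag.
 */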
void ff_h264_pred_direct_motion(H264Context *const h, int *mb_type)
{
    if (h->direct_spatial_mv_pred)
        pred_spatial_direct_motion(h, mb_type);
    else
        pred_temp_direct_motion(h, mb_type);
}