git.sesse.net Git - ffmpeg/blob - libavcodec/h264_direct.c

   1 /*
   2  * H.26L/H.264/AVC/JVT/14496-10/... direct mb/block decoding
   3  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * H.264 / AVC / MPEG4 part10 direct mb/block decoding.
  25  * @author Michael Niedermayer <michaelni@gmx.at>
  26  */
  27
  28 #include "internal.h"
  29 #include "avcodec.h"
  30 #include "h264.h"
  31 #include "mpegutils.h"
  32 #include "rectangle.h"
  33 #include "thread.h"
  34
  35 #include <assert.h>
  36
  37 static int get_scale_factor(H264Context *const h, int poc, int poc1, int i)
  38 {
  39     int poc0 = h->ref_list[0][i].poc;
  40     int td = av_clip(poc1 - poc0, -128, 127);
  41     if (td == 0 || h->ref_list[0][i].long_ref) {
  42         return 256;
  43     } else {
  44         int tb = av_clip(poc - poc0, -128, 127);
  45         int tx = (16384 + (FFABS(td) >> 1)) / td;
  46         return av_clip((tb * tx + 32) >> 6, -1024, 1023);
  47     }
  48 }
  49
  50 void ff_h264_direct_dist_scale_factor(H264Context *const h)
  51 {
  52     const int poc  = h->picture_structure == PICT_FRAME ?
  53         h->cur_pic_ptr->poc :
  54         h->cur_pic_ptr->field_poc[h->picture_structure == PICT_BOTTOM_FIELD];
  55     const int poc1 = h->ref_list[1][0].poc;
  56     int i, field;
  57
  58     if (FRAME_MBAFF(h))
  59         for (field = 0; field < 2; field++) {
  60             const int poc  = h->cur_pic_ptr->field_poc[field];
  61             const int poc1 = h->ref_list[1][0].field_poc[field];
  62             for (i = 0; i < 2 * h->ref_count[0]; i++)
  63                 h->dist_scale_factor_field[field][i ^ field] =
  64                     get_scale_factor(h, poc, poc1, i + 16);
  65         }
  66
  67     for (i = 0; i < h->ref_count[0]; i++)
  68         h->dist_scale_factor[i] = get_scale_factor(h, poc, poc1, i);
  69 }
  70
  71 static void fill_colmap(H264Context *h, int map[2][16 + 32], int list,
  72                         int field, int colfield, int mbafi)
  73 {
  74     H264Picture *const ref1 = &h->ref_list[1][0];
  75     int j, old_ref, rfield;
  76     int start  = mbafi ? 16                       : 0;
  77     int end    = mbafi ? 16 + 2 * h->ref_count[0] : h->ref_count[0];
  78     int interl = mbafi || h->picture_structure != PICT_FRAME;
  79
  80     /* bogus; fills in for missing frames */
  81     memset(map[list], 0, sizeof(map[list]));
  82
  83     for (rfield = 0; rfield < 2; rfield++) {
  84         for (old_ref = 0; old_ref < ref1->ref_count[colfield][list]; old_ref++) {
  85             int poc = ref1->ref_poc[colfield][list][old_ref];
  86
  87             if (!interl)
  88                 poc |= 3;
  89             // FIXME: store all MBAFF references so this is not needed
  90             else if (interl && (poc & 3) == 3)
  91                 poc = (poc & ~3) + rfield + 1;
  92
  93             for (j = start; j < end; j++) {
  94                 if (4 * h->ref_list[0][j].frame_num +
  95                     (h->ref_list[0][j].reference & 3) == poc) {
  96                     int cur_ref = mbafi ? (j - 16) ^ field : j;
  97                     if (ref1->mbaff)
  98                         map[list][2 * old_ref + (rfield ^ field) + 16] = cur_ref;
  99                     if (rfield == field || !interl)
 100                         map[list][old_ref] = cur_ref;
 101                     break;
 102                 }
 103             }
 104         }
 105     }
 106 }
 107
 108 void ff_h264_direct_ref_list_init(H264Context *const h)
 109 {
 110     H264Picture *const ref1 = &h->ref_list[1][0];
 111     H264Picture *const cur = h->cur_pic_ptr;
 112     int list, j, field;
 113     int sidx     = (h->picture_structure & 1) ^ 1;
 114     int ref1sidx = (ref1->reference      & 1) ^ 1;
 115
 116     for (list = 0; list < 2; list++) {
 117         cur->ref_count[sidx][list] = h->ref_count[list];
 118         for (j = 0; j < h->ref_count[list]; j++)
 119             cur->ref_poc[sidx][list][j] = 4 * h->ref_list[list][j].frame_num +
 120                                           (h->ref_list[list][j].reference & 3);
 121     }
 122
 123     if (h->picture_structure == PICT_FRAME) {
 124         memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
 125         memcpy(cur->ref_poc[1],   cur->ref_poc[0],   sizeof(cur->ref_poc[0]));
 126     }
 127
 128     cur->mbaff = FRAME_MBAFF(h);
 129
 130     h->col_fieldoff = 0;
 131     if (h->picture_structure == PICT_FRAME) {
 132         int cur_poc  = h->cur_pic_ptr->poc;
 133         int *col_poc = h->ref_list[1]->field_poc;
 134         h->col_parity = (FFABS(col_poc[0] - cur_poc) >=
 135                          FFABS(col_poc[1] - cur_poc));
 136         ref1sidx =
 137         sidx     = h->col_parity;
 138     // FL -> FL & differ parity
 139     } else if (!(h->picture_structure & h->ref_list[1][0].reference) &&
 140                !h->ref_list[1][0].mbaff) {
 141         h->col_fieldoff = 2 * h->ref_list[1][0].reference - 3;
 142     }
 143
 144     if (h->slice_type_nos != AV_PICTURE_TYPE_B || h->direct_spatial_mv_pred)
 145         return;
 146
 147     for (list = 0; list < 2; list++) {
 148         fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
 149         if (FRAME_MBAFF(h))
 150             for (field = 0; field < 2; field++)
 151                 fill_colmap(h, h->map_col_to_list0_field[field], list, field,
 152                             field, 1);
 153     }
 154 }
 155
 156 static void await_reference_mb_row(H264Context *const h, H264Picture *ref,
 157                                    int mb_y)
 158 {
 159     int ref_field         = ref->reference - 1;
 160     int ref_field_picture = ref->field_picture;
 161     int ref_height        = 16 * h->mb_height >> ref_field_picture;
 162
 163     if (!HAVE_THREADS || !(h->avctx->active_thread_type & FF_THREAD_FRAME))
 164         return;
 165
 166     /* FIXME: It can be safe to access mb stuff
 167      * even if pixels aren't deblocked yet. */
 168
 169     ff_thread_await_progress(&ref->tf,
 170                              FFMIN(16 * mb_y >> ref_field_picture,
 171                                    ref_height - 1),
 172                              ref_field_picture && ref_field);
 173 }
 174
 175 static void pred_spatial_direct_motion(H264Context *const h, int *mb_type)
 176 {
 177     int b8_stride = 2;
 178     int b4_stride = h->b_stride;
 179     int mb_xy = h->mb_xy, mb_y = h->mb_y;
 180     int mb_type_col[2];
 181     const int16_t (*l1mv0)[2], (*l1mv1)[2];
 182     const int8_t *l1ref0, *l1ref1;
 183     const int is_b8x8 = IS_8X8(*mb_type);
 184     unsigned int sub_mb_type = MB_TYPE_L0L1;
 185     int i8, i4;
 186     int ref[2];
 187     int mv[2];
 188     int list;
 189
 190     assert(h->ref_list[1][0].reference & 3);
 191
 192     await_reference_mb_row(h, &h->ref_list[1][0],
 193                            h->mb_y + !!IS_INTERLACED(*mb_type));
 194
 195 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16 | MB_TYPE_INTRA4x4 | \
 196                                 MB_TYPE_INTRA16x16 | MB_TYPE_INTRA_PCM)
 197
 198     /* ref = min(neighbors) */
 199     for (list = 0; list < 2; list++) {
 200         int left_ref     = h->ref_cache[list][scan8[0] - 1];
 201         int top_ref      = h->ref_cache[list][scan8[0] - 8];
 202         int refc         = h->ref_cache[list][scan8[0] - 8 + 4];
 203         const int16_t *C = h->mv_cache[list][scan8[0]  - 8 + 4];
 204         if (refc == PART_NOT_AVAILABLE) {
 205             refc = h->ref_cache[list][scan8[0] - 8 - 1];
 206             C    = h->mv_cache[list][scan8[0]  - 8 - 1];
 207         }
 208         ref[list] = FFMIN3((unsigned)left_ref,
 209                            (unsigned)top_ref,
 210                            (unsigned)refc);
 211         if (ref[list] >= 0) {
 212             /* This is just pred_motion() but with the cases removed that
 213              * cannot happen for direct blocks. */
 214             const int16_t *const A = h->mv_cache[list][scan8[0] - 1];
 215             const int16_t *const B = h->mv_cache[list][scan8[0] - 8];
 216
 217             int match_count = (left_ref == ref[list]) +
 218                               (top_ref  == ref[list]) +
 219                               (refc     == ref[list]);
 220
 221             if (match_count > 1) { // most common
 222                 mv[list] = pack16to32(mid_pred(A[0], B[0], C[0]),
 223                                       mid_pred(A[1], B[1], C[1]));
 224             } else {
 225                 assert(match_count == 1);
 226                 if (left_ref == ref[list])
 227                     mv[list] = AV_RN32A(A);
 228                 else if (top_ref == ref[list])
 229                     mv[list] = AV_RN32A(B);
 230                 else
 231                     mv[list] = AV_RN32A(C);
 232             }
 233             av_assert2(ref[list] < (h->ref_count[list] << !!FRAME_MBAFF(h)));
 234         } else {
 235             int mask = ~(MB_TYPE_L0 << (2 * list));
 236             mv[list]  = 0;
 237             ref[list] = -1;
 238             if (!is_b8x8)
 239                 *mb_type &= mask;
 240             sub_mb_type &= mask;
 241         }
 242     }
 243     if (ref[0] < 0 && ref[1] < 0) {
 244         ref[0] = ref[1] = 0;
 245         if (!is_b8x8)
 246             *mb_type |= MB_TYPE_L0L1;
 247         sub_mb_type |= MB_TYPE_L0L1;
 248     }
 249
 250     if (!(is_b8x8 | mv[0] | mv[1])) {
 251         fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
 252         fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
 253         fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
 254         fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
 255         *mb_type = (*mb_type & ~(MB_TYPE_8x8 | MB_TYPE_16x8 | MB_TYPE_8x16 |
 256                                  MB_TYPE_P1L0 | MB_TYPE_P1L1)) |
 257                    MB_TYPE_16x16 | MB_TYPE_DIRECT2;
 258         return;
 259     }
 260
 261     if (IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])) { // AFL/AFR/FR/FL -> AFL/FL
 262         if (!IS_INTERLACED(*mb_type)) {                    //     AFR/FR    -> AFL/FL
 263             mb_y  = (h->mb_y & ~1) + h->col_parity;
 264             mb_xy = h->mb_x +
 265                     ((h->mb_y & ~1) + h->col_parity) * h->mb_stride;
 266             b8_stride = 0;
 267         } else {
 268             mb_y  += h->col_fieldoff;
 269             mb_xy += h->mb_stride * h->col_fieldoff; // non-zero for FL -> FL & differ parity
 270         }
 271         goto single_col;
 272     } else {                                             // AFL/AFR/FR/FL -> AFR/FR
 273         if (IS_INTERLACED(*mb_type)) {                   // AFL       /FL -> AFR/FR
 274             mb_y           =  h->mb_y & ~1;
 275             mb_xy          = (h->mb_y & ~1) * h->mb_stride + h->mb_x;
 276             mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
 277             mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + h->mb_stride];
 278             b8_stride      = 2 + 4 * h->mb_stride;
 279             b4_stride     *= 6;
 280             if (IS_INTERLACED(mb_type_col[0]) !=
 281                 IS_INTERLACED(mb_type_col[1])) {
 282                 mb_type_col[0] &= ~MB_TYPE_INTERLACED;
 283                 mb_type_col[1] &= ~MB_TYPE_INTERLACED;
 284             }
 285
 286             sub_mb_type |= MB_TYPE_16x16 | MB_TYPE_DIRECT2; /* B_SUB_8x8 */
 287             if ((mb_type_col[0] & MB_TYPE_16x16_OR_INTRA) &&
 288                 (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA) &&
 289                 !is_b8x8) {
 290                 *mb_type |= MB_TYPE_16x8 | MB_TYPE_DIRECT2;  /* B_16x8 */
 291             } else {
 292                 *mb_type |= MB_TYPE_8x8;
 293             }
 294         } else {                                         //     AFR/FR    -> AFR/FR
 295 single_col:
 296             mb_type_col[0] =
 297             mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
 298
 299             sub_mb_type |= MB_TYPE_16x16 | MB_TYPE_DIRECT2; /* B_SUB_8x8 */
 300             if (!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)) {
 301                 *mb_type |= MB_TYPE_16x16 | MB_TYPE_DIRECT2; /* B_16x16 */
 302             } else if (!is_b8x8 &&
 303                        (mb_type_col[0] & (MB_TYPE_16x8 | MB_TYPE_8x16))) {
 304                 *mb_type |= MB_TYPE_DIRECT2 |
 305                             (mb_type_col[0] & (MB_TYPE_16x8 | MB_TYPE_8x16));
 306             } else {
 307                 if (!h->sps.direct_8x8_inference_flag) {
 308                     /* FIXME: Save sub mb types from previous frames (or derive
 309                      * from MVs) so we know exactly what block size to use. */
 310                     sub_mb_type += (MB_TYPE_8x8 - MB_TYPE_16x16); /* B_SUB_4x4 */
 311                 }
 312                 *mb_type |= MB_TYPE_8x8;
 313             }
 314         }
 315     }
 316
 317     await_reference_mb_row(h, &h->ref_list[1][0], mb_y);
 318
 319     l1mv0  = (void*)&h->ref_list[1][0].motion_val[0][h->mb2b_xy[mb_xy]];
 320     l1mv1  = (void*)&h->ref_list[1][0].motion_val[1][h->mb2b_xy[mb_xy]];
 321     l1ref0 = &h->ref_list[1][0].ref_index[0][4 * mb_xy];
 322     l1ref1 = &h->ref_list[1][0].ref_index[1][4 * mb_xy];
 323     if (!b8_stride) {
 324         if (h->mb_y & 1) {
 325             l1ref0 += 2;
 326             l1ref1 += 2;
 327             l1mv0  += 2 * b4_stride;
 328             l1mv1  += 2 * b4_stride;
 329         }
 330     }
 331
 332     if (IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])) {
 333         int n = 0;
 334         for (i8 = 0; i8 < 4; i8++) {
 335             int x8  = i8 & 1;
 336             int y8  = i8 >> 1;
 337             int xy8 = x8     + y8 * b8_stride;
 338             int xy4 = x8 * 3 + y8 * b4_stride;
 339             int a, b;
 340
 341             if (is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
 342                 continue;
 343             h->sub_mb_type[i8] = sub_mb_type;
 344
 345             fill_rectangle(&h->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
 346                            (uint8_t)ref[0], 1);
 347             fill_rectangle(&h->ref_cache[1][scan8[i8 * 4]], 2, 2, 8,
 348                            (uint8_t)ref[1], 1);
 349             if (!IS_INTRA(mb_type_col[y8]) && !h->ref_list[1][0].long_ref &&
 350                 ((l1ref0[xy8] == 0 &&
 351                   FFABS(l1mv0[xy4][0]) <= 1 &&
 352                   FFABS(l1mv0[xy4][1]) <= 1) ||
 353                  (l1ref0[xy8] < 0 &&
 354                   l1ref1[xy8] == 0 &&
 355                   FFABS(l1mv1[xy4][0]) <= 1 &&
 356                   FFABS(l1mv1[xy4][1]) <= 1))) {
 357                 a =
 358                 b = 0;
 359                 if (ref[0] > 0)
 360                     a = mv[0];
 361                 if (ref[1] > 0)
 362                     b = mv[1];
 363                 n++;
 364             } else {
 365                 a = mv[0];
 366                 b = mv[1];
 367             }
 368             fill_rectangle(&h->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, a, 4);
 369             fill_rectangle(&h->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, b, 4);
 370         }
 371         if (!is_b8x8 && !(n & 3))
 372             *mb_type = (*mb_type & ~(MB_TYPE_8x8 | MB_TYPE_16x8 | MB_TYPE_8x16 |
 373                                      MB_TYPE_P1L0 | MB_TYPE_P1L1)) |
 374                        MB_TYPE_16x16 | MB_TYPE_DIRECT2;
 375     } else if (IS_16X16(*mb_type)) {
 376         int a, b;
 377
 378         fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
 379         fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
 380         if (!IS_INTRA(mb_type_col[0]) && !h->ref_list[1][0].long_ref &&
 381             ((l1ref0[0] == 0 &&
 382               FFABS(l1mv0[0][0]) <= 1 &&
 383               FFABS(l1mv0[0][1]) <= 1) ||
 384              (l1ref0[0] < 0 && !l1ref1[0] &&
 385               FFABS(l1mv1[0][0]) <= 1 &&
 386               FFABS(l1mv1[0][1]) <= 1 &&
 387               h->x264_build > 33U))) {
 388             a = b = 0;
 389             if (ref[0] > 0)
 390                 a = mv[0];
 391             if (ref[1] > 0)
 392                 b = mv[1];
 393         } else {
 394             a = mv[0];
 395             b = mv[1];
 396         }
 397         fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
 398         fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
 399     } else {
 400         int n = 0;
 401         for (i8 = 0; i8 < 4; i8++) {
 402             const int x8 = i8 & 1;
 403             const int y8 = i8 >> 1;
 404
 405             if (is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
 406                 continue;
 407             h->sub_mb_type[i8] = sub_mb_type;
 408
 409             fill_rectangle(&h->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, mv[0], 4);
 410             fill_rectangle(&h->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, mv[1], 4);
 411             fill_rectangle(&h->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
 412                            (uint8_t)ref[0], 1);
 413             fill_rectangle(&h->ref_cache[1][scan8[i8 * 4]], 2, 2, 8,
 414                            (uint8_t)ref[1], 1);
 415
 416             assert(b8_stride == 2);
 417             /* col_zero_flag */
 418             if (!IS_INTRA(mb_type_col[0]) && !h->ref_list[1][0].long_ref &&
 419                 (l1ref0[i8] == 0 ||
 420                  (l1ref0[i8] < 0 &&
 421                   l1ref1[i8] == 0 &&
 422                   h->x264_build > 33U))) {
 423                 const int16_t (*l1mv)[2] = l1ref0[i8] == 0 ? l1mv0 : l1mv1;
 424                 if (IS_SUB_8X8(sub_mb_type)) {
 425                     const int16_t *mv_col = l1mv[x8 * 3 + y8 * 3 * b4_stride];
 426                     if (FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1) {
 427                         if (ref[0] == 0)
 428                             fill_rectangle(&h->mv_cache[0][scan8[i8 * 4]], 2, 2,
 429                                            8, 0, 4);
 430                         if (ref[1] == 0)
 431                             fill_rectangle(&h->mv_cache[1][scan8[i8 * 4]], 2, 2,
 432                                            8, 0, 4);
 433                         n += 4;
 434                     }
 435                 } else {
 436                     int m = 0;
 437                     for (i4 = 0; i4 < 4; i4++) {
 438                         const int16_t *mv_col = l1mv[x8 * 2 + (i4 & 1) +
 439                                                      (y8 * 2 + (i4 >> 1)) * b4_stride];
 440                         if (FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1) {
 441                             if (ref[0] == 0)
 442                                 AV_ZERO32(h->mv_cache[0][scan8[i8 * 4 + i4]]);
 443                             if (ref[1] == 0)
 444                                 AV_ZERO32(h->mv_cache[1][scan8[i8 * 4 + i4]]);
 445                             m++;
 446                         }
 447                     }
 448                     if (!(m & 3))
 449                         h->sub_mb_type[i8] += MB_TYPE_16x16 - MB_TYPE_8x8;
 450                     n += m;
 451                 }
 452             }
 453         }
 454         if (!is_b8x8 && !(n & 15))
 455             *mb_type = (*mb_type & ~(MB_TYPE_8x8 | MB_TYPE_16x8 | MB_TYPE_8x16 |
 456                                      MB_TYPE_P1L0 | MB_TYPE_P1L1)) |
 457                        MB_TYPE_16x16 | MB_TYPE_DIRECT2;
 458     }
 459 }
 460
 461 static void pred_temp_direct_motion(H264Context *const h, int *mb_type)
 462 {
 463     int b8_stride = 2;
 464     int b4_stride = h->b_stride;
 465     int mb_xy = h->mb_xy, mb_y = h->mb_y;
 466     int mb_type_col[2];
 467     const int16_t (*l1mv0)[2], (*l1mv1)[2];
 468     const int8_t *l1ref0, *l1ref1;
 469     const int is_b8x8 = IS_8X8(*mb_type);
 470     unsigned int sub_mb_type;
 471     int i8, i4;
 472
 473     assert(h->ref_list[1][0].reference & 3);
 474
 475     await_reference_mb_row(h, &h->ref_list[1][0],
 476                            h->mb_y + !!IS_INTERLACED(*mb_type));
 477
 478     if (IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])) { // AFL/AFR/FR/FL -> AFL/FL
 479         if (!IS_INTERLACED(*mb_type)) {                    //     AFR/FR    -> AFL/FL
 480             mb_y  = (h->mb_y & ~1) + h->col_parity;
 481             mb_xy = h->mb_x +
 482                     ((h->mb_y & ~1) + h->col_parity) * h->mb_stride;
 483             b8_stride = 0;
 484         } else {
 485             mb_y  += h->col_fieldoff;
 486             mb_xy += h->mb_stride * h->col_fieldoff; // non-zero for FL -> FL & differ parity
 487         }
 488         goto single_col;
 489     } else {                                        // AFL/AFR/FR/FL -> AFR/FR
 490         if (IS_INTERLACED(*mb_type)) {              // AFL       /FL -> AFR/FR
 491             mb_y           = h->mb_y & ~1;
 492             mb_xy          = h->mb_x + (h->mb_y & ~1) * h->mb_stride;
 493             mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
 494             mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + h->mb_stride];
 495             b8_stride      = 2 + 4 * h->mb_stride;
 496             b4_stride     *= 6;
 497             if (IS_INTERLACED(mb_type_col[0]) !=
 498                 IS_INTERLACED(mb_type_col[1])) {
 499                 mb_type_col[0] &= ~MB_TYPE_INTERLACED;
 500                 mb_type_col[1] &= ~MB_TYPE_INTERLACED;
 501             }
 502
 503             sub_mb_type = MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 |
 504                           MB_TYPE_DIRECT2;                  /* B_SUB_8x8 */
 505
 506             if ((mb_type_col[0] & MB_TYPE_16x16_OR_INTRA) &&
 507                 (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA) &&
 508                 !is_b8x8) {
 509                 *mb_type |= MB_TYPE_16x8 | MB_TYPE_L0L1 |
 510                             MB_TYPE_DIRECT2;                /* B_16x8 */
 511             } else {
 512                 *mb_type |= MB_TYPE_8x8 | MB_TYPE_L0L1;
 513             }
 514         } else {                                    //     AFR/FR    -> AFR/FR
 515 single_col:
 516             mb_type_col[0]     =
 517                 mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
 518
 519             sub_mb_type = MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 |
 520                           MB_TYPE_DIRECT2;                  /* B_SUB_8x8 */
 521             if (!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)) {
 522                 *mb_type |= MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 |
 523                             MB_TYPE_DIRECT2;                /* B_16x16 */
 524             } else if (!is_b8x8 &&
 525                        (mb_type_col[0] & (MB_TYPE_16x8 | MB_TYPE_8x16))) {
 526                 *mb_type |= MB_TYPE_L0L1 | MB_TYPE_DIRECT2 |
 527                             (mb_type_col[0] & (MB_TYPE_16x8 | MB_TYPE_8x16));
 528             } else {
 529                 if (!h->sps.direct_8x8_inference_flag) {
 530                     /* FIXME: save sub mb types from previous frames (or derive
 531                      * from MVs) so we know exactly what block size to use */
 532                     sub_mb_type = MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 |
 533                                   MB_TYPE_DIRECT2;          /* B_SUB_4x4 */
 534                 }
 535                 *mb_type |= MB_TYPE_8x8 | MB_TYPE_L0L1;
 536             }
 537         }
 538     }
 539
 540     await_reference_mb_row(h, &h->ref_list[1][0], mb_y);
 541
 542     l1mv0  = (void*)&h->ref_list[1][0].motion_val[0][h->mb2b_xy[mb_xy]];
 543     l1mv1  = (void*)&h->ref_list[1][0].motion_val[1][h->mb2b_xy[mb_xy]];
 544     l1ref0 = &h->ref_list[1][0].ref_index[0][4 * mb_xy];
 545     l1ref1 = &h->ref_list[1][0].ref_index[1][4 * mb_xy];
 546     if (!b8_stride) {
 547         if (h->mb_y & 1) {
 548             l1ref0 += 2;
 549             l1ref1 += 2;
 550             l1mv0  += 2 * b4_stride;
 551             l1mv1  += 2 * b4_stride;
 552         }
 553     }
 554
 555     {
 556         const int *map_col_to_list0[2] = { h->map_col_to_list0[0],
 557                                            h->map_col_to_list0[1] };
 558         const int *dist_scale_factor = h->dist_scale_factor;
 559         int ref_offset;
 560
 561         if (FRAME_MBAFF(h) && IS_INTERLACED(*mb_type)) {
 562             map_col_to_list0[0] = h->map_col_to_list0_field[h->mb_y & 1][0];
 563             map_col_to_list0[1] = h->map_col_to_list0_field[h->mb_y & 1][1];
 564             dist_scale_factor   = h->dist_scale_factor_field[h->mb_y & 1];
 565         }
 566         ref_offset = (h->ref_list[1][0].mbaff << 4) & (mb_type_col[0] >> 3);
 567
 568         if (IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])) {
 569             int y_shift = 2 * !IS_INTERLACED(*mb_type);
 570             assert(h->sps.direct_8x8_inference_flag);
 571
 572             for (i8 = 0; i8 < 4; i8++) {
 573                 const int x8 = i8 & 1;
 574                 const int y8 = i8 >> 1;
 575                 int ref0, scale;
 576                 const int16_t (*l1mv)[2] = l1mv0;
 577
 578                 if (is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
 579                     continue;
 580                 h->sub_mb_type[i8] = sub_mb_type;
 581
 582                 fill_rectangle(&h->ref_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 1);
 583                 if (IS_INTRA(mb_type_col[y8])) {
 584                     fill_rectangle(&h->ref_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 1);
 585                     fill_rectangle(&h->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 4);
 586                     fill_rectangle(&h->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 4);
 587                     continue;
 588                 }
 589
 590                 ref0 = l1ref0[x8 + y8 * b8_stride];
 591                 if (ref0 >= 0)
 592                     ref0 = map_col_to_list0[0][ref0 + ref_offset];
 593                 else {
 594                     ref0 = map_col_to_list0[1][l1ref1[x8 + y8 * b8_stride] +
 595                                                ref_offset];
 596                     l1mv = l1mv1;
 597                 }
 598                 scale = dist_scale_factor[ref0];
 599                 fill_rectangle(&h->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
 600                                ref0, 1);
 601
 602                 {
 603                     const int16_t *mv_col = l1mv[x8 * 3 + y8 * b4_stride];
 604                     int my_col            = (mv_col[1] << y_shift) / 2;
 605                     int mx                = (scale * mv_col[0] + 128) >> 8;
 606                     int my                = (scale * my_col    + 128) >> 8;
 607                     fill_rectangle(&h->mv_cache[0][scan8[i8 * 4]], 2, 2, 8,
 608                                    pack16to32(mx, my), 4);
 609                     fill_rectangle(&h->mv_cache[1][scan8[i8 * 4]], 2, 2, 8,
 610                                    pack16to32(mx - mv_col[0], my - my_col), 4);
 611                 }
 612             }
 613             return;
 614         }
 615
 616         /* one-to-one mv scaling */
 617
 618         if (IS_16X16(*mb_type)) {
 619             int ref, mv0, mv1;
 620
 621             fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
 622             if (IS_INTRA(mb_type_col[0])) {
 623                 ref = mv0 = mv1 = 0;
 624             } else {
 625                 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
 626                                                 : map_col_to_list0[1][l1ref1[0] + ref_offset];
 627                 const int scale = dist_scale_factor[ref0];
 628                 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
 629                 int mv_l0[2];
 630                 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
 631                 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
 632                 ref      = ref0;
 633                 mv0      = pack16to32(mv_l0[0], mv_l0[1]);
 634                 mv1      = pack16to32(mv_l0[0] - mv_col[0], mv_l0[1] - mv_col[1]);
 635             }
 636             fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
 637             fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
 638             fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
 639         } else {
 640             for (i8 = 0; i8 < 4; i8++) {
 641                 const int x8 = i8 & 1;
 642                 const int y8 = i8 >> 1;
 643                 int ref0, scale;
 644                 const int16_t (*l1mv)[2] = l1mv0;
 645
 646                 if (is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
 647                     continue;
 648                 h->sub_mb_type[i8] = sub_mb_type;
 649                 fill_rectangle(&h->ref_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 1);
 650                 if (IS_INTRA(mb_type_col[0])) {
 651                     fill_rectangle(&h->ref_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 1);
 652                     fill_rectangle(&h->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 4);
 653                     fill_rectangle(&h->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 4);
 654                     continue;
 655                 }
 656
 657                 assert(b8_stride == 2);
 658                 ref0 = l1ref0[i8];
 659                 if (ref0 >= 0)
 660                     ref0 = map_col_to_list0[0][ref0 + ref_offset];
 661                 else {
 662                     ref0 = map_col_to_list0[1][l1ref1[i8] + ref_offset];
 663                     l1mv = l1mv1;
 664                 }
 665                 scale = dist_scale_factor[ref0];
 666
 667                 fill_rectangle(&h->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
 668                                ref0, 1);
 669                 if (IS_SUB_8X8(sub_mb_type)) {
 670                     const int16_t *mv_col = l1mv[x8 * 3 + y8 * 3 * b4_stride];
 671                     int mx                = (scale * mv_col[0] + 128) >> 8;
 672                     int my                = (scale * mv_col[1] + 128) >> 8;
 673                     fill_rectangle(&h->mv_cache[0][scan8[i8 * 4]], 2, 2, 8,
 674                                    pack16to32(mx, my), 4);
 675                     fill_rectangle(&h->mv_cache[1][scan8[i8 * 4]], 2, 2, 8,
 676                                    pack16to32(mx - mv_col[0], my - mv_col[1]), 4);
 677                 } else {
 678                     for (i4 = 0; i4 < 4; i4++) {
 679                         const int16_t *mv_col = l1mv[x8 * 2 + (i4 & 1) +
 680                                                      (y8 * 2 + (i4 >> 1)) * b4_stride];
 681                         int16_t *mv_l0 = h->mv_cache[0][scan8[i8 * 4 + i4]];
 682                         mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
 683                         mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
 684                         AV_WN32A(h->mv_cache[1][scan8[i8 * 4 + i4]],
 685                                  pack16to32(mv_l0[0] - mv_col[0],
 686                                             mv_l0[1] - mv_col[1]));
 687                     }
 688                 }
 689             }
 690         }
 691     }
 692 }
 693
 694 void ff_h264_pred_direct_motion(H264Context *const h, int *mb_type)
 695 {
 696     if (h->direct_spatial_mv_pred)
 697         pred_spatial_direct_motion(h, mb_type);
 698     else
 699         pred_temp_direct_motion(h, mb_type);
 700 }