+ const int mx = (s->mv[dir][0][0] >> shift) + 16 * s->mb_x + 8;
+ const int my = (s->mv[dir][0][1] >> shift) + 16 * s->mb_y;
+ int off = mx + (my + (s->mb_x & 3) * 4) * s->linesize + 64;
+
+ s->vdsp.prefetch(pix[0] + off, s->linesize, 4);
+ off = (mx >> 1) + ((my >> 1) + (s->mb_x & 7)) * s->uvlinesize + 64;
+ s->vdsp.prefetch(pix[1] + off, pix[2] - pix[1], 2);
+}
+
+ /**
+  * Run overlapped block motion compensation for the current macroblock.
+  *
+  * A 4x4 cache of motion vectors is filled from
+  * s->current_picture.motion_val[0]. The centre 2x2 entries hold the vectors
+  * of the macroblock's own four blocks; the entries around them hold the
+  * vectors of the blocks above, to the left and to the right. When such a
+  * neighbour is unavailable (mb_y == 0, mb_x == 0 or mb_x + 1 >= s->mb_width)
+  * or its macroblock type is intra, the adjacent vector of the current
+  * macroblock is used instead. Cache row 3 is always a copy of the
+  * macroblock's own lower row.
+  *
+  * obmc_motion() is then called once per block with five vectors, and the sum
+  * of the four own vectors is passed to chroma_4mv_motion() unless CONFIG_GRAY
+  * is enabled and AV_CODEC_FLAG_GRAY is set in s->avctx->flags.
+  *
+  * @param s           codec context; s->mb_skipped must be zero
+  * @param dest_y      destination pointer forwarded (with per-block offsets)
+  *                    to obmc_motion()
+  * @param dest_cb     destination pointer forwarded to chroma_4mv_motion()
+  * @param dest_cr     destination pointer forwarded to chroma_4mv_motion()
+  * @param ref_picture reference planes; element 0 goes to obmc_motion(), the
+  *                    whole array goes to chroma_4mv_motion()
+  * @param pix_op      pixel operation table; only pix_op[1] is used
+  */
+ static inline void apply_obmc(MpegEncContext *s,
+                               uint8_t *dest_y,
+                               uint8_t *dest_cb,
+                               uint8_t *dest_cr,
+                               uint8_t **ref_picture,
+                               op_pixels_func (*pix_op)[4])
+ {
+     LOCAL_ALIGNED_8(int16_t, mv_cache, [4], [4][2]);
+     /* (row, column) offsets into mv_cache in the order handed to
+      * obmc_motion(): own block, row above, column to the left,
+      * column to the right, row below. */
+     static const int8_t neighbour[5][2] = {
+         { 0, 0 }, { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 }
+     };
+     Picture *pic        = &s->current_picture;
+     const int mb_x      = s->mb_x;
+     const int mb_y      = s->mb_y;
+     const int b8_stride = s->b8_stride;
+     const int mb_index  = mb_x + mb_y * s->mb_stride;
+     const int b8_index  = mb_x * 2 + mb_y * 2 * b8_stride;
+     int chroma_mx = 0;
+     int chroma_my = 0;
+     int blk, k;
+
+     assert(!s->mb_skipped);
+
+     /* Own vectors: upper row of the macroblock goes to cache row 1. */
+     AV_COPY32(mv_cache[1][1], pic->motion_val[0][b8_index]);
+     AV_COPY32(mv_cache[1][2], pic->motion_val[0][b8_index + 1]);
+
+     /* Lower row of the macroblock goes to cache row 2 ... */
+     AV_COPY32(mv_cache[2][1],
+               pic->motion_val[0][b8_index + b8_stride]);
+     AV_COPY32(mv_cache[2][2],
+               pic->motion_val[0][b8_index + b8_stride + 1]);
+
+     /* ... and is repeated in cache row 3, which stands in for the row below. */
+     AV_COPY32(mv_cache[3][1],
+               pic->motion_val[0][b8_index + b8_stride]);
+     AV_COPY32(mv_cache[3][2],
+               pic->motion_val[0][b8_index + b8_stride + 1]);
+
+     /* Row above: take it from the frame only if it exists and is not intra. */
+     if (mb_y != 0 && !IS_INTRA(pic->mb_type[mb_index - s->mb_stride])) {
+         AV_COPY32(mv_cache[0][1],
+                   pic->motion_val[0][b8_index - b8_stride]);
+         AV_COPY32(mv_cache[0][2],
+                   pic->motion_val[0][b8_index - b8_stride + 1]);
+     } else {
+         AV_COPY32(mv_cache[0][1], mv_cache[1][1]);
+         AV_COPY32(mv_cache[0][2], mv_cache[1][2]);
+     }
+
+     /* Column to the left. */
+     if (mb_x != 0 && !IS_INTRA(pic->mb_type[mb_index - 1])) {
+         AV_COPY32(mv_cache[1][0], pic->motion_val[0][b8_index - 1]);
+         AV_COPY32(mv_cache[2][0],
+                   pic->motion_val[0][b8_index - 1 + b8_stride]);
+     } else {
+         AV_COPY32(mv_cache[1][0], mv_cache[1][1]);
+         AV_COPY32(mv_cache[2][0], mv_cache[2][1]);
+     }
+
+     /* Column to the right. */
+     if (mb_x + 1 < s->mb_width && !IS_INTRA(pic->mb_type[mb_index + 1])) {
+         AV_COPY32(mv_cache[1][3], pic->motion_val[0][b8_index + 2]);
+         AV_COPY32(mv_cache[2][3],
+                   pic->motion_val[0][b8_index + 2 + b8_stride]);
+     } else {
+         AV_COPY32(mv_cache[1][3], mv_cache[1][2]);
+         AV_COPY32(mv_cache[2][3], mv_cache[2][2]);
+     }
+
+     for (blk = 0; blk < 4; blk++) {
+         const int bx = blk & 1;   /* block column inside the macroblock */
+         const int by = blk >> 1;  /* block row inside the macroblock */
+         int16_t mv[5][2];
+
+         /* Gather the block's own vector and its four cached neighbours;
+          * the own block sits at cache position (by + 1, bx + 1). */
+         for (k = 0; k < 5; k++) {
+             const int row = by + 1 + neighbour[k][0];
+             const int col = bx + 1 + neighbour[k][1];
+
+             mv[k][0] = mv_cache[row][col][0];
+             mv[k][1] = mv_cache[row][col][1];
+         }
+
+         // FIXME cleanup
+         obmc_motion(s, dest_y + (bx * 8) + by * 8 * s->linesize,
+                     ref_picture[0],
+                     mb_x * 16 + bx * 8, mb_y * 16 + by * 8,
+                     pix_op[1],
+                     mv);
+
+         /* Accumulate the own vectors for the chroma call below. */
+         chroma_mx += mv[0][0];
+         chroma_my += mv[0][1];
+     }
+
+     /* Skip chroma when gray support is compiled in and requested. */
+     if (CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY))
+         return;
+
+     chroma_4mv_motion(s, dest_cb, dest_cr,
+                       ref_picture, pix_op[1],
+                       chroma_mx, chroma_my);
+ }
+
+static inline void apply_8x8(MpegEncContext *s,
+ uint8_t *dest_y,
+ uint8_t *dest_cb,
+ uint8_t *dest_cr,
+ int dir,
+ uint8_t **ref_picture,
+ qpel_mc_func (*qpix_op)[16],
+ op_pixels_func (*pix_op)[4])
+{
+ int dxy, mx, my, src_x, src_y;
+ int i;
+ int mb_x = s->mb_x;
+ int mb_y = s->mb_y;
+ uint8_t *ptr, *dest;
+
+ mx = 0;
+ my = 0;
+ if (s->quarter_sample) {
+ for (i = 0; i < 4; i++) {
+ int motion_x = s->mv[dir][i][0];
+ int motion_y = s->mv[dir][i][1];
+
+ dxy = ((motion_y & 3) << 2) | (motion_x & 3);
+ src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
+ src_y = mb_y * 16 + (motion_y >> 2) + (i >> 1) * 8;
+
+ /* WARNING: do not forget half pels */
+ src_x = av_clip(src_x, -16, s->width);
+ if (src_x == s->width)
+ dxy &= ~3;
+ src_y = av_clip(src_y, -16, s->height);
+ if (src_y == s->height)
+ dxy &= ~12;
+
+ ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
+ if ((unsigned)src_x > FFMAX(s->h_edge_pos - (motion_x & 3) - 8, 0) ||
+ (unsigned)src_y > FFMAX(s->v_edge_pos - (motion_y & 3) - 8, 0)) {
+ s->vdsp.emulated_edge_mc(s->sc.edge_emu_buffer, ptr,
+ s->linesize, s->linesize,
+ 9, 9,
+ src_x, src_y,
+ s->h_edge_pos,
+ s->v_edge_pos);
+ ptr = s->sc.edge_emu_buffer;
+ }
+ dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
+ qpix_op[1][dxy](dest, ptr, s->linesize);
+
+ mx += s->mv[dir][i][0] / 2;
+ my += s->mv[dir][i][1] / 2;
+ }
+ } else {
+ for (i = 0; i < 4; i++) {
+ hpel_motion(s,
+ dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
+ ref_picture[0],
+ mb_x * 16 + (i & 1) * 8,
+ mb_y * 16 + (i >> 1) * 8,
+ pix_op[1],
+ s->mv[dir][i][0],
+ s->mv[dir][i][1]);
+
+ mx += s->mv[dir][i][0];
+ my += s->mv[dir][i][1];
+ }
+ }
+
+ if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY))
+ chroma_4mv_motion(s, dest_cb, dest_cr,
+ ref_picture, pix_op[1], mx, my);