git.sesse.net Git - ffmpeg/blob - libavcodec/vp9_mc_template.c
Merge commit 'ae365453c370c85f278bff7fbf9e20d9d335cb2a'
[ffmpeg] / libavcodec / vp9_mc_template.c
1 /*
2  * VP9 compatible video decoder
3  *
4  * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5  * Copyright (C) 2013 Clément Bœsch <u pkh me>
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23
/*
 * Component-wise average of two (MVx2) or four (MVx4) motion vectors.
 *
 * Each macro expands to a VP56mv compound literal whose .x and .y are
 * ROUNDED_DIV() of the summed input components by the number of inputs.
 * ROUNDED_DIV is defined elsewhere (presumably division rounding to the
 * nearest integer -- confirm against its definition).
 *
 * Every argument is expanded twice (once for .x, once for .y), so callers
 * must not pass expressions with side effects. Arguments are parenthesized
 * so that non-postfix expressions such as *ptr select the member correctly.
 */
24 #define ROUNDED_DIV_MVx2(a, b) \
25     (VP56mv) { .x = ROUNDED_DIV((a).x + (b).x, 2), .y = ROUNDED_DIV((a).y + (b).y, 2) }
26 #define ROUNDED_DIV_MVx4(a, b, c, d) \
27     (VP56mv) { .x = ROUNDED_DIV((a).x + (b).x + (c).x + (d).x, 4), \
28                .y = ROUNDED_DIV((a).y + (b).y + (c).y + (d).y, 4) }
29
30 static void FN(inter_pred)(AVCodecContext *ctx)
31 {
32     static const uint8_t bwlog_tab[2][N_BS_SIZES] = {
33         { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
34         { 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 },
35     };
36     VP9Context *s = ctx->priv_data;
37     VP9Block *b = s->b;
38     int row = s->row, col = s->col;
39     ThreadFrame *tref1 = &s->refs[s->refidx[b->ref[0]]], *tref2;
40     AVFrame *ref1 = tref1->f, *ref2;
41     int w1 = ref1->width, h1 = ref1->height, w2, h2;
42     ptrdiff_t ls_y = s->y_stride, ls_uv = s->uv_stride;
43     int bytesperpixel = BYTES_PER_PIXEL;
44
45     if (b->comp) {
46         tref2 = &s->refs[s->refidx[b->ref[1]]];
47         ref2 = tref2->f;
48         w2 = ref2->width;
49         h2 = ref2->height;
50     }
51
52     // y inter pred
53     if (b->bs > BS_8x8) {
54         VP56mv uvmv;
55
56 #if SCALED == 0
57         if (b->bs == BS_8x4) {
58             mc_luma_dir(s, mc[3][b->filter][0], s->dst[0], ls_y,
59                         ref1->data[0], ref1->linesize[0], tref1,
60                         row << 3, col << 3, &b->mv[0][0],,,,, 8, 4, w1, h1, 0);
61             mc_luma_dir(s, mc[3][b->filter][0],
62                         s->dst[0] + 4 * ls_y, ls_y,
63                         ref1->data[0], ref1->linesize[0], tref1,
64                         (row << 3) + 4, col << 3, &b->mv[2][0],,,,, 8, 4, w1, h1, 0);
65             w1 = (w1 + s->ss_h) >> s->ss_h;
66             if (s->ss_v) {
67                 h1 = (h1 + 1) >> 1;
68                 uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[2][0]);
69                 mc_chroma_dir(s, mc[3 + s->ss_h][b->filter][0],
70                               s->dst[1], s->dst[2], ls_uv,
71                               ref1->data[1], ref1->linesize[1],
72                               ref1->data[2], ref1->linesize[2], tref1,
73                               row << 2, col << (3 - s->ss_h),
74                               &uvmv,,,,, 8 >> s->ss_h, 4, w1, h1, 0);
75             } else {
76                 mc_chroma_dir(s, mc[3 + s->ss_h][b->filter][0],
77                               s->dst[1], s->dst[2], ls_uv,
78                               ref1->data[1], ref1->linesize[1],
79                               ref1->data[2], ref1->linesize[2], tref1,
80                               row << 3, col << (3 - s->ss_h),
81                               &b->mv[0][0],,,,, 8 >> s->ss_h, 4, w1, h1, 0);
82                 // BUG for 4:2:2 bs=8x4, libvpx uses the wrong block index
83                 // to get the motion vector for the bottom 4x4 block
84                 // https://code.google.com/p/webm/issues/detail?id=993
85                 if (s->ss_h == 0) {
86                     uvmv = b->mv[2][0];
87                 } else {
88                     uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[2][0]);
89                 }
90                 mc_chroma_dir(s, mc[3 + s->ss_h][b->filter][0],
91                               s->dst[1] + 4 * ls_uv, s->dst[2] + 4 * ls_uv, ls_uv,
92                               ref1->data[1], ref1->linesize[1],
93                               ref1->data[2], ref1->linesize[2], tref1,
94                               (row << 3) + 4, col << (3 - s->ss_h),
95                               &uvmv,,,,, 8 >> s->ss_h, 4, w1, h1, 0);
96             }
97
98             if (b->comp) {
99                 mc_luma_dir(s, mc[3][b->filter][1], s->dst[0], ls_y,
100                             ref2->data[0], ref2->linesize[0], tref2,
101                             row << 3, col << 3, &b->mv[0][1],,,,, 8, 4, w2, h2, 1);
102                 mc_luma_dir(s, mc[3][b->filter][1],
103                             s->dst[0] + 4 * ls_y, ls_y,
104                             ref2->data[0], ref2->linesize[0], tref2,
105                             (row << 3) + 4, col << 3, &b->mv[2][1],,,,, 8, 4, w2, h2, 1);
106                 w2 = (w2 + s->ss_h) >> s->ss_h;
107                 if (s->ss_v) {
108                     h2 = (h2 + 1) >> 1;
109                     uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[2][1]);
110                     mc_chroma_dir(s, mc[3 + s->ss_h][b->filter][1],
111                                   s->dst[1], s->dst[2], ls_uv,
112                                   ref2->data[1], ref2->linesize[1],
113                                   ref2->data[2], ref2->linesize[2], tref2,
114                                   row << 2, col << (3 - s->ss_h),
115                                   &uvmv,,,,, 8 >> s->ss_h, 4, w2, h2, 1);
116                 } else {
117                     mc_chroma_dir(s, mc[3 + s->ss_h][b->filter][1],
118                                   s->dst[1], s->dst[2], ls_uv,
119                                   ref2->data[1], ref2->linesize[1],
120                                   ref2->data[2], ref2->linesize[2], tref2,
121                                   row << 3, col << (3 - s->ss_h),
122                                   &b->mv[0][1],,,,, 8 >> s->ss_h, 4, w2, h2, 1);
123                     // BUG for 4:2:2 bs=8x4, libvpx uses the wrong block index
124                     // to get the motion vector for the bottom 4x4 block
125                     // https://code.google.com/p/webm/issues/detail?id=993
126                     if (s->ss_h == 0) {
127                         uvmv = b->mv[2][1];
128                     } else {
129                         uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[2][1]);
130                     }
131                     mc_chroma_dir(s, mc[3 + s->ss_h][b->filter][1],
132                                   s->dst[1] + 4 * ls_uv, s->dst[2] + 4 * ls_uv, ls_uv,
133                                   ref2->data[1], ref2->linesize[1],
134                                   ref2->data[2], ref2->linesize[2], tref2,
135                                   (row << 3) + 4, col << (3 - s->ss_h),
136                                   &uvmv,,,,, 8 >> s->ss_h, 4, w2, h2, 1);
137                 }
138             }
139         } else if (b->bs == BS_4x8) {
140             mc_luma_dir(s, mc[4][b->filter][0], s->dst[0], ls_y,
141                         ref1->data[0], ref1->linesize[0], tref1,
142                         row << 3, col << 3, &b->mv[0][0],,,,, 4, 8, w1, h1, 0);
143             mc_luma_dir(s, mc[4][b->filter][0], s->dst[0] + 4 * bytesperpixel, ls_y,
144                         ref1->data[0], ref1->linesize[0], tref1,
145                         row << 3, (col << 3) + 4, &b->mv[1][0],,,,, 4, 8, w1, h1, 0);
146             h1 = (h1 + s->ss_v) >> s->ss_v;
147             if (s->ss_h) {
148                 w1 = (w1 + 1) >> 1;
149                 uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[1][0]);
150                 mc_chroma_dir(s, mc[4][b->filter][0],
151                               s->dst[1], s->dst[2], ls_uv,
152                               ref1->data[1], ref1->linesize[1],
153                               ref1->data[2], ref1->linesize[2], tref1,
154                               row << (3 - s->ss_v), col << 2,
155                               &uvmv,,,,, 4, 8 >> s->ss_v, w1, h1, 0);
156             } else {
157                 mc_chroma_dir(s, mc[4][b->filter][0],
158                               s->dst[1], s->dst[2], ls_uv,
159                               ref1->data[1], ref1->linesize[1],
160                               ref1->data[2], ref1->linesize[2], tref1,
161                               row << (3 - s->ss_v), col << 3,
162                               &b->mv[0][0],,,,, 4, 8 >> s->ss_v, w1, h1, 0);
163                 mc_chroma_dir(s, mc[4][b->filter][0],
164                               s->dst[1] + 4 * bytesperpixel,
165                               s->dst[2] + 4 * bytesperpixel, ls_uv,
166                               ref1->data[1], ref1->linesize[1],
167                               ref1->data[2], ref1->linesize[2], tref1,
168                               row << (3 - s->ss_v), (col << 3) + 4,
169                               &b->mv[1][0],,,,, 4, 8 >> s->ss_v, w1, h1, 0);
170             }
171
172             if (b->comp) {
173                 mc_luma_dir(s, mc[4][b->filter][1], s->dst[0], ls_y,
174                             ref2->data[0], ref2->linesize[0], tref2,
175                             row << 3, col << 3, &b->mv[0][1],,,,, 4, 8, w2, h2, 1);
176                 mc_luma_dir(s, mc[4][b->filter][1], s->dst[0] + 4 * bytesperpixel, ls_y,
177                             ref2->data[0], ref2->linesize[0], tref2,
178                             row << 3, (col << 3) + 4, &b->mv[1][1],,,,, 4, 8, w2, h2, 1);
179                 h2 = (h2 + s->ss_v) >> s->ss_v;
180                 if (s->ss_h) {
181                     w2 = (w2 + 1) >> 1;
182                     uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[1][1]);
183                     mc_chroma_dir(s, mc[4][b->filter][1],
184                                   s->dst[1], s->dst[2], ls_uv,
185                                   ref2->data[1], ref2->linesize[1],
186                                   ref2->data[2], ref2->linesize[2], tref2,
187                                   row << (3 - s->ss_v), col << 2,
188                                   &uvmv,,,,, 4, 8 >> s->ss_v, w2, h2, 1);
189                 } else {
190                     mc_chroma_dir(s, mc[4][b->filter][1],
191                                   s->dst[1], s->dst[2], ls_uv,
192                                   ref2->data[1], ref2->linesize[1],
193                                   ref2->data[2], ref2->linesize[2], tref2,
194                                   row << (3 - s->ss_v), col << 3,
195                                   &b->mv[0][1],,,,, 4, 8 >> s->ss_v, w2, h2, 1);
196                     mc_chroma_dir(s, mc[4][b->filter][1],
197                                   s->dst[1] + 4 * bytesperpixel,
198                                   s->dst[2] + 4 * bytesperpixel, ls_uv,
199                                   ref2->data[1], ref2->linesize[1],
200                                   ref2->data[2], ref2->linesize[2], tref2,
201                                   row << (3 - s->ss_v), (col << 3) + 4,
202                                   &b->mv[1][1],,,,, 4, 8 >> s->ss_v, w2, h2, 1);
203                 }
204             }
205         } else
206 #endif
207         {
208             av_assert2(b->bs == BS_4x4);
209
210             // FIXME if two horizontally adjacent blocks have the same MV,
211             // do a w8 instead of a w4 call
212             mc_luma_dir(s, mc[4][b->filter][0], s->dst[0], ls_y,
213                         ref1->data[0], ref1->linesize[0], tref1,
214                         row << 3, col << 3, &b->mv[0][0],
215                         0, 0, 8, 8, 4, 4, w1, h1, 0);
216             mc_luma_dir(s, mc[4][b->filter][0], s->dst[0] + 4 * bytesperpixel, ls_y,
217                         ref1->data[0], ref1->linesize[0], tref1,
218                         row << 3, (col << 3) + 4, &b->mv[1][0],
219                         4, 0, 8, 8, 4, 4, w1, h1, 0);
220             mc_luma_dir(s, mc[4][b->filter][0],
221                         s->dst[0] + 4 * ls_y, ls_y,
222                         ref1->data[0], ref1->linesize[0], tref1,
223                         (row << 3) + 4, col << 3, &b->mv[2][0],
224                         0, 4, 8, 8, 4, 4, w1, h1, 0);
225             mc_luma_dir(s, mc[4][b->filter][0],
226                         s->dst[0] + 4 * ls_y + 4 * bytesperpixel, ls_y,
227                         ref1->data[0], ref1->linesize[0], tref1,
228                         (row << 3) + 4, (col << 3) + 4, &b->mv[3][0],
229                         4, 4, 8, 8, 4, 4, w1, h1, 0);
230             if (s->ss_v) {
231                 h1 = (h1 + 1) >> 1;
232                 if (s->ss_h) {
233                     w1 = (w1 + 1) >> 1;
234                     uvmv = ROUNDED_DIV_MVx4(b->mv[0][0], b->mv[1][0],
235                                             b->mv[2][0], b->mv[3][0]);
236                     mc_chroma_dir(s, mc[4][b->filter][0],
237                                   s->dst[1], s->dst[2], ls_uv,
238                                   ref1->data[1], ref1->linesize[1],
239                                   ref1->data[2], ref1->linesize[2], tref1,
240                                   row << 2, col << 2,
241                                   &uvmv, 0, 0, 4, 4, 4, 4, w1, h1, 0);
242                 } else {
243                     uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[2][0]);
244                     mc_chroma_dir(s, mc[4][b->filter][0],
245                                   s->dst[1], s->dst[2], ls_uv,
246                                   ref1->data[1], ref1->linesize[1],
247                                   ref1->data[2], ref1->linesize[2], tref1,
248                                   row << 2, col << 3,
249                                   &uvmv, 0, 0, 8, 4, 4, 4, w1, h1, 0);
250                     uvmv = ROUNDED_DIV_MVx2(b->mv[1][0], b->mv[3][0]);
251                     mc_chroma_dir(s, mc[4][b->filter][0],
252                                   s->dst[1] + 4 * bytesperpixel,
253                                   s->dst[2] + 4 * bytesperpixel, ls_uv,
254                                   ref1->data[1], ref1->linesize[1],
255                                   ref1->data[2], ref1->linesize[2], tref1,
256                                   row << 2, (col << 3) + 4,
257                                   &uvmv, 4, 0, 8, 4, 4, 4, w1, h1, 0);
258                 }
259             } else {
260                 if (s->ss_h) {
261                     w1 = (w1 + 1) >> 1;
262                     uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[1][0]);
263                     mc_chroma_dir(s, mc[4][b->filter][0],
264                                   s->dst[1], s->dst[2], ls_uv,
265                                   ref1->data[1], ref1->linesize[1],
266                                   ref1->data[2], ref1->linesize[2], tref1,
267                                   row << 3, col << 2,
268                                   &uvmv, 0, 0, 4, 8, 4, 4, w1, h1, 0);
269                     // BUG libvpx uses wrong block index for 4:2:2 bs=4x4
270                     // bottom block
271                     // https://code.google.com/p/webm/issues/detail?id=993
272                     uvmv = ROUNDED_DIV_MVx2(b->mv[1][0], b->mv[2][0]);
273                     mc_chroma_dir(s, mc[4][b->filter][0],
274                                   s->dst[1] + 4 * ls_uv, s->dst[2] + 4 * ls_uv, ls_uv,
275                                   ref1->data[1], ref1->linesize[1],
276                                   ref1->data[2], ref1->linesize[2], tref1,
277                                   (row << 3) + 4, col << 2,
278                                   &uvmv, 0, 4, 4, 8, 4, 4, w1, h1, 0);
279                 } else {
280                     mc_chroma_dir(s, mc[4][b->filter][0],
281                                   s->dst[1], s->dst[2], ls_uv,
282                                   ref1->data[1], ref1->linesize[1],
283                                   ref1->data[2], ref1->linesize[2], tref1,
284                                   row << 3, col << 3,
285                                   &b->mv[0][0], 0, 0, 8, 8, 4, 4, w1, h1, 0);
286                     mc_chroma_dir(s, mc[4][b->filter][0],
287                                   s->dst[1] + 4 * bytesperpixel,
288                                   s->dst[2] + 4 * bytesperpixel, ls_uv,
289                                   ref1->data[1], ref1->linesize[1],
290                                   ref1->data[2], ref1->linesize[2], tref1,
291                                   row << 3, (col << 3) + 4,
292                                   &b->mv[1][0], 4, 0, 8, 8, 4, 4, w1, h1, 0);
293                     mc_chroma_dir(s, mc[4][b->filter][0],
294                                   s->dst[1] + 4 * ls_uv, s->dst[2] + 4 * ls_uv, ls_uv,
295                                   ref1->data[1], ref1->linesize[1],
296                                   ref1->data[2], ref1->linesize[2], tref1,
297                                   (row << 3) + 4, col << 3,
298                                   &b->mv[2][0], 0, 4, 8, 8, 4, 4, w1, h1, 0);
299                     mc_chroma_dir(s, mc[4][b->filter][0],
300                                   s->dst[1] + 4 * ls_uv + 4 * bytesperpixel,
301                                   s->dst[2] + 4 * ls_uv + 4 * bytesperpixel, ls_uv,
302                                   ref1->data[1], ref1->linesize[1],
303                                   ref1->data[2], ref1->linesize[2], tref1,
304                                   (row << 3) + 4, (col << 3) + 4,
305                                   &b->mv[3][0], 4, 4, 8, 8, 4, 4, w1, h1, 0);
306                 }
307             }
308
309             if (b->comp) {
310                 mc_luma_dir(s, mc[4][b->filter][1], s->dst[0], ls_y,
311                             ref2->data[0], ref2->linesize[0], tref2,
312                             row << 3, col << 3, &b->mv[0][1], 0, 0, 8, 8, 4, 4, w2, h2, 1);
313                 mc_luma_dir(s, mc[4][b->filter][1], s->dst[0] + 4 * bytesperpixel, ls_y,
314                             ref2->data[0], ref2->linesize[0], tref2,
315                             row << 3, (col << 3) + 4, &b->mv[1][1], 4, 0, 8, 8, 4, 4, w2, h2, 1);
316                 mc_luma_dir(s, mc[4][b->filter][1],
317                             s->dst[0] + 4 * ls_y, ls_y,
318                             ref2->data[0], ref2->linesize[0], tref2,
319                             (row << 3) + 4, col << 3, &b->mv[2][1], 0, 4, 8, 8, 4, 4, w2, h2, 1);
320                 mc_luma_dir(s, mc[4][b->filter][1],
321                             s->dst[0] + 4 * ls_y + 4 * bytesperpixel, ls_y,
322                             ref2->data[0], ref2->linesize[0], tref2,
323                             (row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, 8, 8, 4, 4, w2, h2, 1);
324                 if (s->ss_v) {
325                     h2 = (h2 + 1) >> 1;
326                     if (s->ss_h) {
327                         w2 = (w2 + 1) >> 1;
328                         uvmv = ROUNDED_DIV_MVx4(b->mv[0][1], b->mv[1][1],
329                                                 b->mv[2][1], b->mv[3][1]);
330                         mc_chroma_dir(s, mc[4][b->filter][1],
331                                       s->dst[1], s->dst[2], ls_uv,
332                                       ref2->data[1], ref2->linesize[1],
333                                       ref2->data[2], ref2->linesize[2], tref2,
334                                       row << 2, col << 2,
335                                       &uvmv, 0, 0, 4, 4, 4, 4, w2, h2, 1);
336                     } else {
337                         uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[2][1]);
338                         mc_chroma_dir(s, mc[4][b->filter][1],
339                                       s->dst[1], s->dst[2], ls_uv,
340                                       ref2->data[1], ref2->linesize[1],
341                                       ref2->data[2], ref2->linesize[2], tref2,
342                                       row << 2, col << 3,
343                                       &uvmv, 0, 0, 8, 4, 4, 4, w2, h2, 1);
344                         uvmv = ROUNDED_DIV_MVx2(b->mv[1][1], b->mv[3][1]);
345                         mc_chroma_dir(s, mc[4][b->filter][1],
346                                       s->dst[1] + 4 * bytesperpixel,
347                                       s->dst[2] + 4 * bytesperpixel, ls_uv,
348                                       ref2->data[1], ref2->linesize[1],
349                                       ref2->data[2], ref2->linesize[2], tref2,
350                                       row << 2, (col << 3) + 4,
351                                       &uvmv, 4, 0, 8, 4, 4, 4, w2, h2, 1);
352                     }
353                 } else {
354                     if (s->ss_h) {
355                         w2 = (w2 + 1) >> 1;
356                         uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[1][1]);
357                         mc_chroma_dir(s, mc[4][b->filter][1],
358                                       s->dst[1], s->dst[2], ls_uv,
359                                       ref2->data[1], ref2->linesize[1],
360                                       ref2->data[2], ref2->linesize[2], tref2,
361                                       row << 3, col << 2,
362                                       &uvmv, 0, 0, 4, 8, 4, 4, w2, h2, 1);
363                         // BUG libvpx uses wrong block index for 4:2:2 bs=4x4
364                         // bottom block
365                         // https://code.google.com/p/webm/issues/detail?id=993
366                         uvmv = ROUNDED_DIV_MVx2(b->mv[1][1], b->mv[2][1]);
367                         mc_chroma_dir(s, mc[4][b->filter][1],
368                                       s->dst[1] + 4 * ls_uv, s->dst[2] + 4 * ls_uv, ls_uv,
369                                       ref2->data[1], ref2->linesize[1],
370                                       ref2->data[2], ref2->linesize[2], tref2,
371                                       (row << 3) + 4, col << 2,
372                                       &uvmv, 0, 4, 4, 8, 4, 4, w2, h2, 1);
373                     } else {
374                         mc_chroma_dir(s, mc[4][b->filter][1],
375                                       s->dst[1], s->dst[2], ls_uv,
376                                       ref2->data[1], ref2->linesize[1],
377                                       ref2->data[2], ref2->linesize[2], tref2,
378                                       row << 3, col << 3,
379                                       &b->mv[0][1], 0, 0, 8, 8, 4, 4, w2, h2, 1);
380                         mc_chroma_dir(s, mc[4][b->filter][1],
381                                       s->dst[1] + 4 * bytesperpixel,
382                                       s->dst[2] + 4 * bytesperpixel, ls_uv,
383                                       ref2->data[1], ref2->linesize[1],
384                                       ref2->data[2], ref2->linesize[2], tref2,
385                                       row << 3, (col << 3) + 4,
386                                       &b->mv[1][1], 4, 0, 8, 8, 4, 4, w2, h2, 1);
387                         mc_chroma_dir(s, mc[4][b->filter][1],
388                                       s->dst[1] + 4 * ls_uv, s->dst[2] + 4 * ls_uv, ls_uv,
389                                       ref2->data[1], ref2->linesize[1],
390                                       ref2->data[2], ref2->linesize[2], tref2,
391                                       (row << 3) + 4, col << 3,
392                                       &b->mv[2][1], 0, 4, 8, 8, 4, 4, w2, h2, 1);
393                         mc_chroma_dir(s, mc[4][b->filter][1],
394                                       s->dst[1] + 4 * ls_uv + 4 * bytesperpixel,
395                                       s->dst[2] + 4 * ls_uv + 4 * bytesperpixel, ls_uv,
396                                       ref2->data[1], ref2->linesize[1],
397                                       ref2->data[2], ref2->linesize[2], tref2,
398                                       (row << 3) + 4, (col << 3) + 4,
399                                       &b->mv[3][1], 4, 4, 8, 8, 4, 4, w2, h2, 1);
400                     }
401                 }
402             }
403         }
404     } else {
405         int bwl = bwlog_tab[0][b->bs];
406         int bw = bwh_tab[0][b->bs][0] * 4, bh = bwh_tab[0][b->bs][1] * 4;
407         int uvbw = bwh_tab[s->ss_h][b->bs][0] * 4, uvbh = bwh_tab[s->ss_v][b->bs][1] * 4;
408
409         mc_luma_dir(s, mc[bwl][b->filter][0], s->dst[0], ls_y,
410                     ref1->data[0], ref1->linesize[0], tref1,
411                     row << 3, col << 3, &b->mv[0][0], 0, 0, bw, bh, bw, bh, w1, h1, 0);
412         w1 = (w1 + s->ss_h) >> s->ss_h;
413         h1 = (h1 + s->ss_v) >> s->ss_v;
414         mc_chroma_dir(s, mc[bwl + s->ss_h][b->filter][0],
415                       s->dst[1], s->dst[2], ls_uv,
416                       ref1->data[1], ref1->linesize[1],
417                       ref1->data[2], ref1->linesize[2], tref1,
418                       row << (3 - s->ss_v), col << (3 - s->ss_h),
419                       &b->mv[0][0], 0, 0, uvbw, uvbh, uvbw, uvbh, w1, h1, 0);
420
421         if (b->comp) {
422             mc_luma_dir(s, mc[bwl][b->filter][1], s->dst[0], ls_y,
423                         ref2->data[0], ref2->linesize[0], tref2,
424                         row << 3, col << 3, &b->mv[0][1], 0, 0, bw, bh, bw, bh, w2, h2, 1);
425             w2 = (w2 + s->ss_h) >> s->ss_h;
426             h2 = (h2 + s->ss_v) >> s->ss_v;
427             mc_chroma_dir(s, mc[bwl + s->ss_h][b->filter][1],
428                           s->dst[1], s->dst[2], ls_uv,
429                           ref2->data[1], ref2->linesize[1],
430                           ref2->data[2], ref2->linesize[2], tref2,
431                           row << (3 - s->ss_v), col << (3 - s->ss_h),
432                           &b->mv[0][1], 0, 0, uvbw, uvbh, uvbw, uvbh, w2, h2, 1);
433         }
434     }
435 }