2 * VP9 compatible video decoder
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
/*
 * Build a VP56mv compound literal whose x and y components are each
 * ROUNDED_DIV(sum, 2) of the matching components of the vectors a and b.
 *
 * Each argument is parenthesized so that any expression yielding a vector
 * (for example a dereferenced pointer) expands correctly. Both arguments
 * are evaluated twice, so they must be free of side effects.
 */
#define ROUNDED_DIV_MVx2(a, b) \
    (VP56mv) { .x = ROUNDED_DIV((a).x + (b).x, 2), \
               .y = ROUNDED_DIV((a).y + (b).y, 2) }
/*
 * Build a VP56mv compound literal whose x and y components are each
 * ROUNDED_DIV(sum, 4) of the matching components of the four vectors
 * a, b, c and d.
 *
 * Each argument is parenthesized so that any expression yielding a vector
 * (for example a dereferenced pointer) expands correctly. Every argument
 * is evaluated twice, so they must be free of side effects.
 */
#define ROUNDED_DIV_MVx4(a, b, c, d) \
    (VP56mv) { .x = ROUNDED_DIV((a).x + (b).x + (c).x + (d).x, 4), \
               .y = ROUNDED_DIV((a).y + (b).y + (c).y + (d).y, 4) }
30 static void FN(inter_pred)(AVCodecContext *ctx)
32 static const uint8_t bwlog_tab[2][N_BS_SIZES] = {
33 { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
34 { 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 },
36 VP9Context *s = ctx->priv_data;
38 int row = s->row, col = s->col;
39 ThreadFrame *tref1 = &s->refs[s->refidx[b->ref[0]]], *tref2;
40 AVFrame *ref1 = tref1->f, *ref2;
41 int w1 = ref1->width, h1 = ref1->height, w2, h2;
42 ptrdiff_t ls_y = s->y_stride, ls_uv = s->uv_stride;
43 int bytesperpixel = BYTES_PER_PIXEL;
46 tref2 = &s->refs[s->refidx[b->ref[1]]];
57 if (b->bs == BS_8x4) {
58 mc_luma_dir(s, mc[3][b->filter][0], s->dst[0], ls_y,
59 ref1->data[0], ref1->linesize[0], tref1,
60 row << 3, col << 3, &b->mv[0][0],,,,, 8, 4, w1, h1, 0);
61 mc_luma_dir(s, mc[3][b->filter][0],
62 s->dst[0] + 4 * ls_y, ls_y,
63 ref1->data[0], ref1->linesize[0], tref1,
64 (row << 3) + 4, col << 3, &b->mv[2][0],,,,, 8, 4, w1, h1, 0);
65 w1 = (w1 + s->ss_h) >> s->ss_h;
68 uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[2][0]);
69 mc_chroma_dir(s, mc[3 + s->ss_h][b->filter][0],
70 s->dst[1], s->dst[2], ls_uv,
71 ref1->data[1], ref1->linesize[1],
72 ref1->data[2], ref1->linesize[2], tref1,
73 row << 2, col << (3 - s->ss_h),
74 &uvmv,,,,, 8 >> s->ss_h, 4, w1, h1, 0);
76 mc_chroma_dir(s, mc[3 + s->ss_h][b->filter][0],
77 s->dst[1], s->dst[2], ls_uv,
78 ref1->data[1], ref1->linesize[1],
79 ref1->data[2], ref1->linesize[2], tref1,
80 row << 3, col << (3 - s->ss_h),
81 &b->mv[0][0],,,,, 8 >> s->ss_h, 4, w1, h1, 0);
82 // BUG for 4:2:2 bs=8x4, libvpx uses the wrong block index
83 // to get the motion vector for the bottom 4x4 block
84 // https://code.google.com/p/webm/issues/detail?id=993
88 uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[2][0]);
90 mc_chroma_dir(s, mc[3 + s->ss_h][b->filter][0],
91 s->dst[1] + 4 * ls_uv, s->dst[2] + 4 * ls_uv, ls_uv,
92 ref1->data[1], ref1->linesize[1],
93 ref1->data[2], ref1->linesize[2], tref1,
94 (row << 3) + 4, col << (3 - s->ss_h),
95 &uvmv,,,,, 8 >> s->ss_h, 4, w1, h1, 0);
99 mc_luma_dir(s, mc[3][b->filter][1], s->dst[0], ls_y,
100 ref2->data[0], ref2->linesize[0], tref2,
101 row << 3, col << 3, &b->mv[0][1],,,,, 8, 4, w2, h2, 1);
102 mc_luma_dir(s, mc[3][b->filter][1],
103 s->dst[0] + 4 * ls_y, ls_y,
104 ref2->data[0], ref2->linesize[0], tref2,
105 (row << 3) + 4, col << 3, &b->mv[2][1],,,,, 8, 4, w2, h2, 1);
106 w2 = (w2 + s->ss_h) >> s->ss_h;
109 uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[2][1]);
110 mc_chroma_dir(s, mc[3 + s->ss_h][b->filter][1],
111 s->dst[1], s->dst[2], ls_uv,
112 ref2->data[1], ref2->linesize[1],
113 ref2->data[2], ref2->linesize[2], tref2,
114 row << 2, col << (3 - s->ss_h),
115 &uvmv,,,,, 8 >> s->ss_h, 4, w2, h2, 1);
117 mc_chroma_dir(s, mc[3 + s->ss_h][b->filter][1],
118 s->dst[1], s->dst[2], ls_uv,
119 ref2->data[1], ref2->linesize[1],
120 ref2->data[2], ref2->linesize[2], tref2,
121 row << 3, col << (3 - s->ss_h),
122 &b->mv[0][1],,,,, 8 >> s->ss_h, 4, w2, h2, 1);
123 // BUG for 4:2:2 bs=8x4, libvpx uses the wrong block index
124 // to get the motion vector for the bottom 4x4 block
125 // https://code.google.com/p/webm/issues/detail?id=993
129 uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[2][1]);
131 mc_chroma_dir(s, mc[3 + s->ss_h][b->filter][1],
132 s->dst[1] + 4 * ls_uv, s->dst[2] + 4 * ls_uv, ls_uv,
133 ref2->data[1], ref2->linesize[1],
134 ref2->data[2], ref2->linesize[2], tref2,
135 (row << 3) + 4, col << (3 - s->ss_h),
136 &uvmv,,,,, 8 >> s->ss_h, 4, w2, h2, 1);
139 } else if (b->bs == BS_4x8) {
140 mc_luma_dir(s, mc[4][b->filter][0], s->dst[0], ls_y,
141 ref1->data[0], ref1->linesize[0], tref1,
142 row << 3, col << 3, &b->mv[0][0],,,,, 4, 8, w1, h1, 0);
143 mc_luma_dir(s, mc[4][b->filter][0], s->dst[0] + 4 * bytesperpixel, ls_y,
144 ref1->data[0], ref1->linesize[0], tref1,
145 row << 3, (col << 3) + 4, &b->mv[1][0],,,,, 4, 8, w1, h1, 0);
146 h1 = (h1 + s->ss_v) >> s->ss_v;
149 uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[1][0]);
150 mc_chroma_dir(s, mc[4][b->filter][0],
151 s->dst[1], s->dst[2], ls_uv,
152 ref1->data[1], ref1->linesize[1],
153 ref1->data[2], ref1->linesize[2], tref1,
154 row << (3 - s->ss_v), col << 2,
155 &uvmv,,,,, 4, 8 >> s->ss_v, w1, h1, 0);
157 mc_chroma_dir(s, mc[4][b->filter][0],
158 s->dst[1], s->dst[2], ls_uv,
159 ref1->data[1], ref1->linesize[1],
160 ref1->data[2], ref1->linesize[2], tref1,
161 row << (3 - s->ss_v), col << 3,
162 &b->mv[0][0],,,,, 4, 8 >> s->ss_v, w1, h1, 0);
163 mc_chroma_dir(s, mc[4][b->filter][0],
164 s->dst[1] + 4 * bytesperpixel,
165 s->dst[2] + 4 * bytesperpixel, ls_uv,
166 ref1->data[1], ref1->linesize[1],
167 ref1->data[2], ref1->linesize[2], tref1,
168 row << (3 - s->ss_v), (col << 3) + 4,
169 &b->mv[1][0],,,,, 4, 8 >> s->ss_v, w1, h1, 0);
173 mc_luma_dir(s, mc[4][b->filter][1], s->dst[0], ls_y,
174 ref2->data[0], ref2->linesize[0], tref2,
175 row << 3, col << 3, &b->mv[0][1],,,,, 4, 8, w2, h2, 1);
176 mc_luma_dir(s, mc[4][b->filter][1], s->dst[0] + 4 * bytesperpixel, ls_y,
177 ref2->data[0], ref2->linesize[0], tref2,
178 row << 3, (col << 3) + 4, &b->mv[1][1],,,,, 4, 8, w2, h2, 1);
179 h2 = (h2 + s->ss_v) >> s->ss_v;
182 uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[1][1]);
183 mc_chroma_dir(s, mc[4][b->filter][1],
184 s->dst[1], s->dst[2], ls_uv,
185 ref2->data[1], ref2->linesize[1],
186 ref2->data[2], ref2->linesize[2], tref2,
187 row << (3 - s->ss_v), col << 2,
188 &uvmv,,,,, 4, 8 >> s->ss_v, w2, h2, 1);
190 mc_chroma_dir(s, mc[4][b->filter][1],
191 s->dst[1], s->dst[2], ls_uv,
192 ref2->data[1], ref2->linesize[1],
193 ref2->data[2], ref2->linesize[2], tref2,
194 row << (3 - s->ss_v), col << 3,
195 &b->mv[0][1],,,,, 4, 8 >> s->ss_v, w2, h2, 1);
196 mc_chroma_dir(s, mc[4][b->filter][1],
197 s->dst[1] + 4 * bytesperpixel,
198 s->dst[2] + 4 * bytesperpixel, ls_uv,
199 ref2->data[1], ref2->linesize[1],
200 ref2->data[2], ref2->linesize[2], tref2,
201 row << (3 - s->ss_v), (col << 3) + 4,
202 &b->mv[1][1],,,,, 4, 8 >> s->ss_v, w2, h2, 1);
208 av_assert2(b->bs == BS_4x4);
210 // FIXME if two horizontally adjacent blocks have the same MV,
211 // do a w8 instead of a w4 call
212 mc_luma_dir(s, mc[4][b->filter][0], s->dst[0], ls_y,
213 ref1->data[0], ref1->linesize[0], tref1,
214 row << 3, col << 3, &b->mv[0][0],
215 0, 0, 8, 8, 4, 4, w1, h1, 0);
216 mc_luma_dir(s, mc[4][b->filter][0], s->dst[0] + 4 * bytesperpixel, ls_y,
217 ref1->data[0], ref1->linesize[0], tref1,
218 row << 3, (col << 3) + 4, &b->mv[1][0],
219 4, 0, 8, 8, 4, 4, w1, h1, 0);
220 mc_luma_dir(s, mc[4][b->filter][0],
221 s->dst[0] + 4 * ls_y, ls_y,
222 ref1->data[0], ref1->linesize[0], tref1,
223 (row << 3) + 4, col << 3, &b->mv[2][0],
224 0, 4, 8, 8, 4, 4, w1, h1, 0);
225 mc_luma_dir(s, mc[4][b->filter][0],
226 s->dst[0] + 4 * ls_y + 4 * bytesperpixel, ls_y,
227 ref1->data[0], ref1->linesize[0], tref1,
228 (row << 3) + 4, (col << 3) + 4, &b->mv[3][0],
229 4, 4, 8, 8, 4, 4, w1, h1, 0);
234 uvmv = ROUNDED_DIV_MVx4(b->mv[0][0], b->mv[1][0],
235 b->mv[2][0], b->mv[3][0]);
236 mc_chroma_dir(s, mc[4][b->filter][0],
237 s->dst[1], s->dst[2], ls_uv,
238 ref1->data[1], ref1->linesize[1],
239 ref1->data[2], ref1->linesize[2], tref1,
241 &uvmv, 0, 0, 4, 4, 4, 4, w1, h1, 0);
243 uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[2][0]);
244 mc_chroma_dir(s, mc[4][b->filter][0],
245 s->dst[1], s->dst[2], ls_uv,
246 ref1->data[1], ref1->linesize[1],
247 ref1->data[2], ref1->linesize[2], tref1,
249 &uvmv, 0, 0, 8, 4, 4, 4, w1, h1, 0);
250 uvmv = ROUNDED_DIV_MVx2(b->mv[1][0], b->mv[3][0]);
251 mc_chroma_dir(s, mc[4][b->filter][0],
252 s->dst[1] + 4 * bytesperpixel,
253 s->dst[2] + 4 * bytesperpixel, ls_uv,
254 ref1->data[1], ref1->linesize[1],
255 ref1->data[2], ref1->linesize[2], tref1,
256 row << 2, (col << 3) + 4,
257 &uvmv, 4, 0, 8, 4, 4, 4, w1, h1, 0);
262 uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[1][0]);
263 mc_chroma_dir(s, mc[4][b->filter][0],
264 s->dst[1], s->dst[2], ls_uv,
265 ref1->data[1], ref1->linesize[1],
266 ref1->data[2], ref1->linesize[2], tref1,
268 &uvmv, 0, 0, 4, 8, 4, 4, w1, h1, 0);
269 // BUG libvpx uses wrong block index for 4:2:2 bs=4x4
271 // https://code.google.com/p/webm/issues/detail?id=993
272 uvmv = ROUNDED_DIV_MVx2(b->mv[1][0], b->mv[2][0]);
273 mc_chroma_dir(s, mc[4][b->filter][0],
274 s->dst[1] + 4 * ls_uv, s->dst[2] + 4 * ls_uv, ls_uv,
275 ref1->data[1], ref1->linesize[1],
276 ref1->data[2], ref1->linesize[2], tref1,
277 (row << 3) + 4, col << 2,
278 &uvmv, 0, 4, 4, 8, 4, 4, w1, h1, 0);
280 mc_chroma_dir(s, mc[4][b->filter][0],
281 s->dst[1], s->dst[2], ls_uv,
282 ref1->data[1], ref1->linesize[1],
283 ref1->data[2], ref1->linesize[2], tref1,
285 &b->mv[0][0], 0, 0, 8, 8, 4, 4, w1, h1, 0);
286 mc_chroma_dir(s, mc[4][b->filter][0],
287 s->dst[1] + 4 * bytesperpixel,
288 s->dst[2] + 4 * bytesperpixel, ls_uv,
289 ref1->data[1], ref1->linesize[1],
290 ref1->data[2], ref1->linesize[2], tref1,
291 row << 3, (col << 3) + 4,
292 &b->mv[1][0], 4, 0, 8, 8, 4, 4, w1, h1, 0);
293 mc_chroma_dir(s, mc[4][b->filter][0],
294 s->dst[1] + 4 * ls_uv, s->dst[2] + 4 * ls_uv, ls_uv,
295 ref1->data[1], ref1->linesize[1],
296 ref1->data[2], ref1->linesize[2], tref1,
297 (row << 3) + 4, col << 3,
298 &b->mv[2][0], 0, 4, 8, 8, 4, 4, w1, h1, 0);
299 mc_chroma_dir(s, mc[4][b->filter][0],
300 s->dst[1] + 4 * ls_uv + 4 * bytesperpixel,
301 s->dst[2] + 4 * ls_uv + 4 * bytesperpixel, ls_uv,
302 ref1->data[1], ref1->linesize[1],
303 ref1->data[2], ref1->linesize[2], tref1,
304 (row << 3) + 4, (col << 3) + 4,
305 &b->mv[3][0], 4, 4, 8, 8, 4, 4, w1, h1, 0);
310 mc_luma_dir(s, mc[4][b->filter][1], s->dst[0], ls_y,
311 ref2->data[0], ref2->linesize[0], tref2,
312 row << 3, col << 3, &b->mv[0][1], 0, 0, 8, 8, 4, 4, w2, h2, 1);
313 mc_luma_dir(s, mc[4][b->filter][1], s->dst[0] + 4 * bytesperpixel, ls_y,
314 ref2->data[0], ref2->linesize[0], tref2,
315 row << 3, (col << 3) + 4, &b->mv[1][1], 4, 0, 8, 8, 4, 4, w2, h2, 1);
316 mc_luma_dir(s, mc[4][b->filter][1],
317 s->dst[0] + 4 * ls_y, ls_y,
318 ref2->data[0], ref2->linesize[0], tref2,
319 (row << 3) + 4, col << 3, &b->mv[2][1], 0, 4, 8, 8, 4, 4, w2, h2, 1);
320 mc_luma_dir(s, mc[4][b->filter][1],
321 s->dst[0] + 4 * ls_y + 4 * bytesperpixel, ls_y,
322 ref2->data[0], ref2->linesize[0], tref2,
323 (row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, 8, 8, 4, 4, w2, h2, 1);
328 uvmv = ROUNDED_DIV_MVx4(b->mv[0][1], b->mv[1][1],
329 b->mv[2][1], b->mv[3][1]);
330 mc_chroma_dir(s, mc[4][b->filter][1],
331 s->dst[1], s->dst[2], ls_uv,
332 ref2->data[1], ref2->linesize[1],
333 ref2->data[2], ref2->linesize[2], tref2,
335 &uvmv, 0, 0, 4, 4, 4, 4, w2, h2, 1);
337 uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[2][1]);
338 mc_chroma_dir(s, mc[4][b->filter][1],
339 s->dst[1], s->dst[2], ls_uv,
340 ref2->data[1], ref2->linesize[1],
341 ref2->data[2], ref2->linesize[2], tref2,
343 &uvmv, 0, 0, 8, 4, 4, 4, w2, h2, 1);
344 uvmv = ROUNDED_DIV_MVx2(b->mv[1][1], b->mv[3][1]);
345 mc_chroma_dir(s, mc[4][b->filter][1],
346 s->dst[1] + 4 * bytesperpixel,
347 s->dst[2] + 4 * bytesperpixel, ls_uv,
348 ref2->data[1], ref2->linesize[1],
349 ref2->data[2], ref2->linesize[2], tref2,
350 row << 2, (col << 3) + 4,
351 &uvmv, 4, 0, 8, 4, 4, 4, w2, h2, 1);
356 uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[1][1]);
357 mc_chroma_dir(s, mc[4][b->filter][1],
358 s->dst[1], s->dst[2], ls_uv,
359 ref2->data[1], ref2->linesize[1],
360 ref2->data[2], ref2->linesize[2], tref2,
362 &uvmv, 0, 0, 4, 8, 4, 4, w2, h2, 1);
363 // BUG libvpx uses wrong block index for 4:2:2 bs=4x4
365 // https://code.google.com/p/webm/issues/detail?id=993
366 uvmv = ROUNDED_DIV_MVx2(b->mv[1][1], b->mv[2][1]);
367 mc_chroma_dir(s, mc[4][b->filter][1],
368 s->dst[1] + 4 * ls_uv, s->dst[2] + 4 * ls_uv, ls_uv,
369 ref2->data[1], ref2->linesize[1],
370 ref2->data[2], ref2->linesize[2], tref2,
371 (row << 3) + 4, col << 2,
372 &uvmv, 0, 4, 4, 8, 4, 4, w2, h2, 1);
374 mc_chroma_dir(s, mc[4][b->filter][1],
375 s->dst[1], s->dst[2], ls_uv,
376 ref2->data[1], ref2->linesize[1],
377 ref2->data[2], ref2->linesize[2], tref2,
379 &b->mv[0][1], 0, 0, 8, 8, 4, 4, w2, h2, 1);
380 mc_chroma_dir(s, mc[4][b->filter][1],
381 s->dst[1] + 4 * bytesperpixel,
382 s->dst[2] + 4 * bytesperpixel, ls_uv,
383 ref2->data[1], ref2->linesize[1],
384 ref2->data[2], ref2->linesize[2], tref2,
385 row << 3, (col << 3) + 4,
386 &b->mv[1][1], 4, 0, 8, 8, 4, 4, w2, h2, 1);
387 mc_chroma_dir(s, mc[4][b->filter][1],
388 s->dst[1] + 4 * ls_uv, s->dst[2] + 4 * ls_uv, ls_uv,
389 ref2->data[1], ref2->linesize[1],
390 ref2->data[2], ref2->linesize[2], tref2,
391 (row << 3) + 4, col << 3,
392 &b->mv[2][1], 0, 4, 8, 8, 4, 4, w2, h2, 1);
393 mc_chroma_dir(s, mc[4][b->filter][1],
394 s->dst[1] + 4 * ls_uv + 4 * bytesperpixel,
395 s->dst[2] + 4 * ls_uv + 4 * bytesperpixel, ls_uv,
396 ref2->data[1], ref2->linesize[1],
397 ref2->data[2], ref2->linesize[2], tref2,
398 (row << 3) + 4, (col << 3) + 4,
399 &b->mv[3][1], 4, 4, 8, 8, 4, 4, w2, h2, 1);
405 int bwl = bwlog_tab[0][b->bs];
406 int bw = bwh_tab[0][b->bs][0] * 4, bh = bwh_tab[0][b->bs][1] * 4;
407 int uvbw = bwh_tab[s->ss_h][b->bs][0] * 4, uvbh = bwh_tab[s->ss_v][b->bs][1] * 4;
409 mc_luma_dir(s, mc[bwl][b->filter][0], s->dst[0], ls_y,
410 ref1->data[0], ref1->linesize[0], tref1,
411 row << 3, col << 3, &b->mv[0][0], 0, 0, bw, bh, bw, bh, w1, h1, 0);
412 w1 = (w1 + s->ss_h) >> s->ss_h;
413 h1 = (h1 + s->ss_v) >> s->ss_v;
414 mc_chroma_dir(s, mc[bwl + s->ss_h][b->filter][0],
415 s->dst[1], s->dst[2], ls_uv,
416 ref1->data[1], ref1->linesize[1],
417 ref1->data[2], ref1->linesize[2], tref1,
418 row << (3 - s->ss_v), col << (3 - s->ss_h),
419 &b->mv[0][0], 0, 0, uvbw, uvbh, uvbw, uvbh, w1, h1, 0);
422 mc_luma_dir(s, mc[bwl][b->filter][1], s->dst[0], ls_y,
423 ref2->data[0], ref2->linesize[0], tref2,
424 row << 3, col << 3, &b->mv[0][1], 0, 0, bw, bh, bw, bh, w2, h2, 1);
425 w2 = (w2 + s->ss_h) >> s->ss_h;
426 h2 = (h2 + s->ss_v) >> s->ss_v;
427 mc_chroma_dir(s, mc[bwl + s->ss_h][b->filter][1],
428 s->dst[1], s->dst[2], ls_uv,
429 ref2->data[1], ref2->linesize[1],
430 ref2->data[2], ref2->linesize[2], tref2,
431 row << (3 - s->ss_v), col << (3 - s->ss_h),
432 &b->mv[0][1], 0, 0, uvbw, uvbh, uvbw, uvbh, w2, h2, 1);