/*
 * VC-1 and WMV3 decoder
 * Copyright (c) 2011 Mashiat Sarker Shakkhar
 * Copyright (c) 2006-2007 Konstantin Shishkov
 * Partly based on vc9.c (c) 2005 Anonymous, Alex Beregszaszi, Michael Niedermayer
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * VC-1 and WMV3 loopfilter
 */

#include <stdint.h>

#include "avcodec.h"
#include "mpegvideo.h"
#include "vc1.h"
#include "vc1dsp.h"

34 void ff_vc1_loop_filter_iblk(VC1Context *v, int pq)
36 MpegEncContext *s = &v->s;
38 if (!s->first_slice_line) {
39 v->vc1dsp.vc1_v_loop_filter16(s->dest[0], s->linesize, pq);
41 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq);
42 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize + 8, s->linesize, pq);
43 if (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY))
44 for (j = 0; j < 2; j++) {
45 v->vc1dsp.vc1_v_loop_filter8(s->dest[j + 1], s->uvlinesize, pq);
47 v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize, s->uvlinesize, pq);
50 v->vc1dsp.vc1_v_loop_filter16(s->dest[0] + 8 * s->linesize, s->linesize, pq);
52 if (s->mb_y == s->end_mb_y - 1) {
54 v->vc1dsp.vc1_h_loop_filter16(s->dest[0], s->linesize, pq);
55 if (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY)) {
56 v->vc1dsp.vc1_h_loop_filter8(s->dest[1], s->uvlinesize, pq);
57 v->vc1dsp.vc1_h_loop_filter8(s->dest[2], s->uvlinesize, pq);
60 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] + 8, s->linesize, pq);
64 void ff_vc1_loop_filter_iblk_delayed(VC1Context *v, int pq)
66 MpegEncContext *s = &v->s;
69 /* The loopfilter runs 1 row and 1 column behind the overlap filter, which
70 * means it runs two rows/cols behind the decoding loop. */
71 if (!s->first_slice_line) {
73 if (s->mb_y >= s->start_mb_y + 2) {
74 v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 16 * s->linesize - 16, s->linesize, pq);
77 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize - 16, s->linesize, pq);
78 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize - 8, s->linesize, pq);
79 if (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY))
80 for (j = 0; j < 2; j++) {
81 v->vc1dsp.vc1_v_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize - 8, s->uvlinesize, pq);
83 v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 16 * s->uvlinesize - 8, s->uvlinesize, pq);
87 v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 8 * s->linesize - 16, s->linesize, pq);
90 if (s->mb_x == s->mb_width - 1) {
91 if (s->mb_y >= s->start_mb_y + 2) {
92 v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq);
95 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize, s->linesize, pq);
96 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize + 8, s->linesize, pq);
97 if (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY))
98 for (j = 0; j < 2; j++) {
99 v->vc1dsp.vc1_v_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize, s->uvlinesize, pq);
101 v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 16 * s->uvlinesize, s->uvlinesize, pq);
105 v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 8 * s->linesize, s->linesize, pq);
108 if (s->mb_y == s->end_mb_y) {
111 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize - 16, s->linesize, pq);
112 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize - 8, s->linesize, pq);
113 if (s->mb_x >= 2 && (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY))) {
114 for (j = 0; j < 2; j++) {
115 v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize - 8, s->uvlinesize, pq);
120 if (s->mb_x == s->mb_width - 1) {
122 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq);
123 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize + 8, s->linesize, pq);
124 if (s->mb_x && (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY))) {
125 for (j = 0; j < 2; j++) {
126 v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize, s->uvlinesize, pq);
134 void ff_vc1_smooth_overlap_filter_iblk(VC1Context *v)
136 MpegEncContext *s = &v->s;
139 if (v->condover == CONDOVER_NONE)
142 mb_pos = s->mb_x + s->mb_y * s->mb_stride;
144 /* Within a MB, the horizontal overlap always runs before the vertical.
145 * To accomplish that, we run the H on left and internal borders of the
146 * currently decoded MB. Then, we wait for the next overlap iteration
147 * to do H overlap on the right edge of this MB, before moving over and
148 * running the V overlap. Therefore, the V overlap makes us trail by one
149 * MB col and the H overlap filter makes us trail by one MB row. This
150 * is reflected in the time at which we run the put_pixels loop. */
151 if (v->condover == CONDOVER_ALL || v->pq >= 9 || v->over_flags_plane[mb_pos]) {
152 if (s->mb_x && (v->condover == CONDOVER_ALL || v->pq >= 9 ||
153 v->over_flags_plane[mb_pos - 1])) {
154 v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][1],
155 v->block[v->cur_blk_idx][0]);
156 v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][3],
157 v->block[v->cur_blk_idx][2]);
158 if (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY)) {
159 v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][4],
160 v->block[v->cur_blk_idx][4]);
161 v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][5],
162 v->block[v->cur_blk_idx][5]);
165 v->vc1dsp.vc1_h_s_overlap(v->block[v->cur_blk_idx][0],
166 v->block[v->cur_blk_idx][1]);
167 v->vc1dsp.vc1_h_s_overlap(v->block[v->cur_blk_idx][2],
168 v->block[v->cur_blk_idx][3]);
170 if (s->mb_x == s->mb_width - 1) {
171 if (!s->first_slice_line && (v->condover == CONDOVER_ALL || v->pq >= 9 ||
172 v->over_flags_plane[mb_pos - s->mb_stride])) {
173 v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][2],
174 v->block[v->cur_blk_idx][0]);
175 v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][3],
176 v->block[v->cur_blk_idx][1]);
177 if (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY)) {
178 v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][4],
179 v->block[v->cur_blk_idx][4]);
180 v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][5],
181 v->block[v->cur_blk_idx][5]);
184 v->vc1dsp.vc1_v_s_overlap(v->block[v->cur_blk_idx][0],
185 v->block[v->cur_blk_idx][2]);
186 v->vc1dsp.vc1_v_s_overlap(v->block[v->cur_blk_idx][1],
187 v->block[v->cur_blk_idx][3]);
190 if (s->mb_x && (v->condover == CONDOVER_ALL || v->over_flags_plane[mb_pos - 1])) {
191 if (!s->first_slice_line && (v->condover == CONDOVER_ALL || v->pq >= 9 ||
192 v->over_flags_plane[mb_pos - s->mb_stride - 1])) {
193 v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][2],
194 v->block[v->left_blk_idx][0]);
195 v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][3],
196 v->block[v->left_blk_idx][1]);
197 if (!CONFIG_GRAY || !(s->avctx->flags & CODEC_FLAG_GRAY)) {
198 v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][4],
199 v->block[v->left_blk_idx][4]);
200 v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][5],
201 v->block[v->left_blk_idx][5]);
204 v->vc1dsp.vc1_v_s_overlap(v->block[v->left_blk_idx][0],
205 v->block[v->left_blk_idx][2]);
206 v->vc1dsp.vc1_v_s_overlap(v->block[v->left_blk_idx][1],
207 v->block[v->left_blk_idx][3]);
211 static av_always_inline void vc1_apply_p_v_loop_filter(VC1Context *v, int block_num)
213 MpegEncContext *s = &v->s;
214 int mb_cbp = v->cbp[s->mb_x - s->mb_stride],
215 block_cbp = mb_cbp >> (block_num * 4), bottom_cbp,
216 mb_is_intra = v->is_intra[s->mb_x - s->mb_stride],
217 block_is_intra = mb_is_intra >> block_num, bottom_is_intra;
218 int idx, linesize = block_num > 3 ? s->uvlinesize : s->linesize, ttblk;
222 dst = s->dest[block_num - 3];
224 dst = s->dest[0] + (block_num & 1) * 8 + ((block_num & 2) * 4 - 8) * linesize;
226 if (s->mb_y != s->end_mb_y || block_num < 2) {
231 bottom_cbp = v->cbp[s->mb_x] >> (block_num * 4);
232 bottom_is_intra = v->is_intra[s->mb_x] >> block_num;
233 mv = &v->luma_mv[s->mb_x - s->mb_stride];
234 mv_stride = s->mb_stride;
236 bottom_cbp = (block_num < 2) ? (mb_cbp >> ((block_num + 2) * 4))
237 : (v->cbp[s->mb_x] >> ((block_num - 2) * 4));
238 bottom_is_intra = (block_num < 2) ? (mb_is_intra >> (block_num + 2))
239 : (v->is_intra[s->mb_x] >> (block_num - 2));
240 mv_stride = s->b8_stride;
241 mv = &s->current_picture.motion_val[0][s->block_index[block_num] - 2 * mv_stride];
244 if (bottom_is_intra & 1 || block_is_intra & 1 ||
245 mv[0][0] != mv[mv_stride][0] || mv[0][1] != mv[mv_stride][1]) {
246 v->vc1dsp.vc1_v_loop_filter8(dst, linesize, v->pq);
248 idx = ((bottom_cbp >> 2) | block_cbp) & 3;
250 v->vc1dsp.vc1_v_loop_filter8(dst, linesize, v->pq);
253 v->vc1dsp.vc1_v_loop_filter4(dst + 4, linesize, v->pq);
255 v->vc1dsp.vc1_v_loop_filter4(dst, linesize, v->pq);
261 ttblk = (v->ttblk[s->mb_x - s->mb_stride] >> (block_num * 4)) & 0xF;
262 if (ttblk == TT_4X4 || ttblk == TT_8X4) {
263 idx = (block_cbp | (block_cbp >> 2)) & 3;
265 v->vc1dsp.vc1_v_loop_filter8(dst, linesize, v->pq);
268 v->vc1dsp.vc1_v_loop_filter4(dst + 4, linesize, v->pq);
270 v->vc1dsp.vc1_v_loop_filter4(dst, linesize, v->pq);
275 static av_always_inline void vc1_apply_p_h_loop_filter(VC1Context *v, int block_num)
277 MpegEncContext *s = &v->s;
278 int mb_cbp = v->cbp[s->mb_x - 1 - s->mb_stride],
279 block_cbp = mb_cbp >> (block_num * 4), right_cbp,
280 mb_is_intra = v->is_intra[s->mb_x - 1 - s->mb_stride],
281 block_is_intra = mb_is_intra >> block_num, right_is_intra;
282 int idx, linesize = block_num > 3 ? s->uvlinesize : s->linesize, ttblk;
286 dst = s->dest[block_num - 3] - 8 * linesize;
288 dst = s->dest[0] + (block_num & 1) * 8 + ((block_num & 2) * 4 - 16) * linesize - 8;
291 if (s->mb_x != s->mb_width || !(block_num & 5)) {
295 right_cbp = v->cbp[s->mb_x - s->mb_stride] >> (block_num * 4);
296 right_is_intra = v->is_intra[s->mb_x - s->mb_stride] >> block_num;
297 mv = &v->luma_mv[s->mb_x - s->mb_stride - 1];
299 right_cbp = (block_num & 1) ? (v->cbp[s->mb_x - s->mb_stride] >> ((block_num - 1) * 4))
300 : (mb_cbp >> ((block_num + 1) * 4));
301 right_is_intra = (block_num & 1) ? (v->is_intra[s->mb_x - s->mb_stride] >> (block_num - 1))
302 : (mb_is_intra >> (block_num + 1));
303 mv = &s->current_picture.motion_val[0][s->block_index[block_num] - s->b8_stride * 2 - 2];
305 if (block_is_intra & 1 || right_is_intra & 1 || mv[0][0] != mv[1][0] || mv[0][1] != mv[1][1]) {
306 v->vc1dsp.vc1_h_loop_filter8(dst, linesize, v->pq);
308 idx = ((right_cbp >> 1) | block_cbp) & 5; // FIXME check
310 v->vc1dsp.vc1_h_loop_filter8(dst, linesize, v->pq);
313 v->vc1dsp.vc1_h_loop_filter4(dst + 4 * linesize, linesize, v->pq);
315 v->vc1dsp.vc1_h_loop_filter4(dst, linesize, v->pq);
321 ttblk = (v->ttblk[s->mb_x - s->mb_stride - 1] >> (block_num * 4)) & 0xf;
322 if (ttblk == TT_4X4 || ttblk == TT_4X8) {
323 idx = (block_cbp | (block_cbp >> 1)) & 5;
325 v->vc1dsp.vc1_h_loop_filter8(dst, linesize, v->pq);
328 v->vc1dsp.vc1_h_loop_filter4(dst + linesize * 4, linesize, v->pq);
330 v->vc1dsp.vc1_h_loop_filter4(dst, linesize, v->pq);
335 void ff_vc1_apply_p_loop_filter(VC1Context *v)
337 MpegEncContext *s = &v->s;
339 int block_count = CONFIG_GRAY && (s->avctx->flags & CODEC_FLAG_GRAY) ? 4 : 6;
341 for (i = 0; i < block_count; i++) {
342 vc1_apply_p_v_loop_filter(v, i);
345 /* V always precedes H, therefore we run H one MB before V;
346 * at the end of a row, we catch up to complete the row */
348 for (i = 0; i < block_count; i++) {
349 vc1_apply_p_h_loop_filter(v, i);
351 if (s->mb_x == s->mb_width - 1) {
353 ff_update_block_index(s);
354 for (i = 0; i < block_count; i++) {
355 vc1_apply_p_h_loop_filter(v, i);