2 * VC-1 and WMV3 decoder
3 * Copyright (c) 2011 Mashiat Sarker Shakkhar
4 * Copyright (c) 2006-2007 Konstantin Shishkov
5 * Partly based on vc9.c (c) 2005 Anonymous, Alex Beregszaszi, Michael Niedermayer
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 * VC-1 and WMV3 loopfilter
30 #include "mpegvideo.h"
/* Intra-block loop filter driver: issues the vc1dsp loop-filter calls around
 * the macroblock addressed by s->dest[0..2].
 *
 * v  - decoder context; filter function pointers come from v->vc1dsp and the
 *      destination pointers / strides from the embedded MpegEncContext v->s.
 * pq - forwarded unchanged as the last argument of every filter call
 *      (presumably the picture quantizer - TODO confirm).
 *
 * NOTE(review): the number prefix on each line skips values and the loop
 * counter `j` is used without a visible declaration, so some original lines
 * (braces, declarations, possibly short conditionals) appear to be missing
 * from this listing. The nesting cannot be fully determined from here. */
34 void ff_vc1_loop_filter_iblk(VC1Context *v, int pq)
36 MpegEncContext *s = &v->s;
/* Guarded by "not the first line of the slice": the calls below address
 * s->dest[0] itself and the 16 luma lines preceding it (- 16 * linesize),
 * the latter at column offsets 0 and +8. */
38 if (!s->first_slice_line) {
39 v->vc1dsp.vc1_v_loop_filter16(s->dest[0], s->linesize, pq);
41 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq);
42 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize + 8, s->linesize, pq);
/* Same pattern for the two remaining planes (dest[1], dest[2]) using the
 * 8-wide filters, uvlinesize, and an 8-line backward offset. */
43 for (j = 0; j < 2; j++) {
44 v->vc1dsp.vc1_v_loop_filter8(s->dest[j + 1], s->uvlinesize, pq);
46 v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize, s->uvlinesize, pq);
/* Filter call addressed 8 luma lines below s->dest[0]. */
49 v->vc1dsp.vc1_v_loop_filter16(s->dest[0] + 8 * s->linesize, s->linesize, pq);
/* When mb_y is the last row before end_mb_y, the h filter calls are also
 * issued on the current row's own dest pointers (luma at +0 and +8 columns,
 * plus dest[1] and dest[2]). */
51 if (s->mb_y == s->end_mb_y - 1) {
53 v->vc1dsp.vc1_h_loop_filter16(s->dest[0], s->linesize, pq);
54 v->vc1dsp.vc1_h_loop_filter8(s->dest[1], s->uvlinesize, pq);
55 v->vc1dsp.vc1_h_loop_filter8(s->dest[2], s->uvlinesize, pq);
57 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] + 8, s->linesize, pq);
/* Delayed variant of the intra-block loop filter driver. Every filter call
 * is addressed backwards from s->dest[]: by 16 luma (8 for dest[1]/dest[2])
 * columns and/or by one or two macroblock rows.
 *
 * v  - decoder context (v->vc1dsp supplies the filter functions, v->s the
 *      destination pointers, strides and macroblock position).
 * pq - forwarded unchanged to every filter call (presumably the picture
 *      quantizer - TODO confirm).
 *
 * NOTE(review): the number prefix on each line skips values and `j` has no
 * visible declaration, so lines (braces, declarations, possibly short
 * conditionals) appear to be missing from this listing. The exact nesting
 * of the conditions below cannot be confirmed from here. */
61 void ff_vc1_loop_filter_iblk_delayed(VC1Context *v, int pq)
63 MpegEncContext *s = &v->s;
66 /* The loopfilter runs 1 row and 1 column behind the overlap filter, which
67 * means it runs two rows/cols behind the decoding loop. */
68 if (!s->first_slice_line) {
/* Requires mb_y to be at least two rows past start_mb_y. The calls address
 * 16 and 32 luma lines (8 and 16 lines for dest[1]/dest[2]) before dest,
 * shifted left by 16 / 8 luma columns (8 columns for dest[1]/dest[2]). */
70 if (s->mb_y >= s->start_mb_y + 2) {
71 v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 16 * s->linesize - 16, s->linesize, pq);
74 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize - 16, s->linesize, pq);
75 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize - 8, s->linesize, pq);
76 for (j = 0; j < 2; j++) {
77 v->vc1dsp.vc1_v_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize - 8, s->uvlinesize, pq);
79 v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 16 * s->uvlinesize - 8, s->uvlinesize, pq);
83 v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 8 * s->linesize - 16, s->linesize, pq);
/* Last macroblock column: the same sequence of calls as above, but with
 * the column offsets moved right by one macroblock (luma -16 -> 0 and
 * -8 -> +8; dest[1]/dest[2] -8 -> 0). */
86 if (s->mb_x == s->mb_width - 1) {
87 if (s->mb_y >= s->start_mb_y + 2) {
88 v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq);
91 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize, s->linesize, pq);
92 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize + 8, s->linesize, pq);
93 for (j = 0; j < 2; j++) {
94 v->vc1dsp.vc1_v_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize, s->uvlinesize, pq);
96 v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 16 * s->uvlinesize, s->uvlinesize, pq);
100 v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 8 * s->linesize, s->linesize, pq);
/* mb_y == end_mb_y: h filter calls addressed 16 luma lines (8 lines for
 * dest[1]/dest[2]) before dest, shifted left by 16 / 8 luma columns. */
103 if (s->mb_y == s->end_mb_y) {
106 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize - 16, s->linesize, pq);
107 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize - 8, s->linesize, pq);
109 for (j = 0; j < 2; j++) {
110 v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize - 8, s->uvlinesize, pq);
/* Last macroblock column: same h filter calls without the leftward shift. */
115 if (s->mb_x == s->mb_width - 1) {
117 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq);
118 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize + 8, s->linesize, pq);
120 for (j = 0; j < 2; j++) {
121 v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize, s->uvlinesize, pq);
/* Smoothing overlap filter driver for intra blocks. Calls vc1_h_s_overlap /
 * vc1_v_s_overlap on pairs of blocks stored in v->block[], selected through
 * the cur / left / top / topleft block indices kept in the context.
 *
 * v - decoder context. Reads v->condover, v->pq, v->over_flags_plane[] and
 *     the macroblock position in v->s; the overlap functions receive the
 *     blocks in v->block[...].
 *
 * In every pair the first argument is the block on the left (H) or on top
 * (V) and the second the block on the right / below, as can be seen from
 * the left/cur and top/cur pairings. Blocks 4 and 5 are only processed when
 * CODEC_FLAG_GRAY is not set (presumably the chroma blocks - TODO confirm).
 *
 * NOTE(review): the number prefix on each line skips values and `mb_pos`
 * has no visible declaration, so lines appear to be missing from this
 * listing (braces, declarations, and the statement guarded by the
 * CONDOVER_NONE test - presumably an early return). */
129 void ff_vc1_smooth_overlap_filter_iblk(VC1Context *v)
131 MpegEncContext *s = &v->s;
134 if (v->condover == CONDOVER_NONE)
/* Index of the current macroblock in per-MB planes laid out with a row
 * stride of mb_stride. */
137 mb_pos = s->mb_x + s->mb_y * s->mb_stride;
139 /* Within a MB, the horizontal overlap always runs before the vertical.
140 * To accomplish that, we run the H on left and internal borders of the
141 * currently decoded MB. Then, we wait for the next overlap iteration
142 * to do H overlap on the right edge of this MB, before moving over and
143 * running the V overlap. Therefore, the V overlap makes us trail by one
144 * MB col and the H overlap filter makes us trail by one MB row. This
145 * is reflected in the time at which we run the put_pixels loop. */
/* The current MB qualifies when condover is CONDOVER_ALL, pq >= 9, or its
 * entry in over_flags_plane is set. */
146 if (v->condover == CONDOVER_ALL || v->pq >= 9 || v->over_flags_plane[mb_pos]) {
/* H overlap across the left MB border: needs a left neighbour (mb_x != 0)
 * that qualifies too. Pairs left blocks 1 and 3 with current blocks 0 and
 * 2, then blocks 4 and 5 of both MBs. */
147 if (s->mb_x && (v->condover == CONDOVER_ALL || v->pq >= 9 ||
148 v->over_flags_plane[mb_pos - 1])) {
149 v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][1],
150 v->block[v->cur_blk_idx][0]);
151 v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][3],
152 v->block[v->cur_blk_idx][2]);
153 if (!(s->flags & CODEC_FLAG_GRAY)) {
154 v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][4],
155 v->block[v->cur_blk_idx][4]);
156 v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][5],
157 v->block[v->cur_blk_idx][5]);
/* H overlap inside the current MB: block 0 with 1 and block 2 with 3. */
160 v->vc1dsp.vc1_h_s_overlap(v->block[v->cur_blk_idx][0],
161 v->block[v->cur_blk_idx][1]);
162 v->vc1dsp.vc1_h_s_overlap(v->block[v->cur_blk_idx][2],
163 v->block[v->cur_blk_idx][3]);
/* Last MB column: V overlap on the current MB. The top neighbour is paired
 * (top blocks 2,3 with current 0,1, plus blocks 4 and 5) only when this is
 * not the first slice line and the MB above qualifies; the pairs inside the
 * MB are block 0 with 2 and block 1 with 3. */
165 if (s->mb_x == s->mb_width - 1) {
166 if (!s->first_slice_line && (v->condover == CONDOVER_ALL || v->pq >= 9 ||
167 v->over_flags_plane[mb_pos - s->mb_stride])) {
168 v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][2],
169 v->block[v->cur_blk_idx][0]);
170 v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][3],
171 v->block[v->cur_blk_idx][1]);
172 if (!(s->flags & CODEC_FLAG_GRAY)) {
173 v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][4],
174 v->block[v->cur_blk_idx][4]);
175 v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][5],
176 v->block[v->cur_blk_idx][5]);
179 v->vc1dsp.vc1_v_s_overlap(v->block[v->cur_blk_idx][0],
180 v->block[v->cur_blk_idx][2]);
181 v->vc1dsp.vc1_v_s_overlap(v->block[v->cur_blk_idx][1],
182 v->block[v->cur_blk_idx][3]);
/* V overlap on the left MB, using the same pairing scheme with the
 * topleft / left block slots.
 * NOTE(review): this is the only qualification test in the function
 * without a `v->pq >= 9` term - confirm whether the omission is
 * intentional. */
185 if (s->mb_x && (v->condover == CONDOVER_ALL || v->over_flags_plane[mb_pos - 1])) {
186 if (!s->first_slice_line && (v->condover == CONDOVER_ALL || v->pq >= 9 ||
187 v->over_flags_plane[mb_pos - s->mb_stride - 1])) {
188 v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][2],
189 v->block[v->left_blk_idx][0]);
190 v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][3],
191 v->block[v->left_blk_idx][1]);
192 if (!(s->flags & CODEC_FLAG_GRAY)) {
193 v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][4],
194 v->block[v->left_blk_idx][4]);
195 v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][5],
196 v->block[v->left_blk_idx][5]);
199 v->vc1dsp.vc1_v_s_overlap(v->block[v->left_blk_idx][0],
200 v->block[v->left_blk_idx][2]);
201 v->vc1dsp.vc1_v_s_overlap(v->block[v->left_blk_idx][1],
202 v->block[v->left_blk_idx][3]);
/* Issue the v loop-filter calls for one 8x8 block of a macroblock.
 *
 * v         - decoder context.
 * block_num - block index within the macroblock; the caller in this file
 *             passes 0..5. Blocks above 3 use uvlinesize, the others
 *             linesize.
 *
 * The per-macroblock arrays (cbp, is_intra, ttblk, luma_mv) are read at
 * index s->mb_x - s->mb_stride, i.e. one mb_stride before the current
 * column entry (presumably the MB directly above - TODO confirm). The
 * "bottom" values describe the block compared against across the edge.
 *
 * NOTE(review): the number prefix on each line skips values and `dst`,
 * `mv`, `mv_stride` have no visible declarations, so lines appear to be
 * missing from this listing. In particular the conditions that choose
 * between the two `dst` assignments, between the two groups of bottom /
 * mv assignments, and between the filter calls selected by `idx` are not
 * visible. `dst` may also be adjusted between the two filter stages. */
206 static av_always_inline void vc1_apply_p_v_loop_filter(VC1Context *v, int block_num)
208 MpegEncContext *s = &v->s;
/* Per-block fields are extracted by shifting: 4 bits per block in cbp,
 * 1 bit per block in is_intra. */
209 int mb_cbp = v->cbp[s->mb_x - s->mb_stride],
210 block_cbp = mb_cbp >> (block_num * 4), bottom_cbp,
211 mb_is_intra = v->is_intra[s->mb_x - s->mb_stride],
212 block_is_intra = mb_is_intra >> block_num, bottom_is_intra;
213 int idx, linesize = block_num > 3 ? s->uvlinesize : s->linesize, ttblk;
/* Two alternative destinations: plane block_num - 3 (dest[1] / dest[2] for
 * blocks 4 / 5), or a luma address where bit 0 of block_num adds 8 columns
 * and bit 1 selects a row offset of -8 lines (clear) or 0 lines (set). */
217 dst = s->dest[block_num - 3];
219 dst = s->dest[0] + (block_num & 1) * 8 + ((block_num & 2) * 4 - 8) * linesize;
/* The edge towards the "bottom" block is considered unless mb_y equals
 * end_mb_y and block_num is 2 or higher. */
221 if (s->mb_y != s->end_mb_y || block_num < 2) {
/* First alternative: bottom data is the same block number taken from the
 * entry at s->mb_x; motion vectors come from luma_mv with a stride of
 * mb_stride. */
226 bottom_cbp = v->cbp[s->mb_x] >> (block_num * 4);
227 bottom_is_intra = v->is_intra[s->mb_x] >> block_num;
228 mv = &v->luma_mv[s->mb_x - s->mb_stride];
229 mv_stride = s->mb_stride;
/* Second alternative: for block_num < 2 the bottom block is block_num + 2
 * of the same MB; otherwise it is block_num - 2 of the entry at s->mb_x.
 * Motion vectors come from current_picture.motion_val[0] with a stride of
 * b8_stride. */
231 bottom_cbp = (block_num < 2) ? (mb_cbp >> ((block_num + 2) * 4))
232 : (v->cbp[s->mb_x] >> ((block_num - 2) * 4));
233 bottom_is_intra = (block_num < 2) ? (mb_is_intra >> (block_num + 2))
234 : (v->is_intra[s->mb_x] >> (block_num - 2));
235 mv_stride = s->b8_stride;
236 mv = &s->current_picture.motion_val[0][s->block_index[block_num] - 2 * mv_stride];
/* Full-width filter when either block is intra or the two motion vectors
 * (mv[0] and mv[mv_stride]) differ in either component. */
239 if (bottom_is_intra & 1 || block_is_intra & 1 ||
240 mv[0][0] != mv[mv_stride][0] || mv[0][1] != mv[mv_stride][1]) {
241 v->vc1dsp.vc1_v_loop_filter8(dst, linesize, v->pq);
/* Otherwise the choice is driven by two coded-block bits: the bottom
 * block's cbp shifted right by 2, OR-ed with this block's cbp. The calls
 * cover the full 8 samples, the half at dst + 4, or the half at dst. */
243 idx = ((bottom_cbp >> 2) | block_cbp) & 3;
245 v->vc1dsp.vc1_v_loop_filter8(dst, linesize, v->pq);
248 v->vc1dsp.vc1_v_loop_filter4(dst + 4, linesize, v->pq);
250 v->vc1dsp.vc1_v_loop_filter4(dst, linesize, v->pq);
/* Second stage: 4-bit transform-type field of this block. For TT_4X4 or
 * TT_8X4 the filter call is chosen from this block's own cbp bits. */
256 ttblk = (v->ttblk[s->mb_x - s->mb_stride] >> (block_num * 4)) & 0xF;
257 if (ttblk == TT_4X4 || ttblk == TT_8X4) {
258 idx = (block_cbp | (block_cbp >> 2)) & 3;
260 v->vc1dsp.vc1_v_loop_filter8(dst, linesize, v->pq);
263 v->vc1dsp.vc1_v_loop_filter4(dst + 4, linesize, v->pq);
265 v->vc1dsp.vc1_v_loop_filter4(dst, linesize, v->pq);
/* Issue the h loop-filter calls for one 8x8 block of a macroblock.
 *
 * v         - decoder context.
 * block_num - block index within the macroblock; the caller in this file
 *             passes 0..5. Blocks above 3 use uvlinesize, the others
 *             linesize.
 *
 * The per-macroblock arrays (cbp, is_intra, ttblk, luma_mv) are read at
 * index s->mb_x - 1 - s->mb_stride, i.e. one entry to the left of and one
 * mb_stride before the current column entry. The "right" values describe
 * the block compared against across the edge.
 *
 * NOTE(review): the number prefix on each line skips values and `dst`,
 * `mv` have no visible declarations, so lines appear to be missing from
 * this listing. In particular the conditions that choose between the two
 * `dst` assignments, between the two groups of right / mv assignments, and
 * between the filter calls selected by `idx` are not visible. `dst` may
 * also be adjusted between the two filter stages. */
270 static av_always_inline void vc1_apply_p_h_loop_filter(VC1Context *v, int block_num)
272 MpegEncContext *s = &v->s;
/* Per-block fields are extracted by shifting: 4 bits per block in cbp,
 * 1 bit per block in is_intra. */
273 int mb_cbp = v->cbp[s->mb_x - 1 - s->mb_stride],
274 block_cbp = mb_cbp >> (block_num * 4), right_cbp,
275 mb_is_intra = v->is_intra[s->mb_x - 1 - s->mb_stride],
276 block_is_intra = mb_is_intra >> block_num, right_is_intra;
277 int idx, linesize = block_num > 3 ? s->uvlinesize : s->linesize, ttblk;
/* Two alternative destinations: plane block_num - 3 moved back 8 lines, or
 * a luma address 8 columns to the left where bit 0 of block_num adds 8
 * columns and bit 1 selects a row offset of -16 lines (clear) or -8 lines
 * (set). */
281 dst = s->dest[block_num - 3] - 8 * linesize;
283 dst = s->dest[0] + (block_num & 1) * 8 + ((block_num & 2) * 4 - 16) * linesize - 8;
/* The edge towards the "right" block is considered unless mb_x equals
 * mb_width and block_num has bit 0 or bit 2 set (blocks 1, 3, 4, 5). */
286 if (s->mb_x != s->mb_width || !(block_num & 5)) {
/* First alternative: right data is the same block number taken from the
 * entry at s->mb_x - s->mb_stride; motion vectors come from luma_mv. */
290 right_cbp = v->cbp[s->mb_x - s->mb_stride] >> (block_num * 4);
291 right_is_intra = v->is_intra[s->mb_x - s->mb_stride] >> block_num;
292 mv = &v->luma_mv[s->mb_x - s->mb_stride - 1];
/* Second alternative: for odd block_num the right block is block_num - 1
 * of the entry at s->mb_x - s->mb_stride; for even block_num it is
 * block_num + 1 of the same MB. Motion vectors come from
 * current_picture.motion_val[0]. */
294 right_cbp = (block_num & 1) ? (v->cbp[s->mb_x - s->mb_stride] >> ((block_num - 1) * 4))
295 : (mb_cbp >> ((block_num + 1) * 4));
296 right_is_intra = (block_num & 1) ? (v->is_intra[s->mb_x - s->mb_stride] >> (block_num - 1))
297 : (mb_is_intra >> (block_num + 1));
298 mv = &s->current_picture.motion_val[0][s->block_index[block_num] - s->b8_stride * 2 - 2];
/* Full-height filter when either block is intra or the two adjacent motion
 * vectors (mv[0] and mv[1]) differ in either component. */
300 if (block_is_intra & 1 || right_is_intra & 1 || mv[0][0] != mv[1][0] || mv[0][1] != mv[1][1]) {
301 v->vc1dsp.vc1_h_loop_filter8(dst, linesize, v->pq);
/* Otherwise the choice is driven by cbp bits 0 and 2 (mask 5): the right
 * block's cbp shifted right by 1, OR-ed with this block's cbp. The calls
 * cover the full 8 lines, the half starting 4 lines down, or the half at
 * dst. */
303 idx = ((right_cbp >> 1) | block_cbp) & 5; // FIXME check
305 v->vc1dsp.vc1_h_loop_filter8(dst, linesize, v->pq);
308 v->vc1dsp.vc1_h_loop_filter4(dst + 4 * linesize, linesize, v->pq);
310 v->vc1dsp.vc1_h_loop_filter4(dst, linesize, v->pq);
/* Second stage: 4-bit transform-type field of this block. For TT_4X4 or
 * TT_4X8 the filter call is chosen from this block's own cbp bits. */
316 ttblk = (v->ttblk[s->mb_x - s->mb_stride - 1] >> (block_num * 4)) & 0xf;
317 if (ttblk == TT_4X4 || ttblk == TT_4X8) {
318 idx = (block_cbp | (block_cbp >> 1)) & 5;
320 v->vc1dsp.vc1_h_loop_filter8(dst, linesize, v->pq);
323 v->vc1dsp.vc1_h_loop_filter4(dst + linesize * 4, linesize, v->pq);
325 v->vc1dsp.vc1_h_loop_filter4(dst, linesize, v->pq);
/* Loop filter entry point that drives the per-block helpers: first
 * vc1_apply_p_v_loop_filter for blocks 0..5, then vc1_apply_p_h_loop_filter
 * for blocks 0..5. At the last macroblock column the H pass is run a second
 * time after ff_update_block_index(s).
 *
 * v - decoder context, passed through to the helpers.
 *
 * NOTE(review): the number prefix on each line skips values (e.g. between
 * the lines prefixed 340 and 342, and between 345 and 347) and `i` has no
 * visible declaration, so lines appear to be missing from this listing.
 * The condition guarding the H pass and whatever precedes
 * ff_update_block_index(s) are not visible; the end of the function is not
 * visible either. */
330 void ff_vc1_apply_p_loop_filter(VC1Context *v)
332 MpegEncContext *s = &v->s;
/* V pass over all six blocks. */
335 for (i = 0; i < 6; i++) {
336 vc1_apply_p_v_loop_filter(v, i);
339 /* V always precedes H, therefore we run H one MB before V;
340 * at the end of a row, we catch up to complete the row */
/* H pass over all six blocks. */
342 for (i = 0; i < 6; i++) {
343 vc1_apply_p_h_loop_filter(v, i);
/* Last column: refresh the block indices and run the H pass once more. */
345 if (s->mb_x == s->mb_width - 1) {
347 ff_update_block_index(s);
348 for (i = 0; i < 6; i++) {
349 vc1_apply_p_h_loop_filter(v, i);