]> git.sesse.net Git - ffmpeg/blob - libavcodec/vc1_loopfilter.c
025776bac9c1fcc873f911bc576c85c914ab5c84
[ffmpeg] / libavcodec / vc1_loopfilter.c
1 /*
2  * VC-1 and WMV3 decoder
3  * Copyright (c) 2011 Mashiat Sarker Shakkhar
4  * Copyright (c) 2006-2007 Konstantin Shishkov
5  * Partly based on vc9.c (c) 2005 Anonymous, Alex Beregszaszi, Michael Niedermayer
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23
24 /**
25  * @file
26  * VC-1 and WMV3 loopfilter
27  */
28
29 #include "avcodec.h"
30 #include "mpegvideo.h"
31 #include "vc1.h"
32 #include "vc1dsp.h"
33
34 void ff_vc1_loop_filter_iblk(VC1Context *v, int pq)
35 {
36     MpegEncContext *s = &v->s;
37     int j;
38     if (!s->first_slice_line) {
39         v->vc1dsp.vc1_v_loop_filter16(s->dest[0], s->linesize, pq);
40         if (s->mb_x)
41             v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq);
42         v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize + 8, s->linesize, pq);
43         if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY))
44         for (j = 0; j < 2; j++) {
45             v->vc1dsp.vc1_v_loop_filter8(s->dest[j + 1], s->uvlinesize, pq);
46             if (s->mb_x)
47                 v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize, s->uvlinesize, pq);
48         }
49     }
50     v->vc1dsp.vc1_v_loop_filter16(s->dest[0] + 8 * s->linesize, s->linesize, pq);
51
52     if (s->mb_y == s->end_mb_y - 1) {
53         if (s->mb_x) {
54             v->vc1dsp.vc1_h_loop_filter16(s->dest[0], s->linesize, pq);
55             if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
56             v->vc1dsp.vc1_h_loop_filter8(s->dest[1], s->uvlinesize, pq);
57             v->vc1dsp.vc1_h_loop_filter8(s->dest[2], s->uvlinesize, pq);
58             }
59         }
60         v->vc1dsp.vc1_h_loop_filter16(s->dest[0] + 8, s->linesize, pq);
61     }
62 }
63
64 void ff_vc1_loop_filter_iblk_delayed(VC1Context *v, int pq)
65 {
66     MpegEncContext *s = &v->s;
67     int j;
68
69     /* The loopfilter runs 1 row and 1 column behind the overlap filter, which
70      * means it runs two rows/cols behind the decoding loop. */
71     if (!s->first_slice_line) {
72         if (s->mb_x) {
73             if (s->mb_y >= s->start_mb_y + 2) {
74                 v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 16 * s->linesize - 16, s->linesize, pq);
75
76                 if (s->mb_x >= 2)
77                     v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize - 16, s->linesize, pq);
78                 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize - 8, s->linesize, pq);
79                 if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY))
80                 for (j = 0; j < 2; j++) {
81                     v->vc1dsp.vc1_v_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize - 8, s->uvlinesize, pq);
82                     if (s->mb_x >= 2) {
83                         v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 16 * s->uvlinesize - 8, s->uvlinesize, pq);
84                     }
85                 }
86             }
87             v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 8 * s->linesize - 16, s->linesize, pq);
88         }
89
90         if (s->mb_x == s->mb_width - 1) {
91             if (s->mb_y >= s->start_mb_y + 2) {
92                 v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq);
93
94                 if (s->mb_x)
95                     v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize, s->linesize, pq);
96                 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 32 * s->linesize + 8, s->linesize, pq);
97                 if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY))
98                 for (j = 0; j < 2; j++) {
99                     v->vc1dsp.vc1_v_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize, s->uvlinesize, pq);
100                     if (s->mb_x >= 2) {
101                         v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 16 * s->uvlinesize, s->uvlinesize, pq);
102                     }
103                 }
104             }
105             v->vc1dsp.vc1_v_loop_filter16(s->dest[0] - 8 * s->linesize, s->linesize, pq);
106         }
107
108         if (s->mb_y == s->end_mb_y) {
109             if (s->mb_x) {
110                 if (s->mb_x >= 2)
111                     v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize - 16, s->linesize, pq);
112                 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize - 8, s->linesize, pq);
113                 if (s->mb_x >= 2 && (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY))) {
114                     for (j = 0; j < 2; j++) {
115                         v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize - 8, s->uvlinesize, pq);
116                     }
117                 }
118             }
119
120             if (s->mb_x == s->mb_width - 1) {
121                 if (s->mb_x)
122                     v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq);
123                 v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize + 8, s->linesize, pq);
124                 if (s->mb_x && (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY))) {
125                     for (j = 0; j < 2; j++) {
126                         v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize, s->uvlinesize, pq);
127                     }
128                 }
129             }
130         }
131     }
132 }
133
134 void ff_vc1_smooth_overlap_filter_iblk(VC1Context *v)
135 {
136     MpegEncContext *s = &v->s;
137     int mb_pos;
138
139     if (v->condover == CONDOVER_NONE)
140         return;
141
142     mb_pos = s->mb_x + s->mb_y * s->mb_stride;
143
144     /* Within a MB, the horizontal overlap always runs before the vertical.
145      * To accomplish that, we run the H on left and internal borders of the
146      * currently decoded MB. Then, we wait for the next overlap iteration
147      * to do H overlap on the right edge of this MB, before moving over and
148      * running the V overlap. Therefore, the V overlap makes us trail by one
149      * MB col and the H overlap filter makes us trail by one MB row. This
150      * is reflected in the time at which we run the put_pixels loop. */
151     if (v->condover == CONDOVER_ALL || v->pq >= 9 || v->over_flags_plane[mb_pos]) {
152         if (s->mb_x && (v->condover == CONDOVER_ALL || v->pq >= 9 ||
153                         v->over_flags_plane[mb_pos - 1])) {
154             v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][1],
155                                       v->block[v->cur_blk_idx][0]);
156             v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][3],
157                                       v->block[v->cur_blk_idx][2]);
158             if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
159                 v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][4],
160                                           v->block[v->cur_blk_idx][4]);
161                 v->vc1dsp.vc1_h_s_overlap(v->block[v->left_blk_idx][5],
162                                           v->block[v->cur_blk_idx][5]);
163             }
164         }
165         v->vc1dsp.vc1_h_s_overlap(v->block[v->cur_blk_idx][0],
166                                   v->block[v->cur_blk_idx][1]);
167         v->vc1dsp.vc1_h_s_overlap(v->block[v->cur_blk_idx][2],
168                                   v->block[v->cur_blk_idx][3]);
169
170         if (s->mb_x == s->mb_width - 1) {
171             if (!s->first_slice_line && (v->condover == CONDOVER_ALL || v->pq >= 9 ||
172                                          v->over_flags_plane[mb_pos - s->mb_stride])) {
173                 v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][2],
174                                           v->block[v->cur_blk_idx][0]);
175                 v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][3],
176                                           v->block[v->cur_blk_idx][1]);
177                 if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
178                     v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][4],
179                                               v->block[v->cur_blk_idx][4]);
180                     v->vc1dsp.vc1_v_s_overlap(v->block[v->top_blk_idx][5],
181                                               v->block[v->cur_blk_idx][5]);
182                 }
183             }
184             v->vc1dsp.vc1_v_s_overlap(v->block[v->cur_blk_idx][0],
185                                       v->block[v->cur_blk_idx][2]);
186             v->vc1dsp.vc1_v_s_overlap(v->block[v->cur_blk_idx][1],
187                                       v->block[v->cur_blk_idx][3]);
188         }
189     }
190     if (s->mb_x && (v->condover == CONDOVER_ALL || v->over_flags_plane[mb_pos - 1])) {
191         if (!s->first_slice_line && (v->condover == CONDOVER_ALL || v->pq >= 9 ||
192                                      v->over_flags_plane[mb_pos - s->mb_stride - 1])) {
193             v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][2],
194                                       v->block[v->left_blk_idx][0]);
195             v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][3],
196                                       v->block[v->left_blk_idx][1]);
197             if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) {
198                 v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][4],
199                                           v->block[v->left_blk_idx][4]);
200                 v->vc1dsp.vc1_v_s_overlap(v->block[v->topleft_blk_idx][5],
201                                           v->block[v->left_blk_idx][5]);
202             }
203         }
204         v->vc1dsp.vc1_v_s_overlap(v->block[v->left_blk_idx][0],
205                                   v->block[v->left_blk_idx][2]);
206         v->vc1dsp.vc1_v_s_overlap(v->block[v->left_blk_idx][1],
207                                   v->block[v->left_blk_idx][3]);
208     }
209 }
210
211 static av_always_inline void vc1_apply_p_v_loop_filter(VC1Context *v, int block_num)
212 {
213     MpegEncContext *s  = &v->s;
214     int mb_cbp         = v->cbp[s->mb_x - s->mb_stride],
215         block_cbp      = mb_cbp      >> (block_num * 4), bottom_cbp,
216         mb_is_intra    = v->is_intra[s->mb_x - s->mb_stride],
217         block_is_intra = mb_is_intra >> block_num, bottom_is_intra;
218     int idx, linesize  = block_num > 3 ? s->uvlinesize : s->linesize, ttblk;
219     uint8_t *dst;
220
221     if (block_num > 3) {
222         dst      = s->dest[block_num - 3];
223     } else {
224         dst      = s->dest[0] + (block_num & 1) * 8 + ((block_num & 2) * 4 - 8) * linesize;
225     }
226     if (s->mb_y != s->end_mb_y || block_num < 2) {
227         int16_t (*mv)[2];
228         int mv_stride;
229
230         if (block_num > 3) {
231             bottom_cbp      = v->cbp[s->mb_x]      >> (block_num * 4);
232             bottom_is_intra = v->is_intra[s->mb_x] >> block_num;
233             mv              = &v->luma_mv[s->mb_x - s->mb_stride];
234             mv_stride       = s->mb_stride;
235         } else {
236             bottom_cbp      = (block_num < 2) ? (mb_cbp               >> ((block_num + 2) * 4))
237                                               : (v->cbp[s->mb_x]      >> ((block_num - 2) * 4));
238             bottom_is_intra = (block_num < 2) ? (mb_is_intra          >> (block_num + 2))
239                                               : (v->is_intra[s->mb_x] >> (block_num - 2));
240             mv_stride       = s->b8_stride;
241             mv              = &s->current_picture.motion_val[0][s->block_index[block_num] - 2 * mv_stride];
242         }
243
244         if (bottom_is_intra & 1 || block_is_intra & 1 ||
245             mv[0][0] != mv[mv_stride][0] || mv[0][1] != mv[mv_stride][1]) {
246             v->vc1dsp.vc1_v_loop_filter8(dst, linesize, v->pq);
247         } else {
248             idx = ((bottom_cbp >> 2) | block_cbp) & 3;
249             if (idx == 3) {
250                 v->vc1dsp.vc1_v_loop_filter8(dst, linesize, v->pq);
251             } else if (idx) {
252                 if (idx == 1)
253                     v->vc1dsp.vc1_v_loop_filter4(dst + 4, linesize, v->pq);
254                 else
255                     v->vc1dsp.vc1_v_loop_filter4(dst,     linesize, v->pq);
256             }
257         }
258     }
259
260     dst -= 4 * linesize;
261     ttblk = (v->ttblk[s->mb_x - s->mb_stride] >> (block_num * 4)) & 0xF;
262     if (ttblk == TT_4X4 || ttblk == TT_8X4) {
263         idx = (block_cbp | (block_cbp >> 2)) & 3;
264         if (idx == 3) {
265             v->vc1dsp.vc1_v_loop_filter8(dst, linesize, v->pq);
266         } else if (idx) {
267             if (idx == 1)
268                 v->vc1dsp.vc1_v_loop_filter4(dst + 4, linesize, v->pq);
269             else
270                 v->vc1dsp.vc1_v_loop_filter4(dst,     linesize, v->pq);
271         }
272     }
273 }
274
275 static av_always_inline void vc1_apply_p_h_loop_filter(VC1Context *v, int block_num)
276 {
277     MpegEncContext *s  = &v->s;
278     int mb_cbp         = v->cbp[s->mb_x - 1 - s->mb_stride],
279         block_cbp      = mb_cbp      >> (block_num * 4), right_cbp,
280         mb_is_intra    = v->is_intra[s->mb_x - 1 - s->mb_stride],
281         block_is_intra = mb_is_intra >> block_num, right_is_intra;
282     int idx, linesize  = block_num > 3 ? s->uvlinesize : s->linesize, ttblk;
283     uint8_t *dst;
284
285     if (block_num > 3) {
286         dst = s->dest[block_num - 3] - 8 * linesize;
287     } else {
288         dst = s->dest[0] + (block_num & 1) * 8 + ((block_num & 2) * 4 - 16) * linesize - 8;
289     }
290
291     if (s->mb_x != s->mb_width || !(block_num & 5)) {
292         int16_t (*mv)[2];
293
294         if (block_num > 3) {
295             right_cbp      = v->cbp[s->mb_x - s->mb_stride] >> (block_num * 4);
296             right_is_intra = v->is_intra[s->mb_x - s->mb_stride] >> block_num;
297             mv             = &v->luma_mv[s->mb_x - s->mb_stride - 1];
298         } else {
299             right_cbp      = (block_num & 1) ? (v->cbp[s->mb_x - s->mb_stride]      >> ((block_num - 1) * 4))
300                                              : (mb_cbp                              >> ((block_num + 1) * 4));
301             right_is_intra = (block_num & 1) ? (v->is_intra[s->mb_x - s->mb_stride] >> (block_num - 1))
302                                              : (mb_is_intra                         >> (block_num + 1));
303             mv             = &s->current_picture.motion_val[0][s->block_index[block_num] - s->b8_stride * 2 - 2];
304         }
305         if (block_is_intra & 1 || right_is_intra & 1 || mv[0][0] != mv[1][0] || mv[0][1] != mv[1][1]) {
306             v->vc1dsp.vc1_h_loop_filter8(dst, linesize, v->pq);
307         } else {
308             idx = ((right_cbp >> 1) | block_cbp) & 5; // FIXME check
309             if (idx == 5) {
310                 v->vc1dsp.vc1_h_loop_filter8(dst, linesize, v->pq);
311             } else if (idx) {
312                 if (idx == 1)
313                     v->vc1dsp.vc1_h_loop_filter4(dst + 4 * linesize, linesize, v->pq);
314                 else
315                     v->vc1dsp.vc1_h_loop_filter4(dst,                linesize, v->pq);
316             }
317         }
318     }
319
320     dst -= 4;
321     ttblk = (v->ttblk[s->mb_x - s->mb_stride - 1] >> (block_num * 4)) & 0xf;
322     if (ttblk == TT_4X4 || ttblk == TT_4X8) {
323         idx = (block_cbp | (block_cbp >> 1)) & 5;
324         if (idx == 5) {
325             v->vc1dsp.vc1_h_loop_filter8(dst, linesize, v->pq);
326         } else if (idx) {
327             if (idx == 1)
328                 v->vc1dsp.vc1_h_loop_filter4(dst + linesize * 4, linesize, v->pq);
329             else
330                 v->vc1dsp.vc1_h_loop_filter4(dst,                linesize, v->pq);
331         }
332     }
333 }
334
335 void ff_vc1_apply_p_loop_filter(VC1Context *v)
336 {
337     MpegEncContext *s = &v->s;
338     int i;
339     int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
340
341     for (i = 0; i < block_count; i++) {
342         vc1_apply_p_v_loop_filter(v, i);
343     }
344
345     /* V always precedes H, therefore we run H one MB before V;
346      * at the end of a row, we catch up to complete the row */
347     if (s->mb_x) {
348         for (i = 0; i < block_count; i++) {
349             vc1_apply_p_h_loop_filter(v, i);
350         }
351         if (s->mb_x == s->mb_width - 1) {
352             s->mb_x++;
353             ff_update_block_index(s);
354             for (i = 0; i < block_count; i++) {
355                 vc1_apply_p_h_loop_filter(v, i);
356             }
357         }
358     }
359 }