2 * VC-1 and WMV3 decoder
3 * Copyright (c) 2011 Mashiat Sarker Shakkhar
4 * Copyright (c) 2006-2007 Konstantin Shishkov
5 * Partly based on vc9.c (c) 2005 Anonymous, Alex Beregszaszi, Michael Niedermayer
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 * VC-1 and WMV3 loopfilter
30 #include "mpegvideo.h"
/* Run the horizontal overlap-smoothing DSP routine (vc1dsp.vc1_h_s_overlap)
 * on one vertical block edge selected by block_num.
 *
 * v:             decoder context providing the vc1dsp function table.
 * left_block:    blocks (64 int16_t each) of the macroblock left of the edge.
 * right_block:   blocks of the macroblock right of the edge.
 * left_fieldtx,
 * right_fieldtx: 0/1 flags for the two macroblocks; judging by the name they
 *                mark field-transformed MBs (TODO confirm).
 * block_num:     index of the block whose edge is being smoothed.
 *
 * NOTE(review): several short lines of this function (braces, the lines that
 * pick a call from block_num, and a few call arguments) are absent from this
 * listing, so only the visible arguments are described below. */
34 static av_always_inline void vc1_h_overlap_filter(VC1Context *v, int16_t (*left_block)[64],
35 int16_t (*right_block)[64], int left_fieldtx,
36 int right_fieldtx, int block_num)
/* Call starting from left_block[2]. When exactly one of the two fieldtx
 * flags is set, the arguments below become 16 - 8 * fieldtx for the
 * respective side instead of 8; the final argument is 0 when either flag is
 * set and 1 otherwise. */
40 v->vc1dsp.vc1_h_s_overlap(left_block[2],
42 left_fieldtx ^ right_fieldtx ? 16 - 8 * left_fieldtx : 8,
43 left_fieldtx ^ right_fieldtx ? 16 - 8 * right_fieldtx : 8,
44 left_fieldtx || right_fieldtx ? 0 : 1);
/* Call starting from right_block[0]; of the visible arguments only
 * right_fieldtx matters (final argument 0 when set, 1 otherwise). */
48 v->vc1dsp.vc1_h_s_overlap(right_block[0],
52 right_fieldtx ? 0 : 1);
/* Call on left_block[3] / right_block[1]. If only the right flag is set the
 * left operand is instead taken 8 elements into left_block[2]; if only the
 * left flag is set the right operand is taken 8 elements into
 * right_block[0]. The final argument is 2 when either flag is set, else 1. */
56 v->vc1dsp.vc1_h_s_overlap(!left_fieldtx && right_fieldtx ? left_block[2] + 8 : left_block[3],
57 left_fieldtx && !right_fieldtx ? right_block[0] + 8 : right_block[1],
58 left_fieldtx ^ right_fieldtx ? 16 - 8 * left_fieldtx : 8,
59 left_fieldtx ^ right_fieldtx ? 16 - 8 * right_fieldtx : 8,
60 left_fieldtx || right_fieldtx ? 2 : 1);
/* Call starting from right_block[1]; final argument 2 when right_fieldtx is
 * set, 1 otherwise. */
64 v->vc1dsp.vc1_h_s_overlap(right_block[1],
68 right_fieldtx ? 2 : 1);
/* Same block index on both sides of the edge, fixed arguments 8, 8, 1. */
73 v->vc1dsp.vc1_h_s_overlap(left_block[block_num], right_block[block_num], 8, 8, 1);
/* Run the vertical overlap-smoothing DSP routine (vc1dsp.vc1_v_s_overlap) on
 * one horizontal block edge selected by block_num.
 *
 * v:            decoder context providing the vc1dsp function table.
 * top_block:    blocks (64 int16_t each) of the macroblock above the edge.
 * bottom_block: blocks of the macroblock below the edge.
 * block_num:    index of the block whose edge is being smoothed.
 *
 * NOTE(review): the lines that map block_num to one of the calls below are
 * not present in this listing. */
78 static av_always_inline void vc1_v_overlap_filter(VC1Context *v, int16_t (*top_block)[64],
79 int16_t (*bottom_block)[64], int block_num)
/* Edges between the two macroblocks: a top_block entry paired with a
 * bottom_block entry. */
83 v->vc1dsp.vc1_v_s_overlap(top_block[1], bottom_block[0]);
87 v->vc1dsp.vc1_v_s_overlap(top_block[3], bottom_block[2]);
/* Edges inside bottom_block: both operands come from the same macroblock. */
91 v->vc1dsp.vc1_v_s_overlap(bottom_block[0], bottom_block[1]);
95 v->vc1dsp.vc1_v_s_overlap(bottom_block[2], bottom_block[3]);
/* Same block index in the top and bottom macroblock. */
100 v->vc1dsp.vc1_v_s_overlap(top_block[block_num], bottom_block[block_num]);
/* Overlap-smoothing driver that decides per macroblock from v->pq, the
 * profile, v->condover and v->over_flags_plane whether an edge is smoothed.
 *
 * It reads the current MB position from v->s, uses the four block sets
 * selected by v->{topleft,top,left,cur}_blk_idx and dispatches to
 * vc1_h_overlap_filter() / vc1_v_overlap_filter().
 *
 * NOTE(review): a few short lines (the declaration of i, the statements
 * guarded by the two skip tests, the start of one condition and the last
 * argument of the H call) are not present in this listing. */
105 void ff_vc1_i_overlap_filter(VC1Context *v)
107 MpegEncContext *s = &v->s;
108 int16_t (*topleft_blk)[64], (*top_blk)[64], (*left_blk)[64], (*cur_blk)[64];
/* 4 blocks when CONFIG_GRAY is enabled and AV_CODEC_FLAG_GRAY is set,
 * otherwise all 6. */
109 int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
/* Index of the current MB in the per-MB planes (row stride is mb_stride). */
110 int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
113 topleft_blk = v->block[v->topleft_blk_idx];
114 top_blk = v->block[v->top_blk_idx];
115 left_blk = v->block[v->left_blk_idx];
116 cur_blk = v->block[v->cur_blk_idx];
118 /* Within a MB, the horizontal overlap always runs before the vertical.
119 * To accomplish that, we run the H on the left and internal vertical
120 * borders of the currently decoded MB. Then, we wait for the next overlap
121 * iteration to do H overlap on the right edge of this MB, before moving
122 * over and running the V overlap on the top and internal horizontal
123 * borders. Therefore, the H overlap trails by one MB col and the
124 * V overlap trails by one MB row. This is reflected in the time at which
125 * we run the put_pixels loop, i.e. delayed by one row and one column. */
126 for (i = 0; i < block_count; i++) {
/* In column 0 this test is true for every block except those with
 * (i & 5) == 1, i.e. blocks 1 and 3; the guarded statement is not visible
 * (presumably it skips the block). */
127 if (s->mb_x == 0 && (i & 5) != 1)
/* Smooth when pq >= 9, or in the advanced profile when condover is
 * CONDOVER_ALL or the over flag of this MB is set together with either
 * (i & 5) == 1 or the over flag of the MB to the left. */
130 if (v->pq >= 9 || (v->profile == PROFILE_ADVANCED &&
131 (v->condover == CONDOVER_ALL ||
132 (v->over_flags_plane[mb_pos] &&
133 ((i & 5) == 1 || v->over_flags_plane[mb_pos - 1])))))
/* In column 0 cur_blk is passed for both sides; the fieldtx arguments are
 * non-zero only for ILACE_FRAME pictures. */
134 vc1_h_overlap_filter(v,
135 s->mb_x ? left_blk : cur_blk, cur_blk,
136 v->fcm == ILACE_FRAME && s->mb_x && v->fieldtx_plane[mb_pos - 1],
137 v->fcm == ILACE_FRAME && v->fieldtx_plane[mb_pos],
/* The vertical pass is only run when the picture is not ILACE_FRAME. */
141 if (v->fcm != ILACE_FRAME)
142 for (i = 0; i < block_count; i++) {
/* On the first slice line this test is true for blocks without bit 1 set;
 * the guarded statement is not visible (presumably it skips the block). */
143 if (s->first_slice_line && !(i & 2))
/* V overlap for the MB to the left (left_blk), using the over flags at
 * mb_pos - 1 and mb_pos - 1 - mb_stride. */
147 (v->pq >= 9 || (v->profile == PROFILE_ADVANCED &&
148 (v->condover == CONDOVER_ALL ||
149 (v->over_flags_plane[mb_pos - 1] &&
150 ((i & 2) || v->over_flags_plane[mb_pos - 1 - s->mb_stride]))))))
151 vc1_v_overlap_filter(v, s->first_slice_line ? left_blk : topleft_blk, left_blk, i);
/* In the last column the current MB (cur_blk) is handled as well. */
152 if (s->mb_x == s->mb_width - 1 &&
153 (v->pq >= 9 || (v->profile == PROFILE_ADVANCED &&
154 (v->condover == CONDOVER_ALL ||
155 (v->over_flags_plane[mb_pos] &&
156 ((i & 2) || v->over_flags_plane[mb_pos - s->mb_stride]))))))
157 vc1_v_overlap_filter(v, s->first_slice_line ? cur_blk : top_blk, cur_blk, i);
/* Overlap-smoothing driver that decides per block edge from v->mb_type[0]:
 * an edge is smoothed only when the entries of both adjacent blocks are
 * non-zero (presumably meaning both blocks are intra - TODO confirm).
 *
 * It has the same structure as the pq/condover based driver: an H loop over
 * the current MB followed, for pictures that are not ILACE_FRAME, by a V loop
 * over the MB to the left and, in the last column, the current MB.
 *
 * NOTE(review): a few short lines (the declaration of i, the statements
 * guarded by the two skip tests and the last argument of the H call) are not
 * present in this listing. */
161 void ff_vc1_p_overlap_filter(VC1Context *v)
163 MpegEncContext *s = &v->s;
164 int16_t (*topleft_blk)[64], (*top_blk)[64], (*left_blk)[64], (*cur_blk)[64];
/* 4 blocks when CONFIG_GRAY is enabled and AV_CODEC_FLAG_GRAY is set,
 * otherwise all 6. */
165 int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
166 int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
169 topleft_blk = v->block[v->topleft_blk_idx];
170 top_blk = v->block[v->top_blk_idx];
171 left_blk = v->block[v->left_blk_idx];
172 cur_blk = v->block[v->cur_blk_idx];
174 for (i = 0; i < block_count; i++) {
/* In column 0 this test is true for every block except blocks 1 and 3; the
 * guarded statement is not visible (presumably it skips the block). */
175 if (s->mb_x == 0 && (i & 5) != 1)
/* Both this block and the entry immediately before it in mb_type[0] must be
 * flagged. */
178 if (v->mb_type[0][s->block_index[i]] && v->mb_type[0][s->block_index[i] - 1])
179 vc1_h_overlap_filter(v,
180 s->mb_x ? left_blk : cur_blk, cur_blk,
181 v->fcm == ILACE_FRAME && s->mb_x && v->fieldtx_plane[mb_pos - 1],
182 v->fcm == ILACE_FRAME && v->fieldtx_plane[mb_pos],
/* The vertical pass is only run when the picture is not ILACE_FRAME. */
186 if (v->fcm != ILACE_FRAME)
187 for (i = 0; i < block_count; i++) {
188 if (s->first_slice_line && !(i & 2))
/* MB to the left: the index step back is 2 entries for blocks 0..3 and
 * 1 entry for blocks 4 and 5 (the "+ (i > 3)" term); block_wrap[i] moves to
 * the entry one row up. */
191 if (s->mb_x && v->mb_type[0][s->block_index[i] - 2 + (i > 3)] &&
192 v->mb_type[0][s->block_index[i] - s->block_wrap[i] - 2 + (i > 3)])
193 vc1_v_overlap_filter(v, s->first_slice_line ? left_blk : topleft_blk, left_blk, i);
/* In the last column the current MB is handled as well. */
194 if (s->mb_x == s->mb_width - 1)
195 if (v->mb_type[0][s->block_index[i]] &&
196 v->mb_type[0][s->block_index[i] - s->block_wrap[i]])
197 vc1_v_overlap_filter(v, s->first_slice_line ? cur_blk : top_blk, cur_blk, i);
/* Bit flags combined into the "flags" argument of the loop-filter helpers.
 * The drivers set them when the macroblock being filtered lies in the first
 * column (LEFT_EDGE), the last column (RIGHT_EDGE), the row start_mb_y
 * (TOP_EDGE) or the row end_mb_y - 1 (BOTTOM_EDGE). */
201 #define LEFT_EDGE (1 << 0)
202 #define RIGHT_EDGE (1 << 1)
203 #define TOP_EDGE (1 << 2)
204 #define BOTTOM_EDGE (1 << 3)
/* Horizontal loop filter for one block, applied along its left side at dst.
 *
 * v:         decoder context (pq, fcm, line sizes, vc1dsp function table).
 * dest:      pointer to the top-left sample of the macroblock plane that
 *            holds the block.
 * flags:     combination of the *_EDGE bits for this macroblock.
 * block_num: block index within the macroblock.
 *
 * NOTE(review): the declarations of pq/dst and the branch heads that choose
 * between the uvlinesize and linesize variants are not present in this
 * listing; the uvlinesize variants are presumably the block_num > 3 (chroma)
 * case. */
206 static av_always_inline void vc1_i_h_loop_filter(VC1Context *v, uint8_t *dest,
207 uint32_t flags, int block_num)
209 MpegEncContext *s = &v->s;
/* On the left edge only blocks with (block_num & 5) == 1 (blocks 1 and 3)
 * are filtered. */
216 if (!(flags & LEFT_EDGE) || (block_num & 5) == 1) {
/* Position inside a 16x16 area: bit 1 of block_num selects the lower
 * 8 lines, bit 0 the right 8 columns. */
220 dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
/* ILACE_FRAME: two calls with doubled stride, the second starting one line
 * lower. */
222 if (v->fcm == ILACE_FRAME)
224 v->vc1dsp.vc1_h_loop_filter4(dst, 2 * s->uvlinesize, pq);
225 v->vc1dsp.vc1_h_loop_filter4(dst + s->uvlinesize, 2 * s->uvlinesize, pq);
227 v->vc1dsp.vc1_h_loop_filter8(dst, 2 * s->linesize, pq);
228 v->vc1dsp.vc1_h_loop_filter8(dst + s->linesize, 2 * s->linesize, pq);
/* Otherwise a single call with the plain stride. */
232 v->vc1dsp.vc1_h_loop_filter8(dst, s->uvlinesize, pq);
234 v->vc1dsp.vc1_h_loop_filter16(dst, s->linesize, pq);
/* Vertical loop filter for one block, applied along its top side at dst.
 *
 * v:         decoder context (pq, fcm, line sizes, vc1dsp function table).
 * dest:      pointer to the top-left sample of the macroblock plane that
 *            holds the block.
 * flags:     combination of the *_EDGE bits for this macroblock.
 * fieldtx:   fieldtx_plane entry of the macroblock.
 * block_num: block index within the macroblock.
 *
 * NOTE(review): the last parameter line, the declarations of pq/dst, the
 * statement guarded by the first test and the branch heads that choose
 * between the uvlinesize and linesize variants are not present in this
 * listing. */
238 static av_always_inline void vc1_i_v_loop_filter(VC1Context *v, uint8_t *dest,
239 uint32_t flags, uint8_t fieldtx,
242 MpegEncContext *s = &v->s;
/* True for blocks 1 and 3; the guarded statement is not visible (presumably
 * an early return). */
246 if ((block_num & 5) == 1)
/* On the top edge only blocks with bit 1 set are filtered. */
249 if (!(flags & TOP_EDGE) || block_num & 2) {
253 dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
/* ILACE_FRAME: two calls with doubled stride, the second starting one line
 * lower. */
255 if (v->fcm == ILACE_FRAME) {
257 v->vc1dsp.vc1_v_loop_filter8(dst, 2 * s->uvlinesize, pq);
258 v->vc1dsp.vc1_v_loop_filter8(dst + s->uvlinesize, 2 * s->uvlinesize, pq);
/* The linesize variant runs only for blocks 0/1 or when fieldtx is 0. */
259 } else if (block_num < 2 || !fieldtx) {
260 v->vc1dsp.vc1_v_loop_filter16(dst, 2 * s->linesize, pq);
261 v->vc1dsp.vc1_v_loop_filter16(dst + s->linesize, 2 * s->linesize, pq);
/* Otherwise a single call with the plain stride. */
265 v->vc1dsp.vc1_v_loop_filter8(dst, s->uvlinesize, pq);
267 v->vc1dsp.vc1_v_loop_filter16(dst, s->linesize, pq);
/* Loop-filter driver built on vc1_i_v_loop_filter() / vc1_i_h_loop_filter().
 *
 * For the current MB position it filters neighbouring macroblocks that lie
 * above and/or to the left: luma pointers are derived from s->dest[0] in
 * steps of 16 lines / 16 samples, chroma pointers from s->dest[1..2] in
 * steps of 8 lines / 8 samples. In the last column (mb_x == end_mb_x - 1)
 * and on the last row (mb_y == end_mb_y - 1) the macroblocks in the current
 * column / row are handled too.
 *
 * NOTE(review): the declarations of flags and i, several "if" heads and the
 * statements adjusting dest between the left and the last-column calls are
 * not present in this listing. */
271 void ff_vc1_i_loop_filter(VC1Context *v)
273 MpegEncContext *s = &v->s;
/* 4 blocks when CONFIG_GRAY is enabled and AV_CODEC_FLAG_GRAY is set,
 * otherwise all 6. */
274 int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
275 int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
276 uint8_t *dest, fieldtx;
280 /* Within a MB, the vertical loop filter always runs before the horizontal.
281 * To accomplish that, we run the V loop filter on top and internal
282 * horizontal borders of the last overlap filtered MB. Then, we wait for
283 * the loop filter iteration on the next row to do V loop filter on the
284 * bottom edge of this MB, before moving over and running the H loop
285 * filter on the left and internal vertical borders. Therefore, the loop
286 * filter trails by one row and one column relative to the overlap filter
287 * and two rows and two columns relative to the decoding loop. */
/* V pass, row mb_y - 1: the MB up-left of the current one and, in the last
 * column, the MB directly above. TOP_EDGE is set when that row is
 * start_mb_y. */
288 if (!s->first_slice_line) {
289 dest = s->dest[0] - 16 * s->linesize - 16;
290 flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
292 fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride - 1];
293 for (i = 0; i < block_count; i++)
294 vc1_i_v_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest, flags, fieldtx, i);
296 if (s->mb_x == v->end_mb_x - 1) {
298 fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride];
299 for (i = 0; i < block_count; i++)
300 vc1_i_v_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest, flags, fieldtx, i);
/* V pass, current row, only on the last row: the MB to the left and, in the
 * last column, the current MB. BOTTOM_EDGE is always set here. */
303 if (s->mb_y == s->end_mb_y - 1) {
304 dest = s->dest[0] - 16;
305 flags = s->first_slice_line ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;
307 fieldtx = v->fieldtx_plane[mb_pos - 1];
308 for (i = 0; i < block_count; i++)
309 vc1_i_v_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 : dest, flags, fieldtx, i);
311 if (s->mb_x == v->end_mb_x - 1) {
313 fieldtx = v->fieldtx_plane[mb_pos];
314 for (i = 0; i < block_count; i++)
315 vc1_i_v_loop_filter(v, i > 3 ? s->dest[i - 3] : dest, flags, fieldtx, i);
/* H pass, row mb_y - 2: the MB one column to the left and, in the last
 * column, the MB in the current column. */
319 if (s->mb_y >= s->start_mb_y + 2) {
320 dest = s->dest[0] - 32 * s->linesize - 16;
322 flags = s->mb_x == 1 ? LEFT_EDGE : 0;
323 for (i = 0; i < block_count; i++)
324 vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 8 : dest, flags, i);
326 if (s->mb_x == v->end_mb_x - 1) {
328 flags = s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE;
329 for (i = 0; i < block_count; i++)
330 vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize : dest, flags, i);
/* H pass on the last row: row mb_y - 1 (when it exists) followed by the
 * current row, with the same column pattern as above. */
333 if (s->mb_y == s->end_mb_y - 1) {
334 if (s->mb_y >= s->start_mb_y + 1) {
335 dest = s->dest[0] - 16 * s->linesize - 16;
337 flags = s->mb_x == 1 ? LEFT_EDGE : 0;
338 for (i = 0; i < block_count; i++)
339 vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest, flags, i);
341 if (s->mb_x == v->end_mb_x - 1) {
342 flags = s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE;
344 for (i = 0; i < block_count; i++)
345 vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest, flags, i);
348 dest = s->dest[0] - 16;
350 flags = s->mb_x == 1 ? LEFT_EDGE : 0;
351 for (i = 0; i < block_count; i++)
352 vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 : dest, flags, i);
354 if (s->mb_x == v->end_mb_x - 1) {
356 flags = s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE;
357 for (i = 0; i < block_count; i++)
358 vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] : dest, flags, i);
/* Horizontal loop filter for one block: its right side (dst + 8) and, for
 * the TT_4X4 / TT_4X8 values of tt, the line through its middle (dst + 4).
 *
 * v:         decoder context (pq, fcm, line sizes, vc1dsp function table).
 * dest:      top-left sample of the macroblock plane holding the block.
 * cbp:       per-MB values with 4 bits per block; [0] is this MB, [1] the
 *            next MB to the right.
 * is_intra:  per-MB bit masks, one bit per block; same indexing as cbp.
 * mv:        motion vectors; [0] is compared against [1].
 * mv_f:      per-block values compared only for ILACE_FIELD pictures.
 * ttblk:     per-MB values with 4 bits per block (presumably the transform
 *            type of each block - TODO confirm).
 * flags:     combination of the *_EDGE bits for this macroblock.
 * block_num: block index within the macroblock.
 *
 * NOTE(review): the declarations of pq, tt and dst and several "if"/"else"
 * heads (including the tests on idx and left_cbp) are not present in this
 * listing. */
363 static av_always_inline void vc1_p_h_loop_filter(VC1Context *v, uint8_t *dest, uint32_t *cbp,
364 uint8_t *is_intra, int16_t (*mv)[2], uint8_t *mv_f,
365 int *ttblk, uint32_t flags, int block_num)
367 MpegEncContext *s = &v->s;
/* Shift this block's 4 cbp bits down to the low nibble. */
369 uint32_t left_cbp = cbp[0] >> (block_num * 4), right_cbp;
370 uint8_t left_is_intra, right_is_intra;
372 int idx, linesize = block_num > 3 ? s->uvlinesize : s->linesize;
378 dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
/* On the right edge only blocks with (block_num & 5) == 0 (blocks 0 and 2)
 * get their right side filtered. */
380 if (!(flags & RIGHT_EDGE) || !(block_num & 5)) {
381 left_is_intra = is_intra[0] & (1 << block_num);
/* Neighbour on the right: the same block index in the next MB ... */
384 right_is_intra = is_intra[1] & (1 << block_num);
385 right_cbp = cbp[1] >> (block_num * 4);
/* ... block_num - 1 of the next MB for odd block numbers ... */
386 } else if (block_num & 1) {
387 right_is_intra = is_intra[1] & (1 << block_num - 1);
388 right_cbp = cbp[1] >> ((block_num - 1) * 4);
/* ... or block_num + 1 of the same MB. */
390 right_is_intra = is_intra[0] & (1 << block_num + 1);
391 right_cbp = cbp[0] >> ((block_num + 1) * 4);
/* Filter all 8 lines when either side is intra or the motion vectors (and,
 * for ILACE_FIELD, the mv_f values) differ. */
394 if (left_is_intra || right_is_intra ||
395 mv[0][0] != mv[1][0] || mv[0][1] != mv[1][1] ||
396 (v->fcm == ILACE_FIELD && mv_f[0] != mv_f[1]))
397 v->vc1dsp.vc1_h_loop_filter8(dst + 8, linesize, pq);
/* Otherwise select 4-line halves from the combined cbp bits. */
399 idx = (left_cbp | (right_cbp >> 1)) & 5;
401 v->vc1dsp.vc1_h_loop_filter4(dst + 4 * linesize + 8, linesize, pq);
403 v->vc1dsp.vc1_h_loop_filter4(dst + 8, linesize, pq);
/* Line through the middle of the block, in 4-line halves. */
407 tt = ttblk[0] >> (block_num * 4) & 0xf;
408 if (tt == TT_4X4 || tt == TT_4X8) {
410 v->vc1dsp.vc1_h_loop_filter4(dst + 4 * linesize + 4, linesize, pq);
412 v->vc1dsp.vc1_h_loop_filter4(dst + 4, linesize, pq);
/* Vertical loop filter for one block: its bottom side (dst + 8 lines) and,
 * for the TT_4X4 / TT_8X4 values of tt, the line through its middle
 * (dst + 4 lines).
 *
 * v:         decoder context (pq, fcm, line sizes, strides, vc1dsp table).
 * dest:      top-left sample of the macroblock plane holding the block.
 * cbp:       per-MB values with 4 bits per block; [0] is this MB,
 *            [s->mb_stride] the MB one row below.
 * is_intra:  per-MB bit masks, one bit per block; same indexing as cbp.
 * mv:        motion vectors; [0] is compared against the entry one row
 *            below (mb_stride for block_num > 3, b8_stride otherwise).
 * mv_f:      per-block values compared only for ILACE_FIELD pictures.
 * ttblk:     per-MB values with 4 bits per block (presumably the transform
 *            type of each block - TODO confirm).
 * flags:     combination of the *_EDGE bits for this macroblock.
 * block_num: block index within the macroblock.
 *
 * NOTE(review): the declarations of pq, tt and dst and several "if"/"else"
 * heads (including the tests on idx and top_cbp) are not present in this
 * listing. */
416 static av_always_inline void vc1_p_v_loop_filter(VC1Context *v, uint8_t *dest, uint32_t *cbp,
417 uint8_t *is_intra, int16_t (*mv)[2], uint8_t *mv_f,
418 int *ttblk, uint32_t flags, int block_num)
420 MpegEncContext *s = &v->s;
/* Shift this block's 4 cbp bits down to the low nibble. */
422 uint32_t top_cbp = cbp[0] >> (block_num * 4), bottom_cbp;
423 uint8_t top_is_intra, bottom_is_intra;
425 int idx, linesize = block_num > 3 ? s->uvlinesize : s->linesize;
431 dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
/* On the bottom edge only blocks 0 and 1 get their bottom side filtered. */
433 if(!(flags & BOTTOM_EDGE) || block_num < 2) {
434 top_is_intra = is_intra[0] & (1 << block_num);
/* Neighbour below: the same block index in the MB one row down ... */
437 bottom_is_intra = is_intra[s->mb_stride] & (1 << block_num);
438 bottom_cbp = cbp[s->mb_stride] >> (block_num * 4);
/* ... block_num + 2 of the same MB for blocks 0 and 1 ... */
439 } else if (block_num < 2) {
440 bottom_is_intra = is_intra[0] & (1 << block_num + 2);
441 bottom_cbp = cbp[0] >> ((block_num + 2) * 4);
/* ... or block_num - 2 of the MB one row down. */
443 bottom_is_intra = is_intra[s->mb_stride] & (1 << block_num - 2);
444 bottom_cbp = cbp[s->mb_stride] >> ((block_num - 2) * 4);
/* Filter all 8 columns when either side is intra or the motion vectors
 * (and, for ILACE_FIELD, the mv_f values) differ. */
447 if (top_is_intra || bottom_is_intra ||
448 mv[0][0] != mv[block_num > 3 ? s->mb_stride : s->b8_stride][0] ||
449 mv[0][1] != mv[block_num > 3 ? s->mb_stride : s->b8_stride][1] ||
450 (v->fcm == ILACE_FIELD && mv_f[0] != mv_f[block_num > 3 ? s->mb_stride : s->b8_stride]))
451 v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, linesize, pq);
/* Otherwise select 4-column halves from the combined cbp bits. */
453 idx = (top_cbp | (bottom_cbp >> 2)) & 3;
455 v->vc1dsp.vc1_v_loop_filter4(dst + 8 * linesize + 4, linesize, pq);
457 v->vc1dsp.vc1_v_loop_filter4(dst + 8 * linesize, linesize, pq);
/* Line through the middle of the block, in 4-column halves. */
461 tt = ttblk[0] >> (block_num * 4) & 0xf;
462 if (tt == TT_4X4 || tt == TT_8X4) {
464 v->vc1dsp.vc1_v_loop_filter4(dst + 4 * linesize + 4, linesize, pq);
466 v->vc1dsp.vc1_v_loop_filter4(dst + 4 * linesize, linesize, pq);
/* Loop-filter driver built on vc1_p_v_loop_filter() / vc1_p_h_loop_filter().
 *
 * For every filtered macroblock it sets up: the luma pointer (s->dest[0]
 * moved by 16 lines / 16 samples per MB), pointers into v->cbp, v->is_intra,
 * v->luma_mv and v->ttblk (moved by s->mb_stride per row and 1 per column),
 * the edge flags, and per-block pointers into motion_val[0] and v->mv_f[0].
 * Chroma blocks (i > 3) use s->dest[i - 3] moved by 8 lines / 8 samples.
 *
 * The V pass covers row mb_y - 2 at column mb_x - 1 (plus column mb_x in the
 * last column); the H pass covers row mb_y - 2 at column mb_x - 2 (plus
 * columns mb_x - 1 and mb_x in the last column). On the last row
 * (mb_y == end_mb_y - 1) rows mb_y - 1 and mb_y are processed as well.
 *
 * NOTE(review): the local declarations, several "if" heads, some dest
 * assignments and most of the arguments of each helper call are not present
 * in this listing. */
470 void ff_vc1_p_loop_filter(VC1Context *v)
472 MpegEncContext *s = &v->s;
/* 4 blocks when CONFIG_GRAY is enabled and AV_CODEC_FLAG_GRAY is set,
 * otherwise all 6. */
473 int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
482 /* Within a MB, the vertical loop filter always runs before the horizontal.
483 * To accomplish that, we run the V loop filter on all applicable
484 * horizontal borders of the MB above the last overlap filtered MB. Then,
485 * we wait for the next loop filter iteration to do H loop filter on all
486 * applicable vertical borders of this MB. Therefore, the loop filter
487 * trails by one row and one column relative to the overlap filter and two
488 * rows and two columns relative to the decoding loop. */
/* V pass, row mb_y - 2, column mb_x - 1. TOP_EDGE is set when that row is
 * start_mb_y. */
489 if (s->mb_y >= s->start_mb_y + 2) {
491 dest = s->dest[0] - 32 * s->linesize - 16;
492 cbp = &v->cbp[s->mb_x - 2 * s->mb_stride - 1];
493 is_intra = &v->is_intra[s->mb_x - 2 * s->mb_stride - 1];
494 uvmv = &v->luma_mv[s->mb_x - 2 * s->mb_stride - 1];
495 ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride - 1];
496 flags = s->mb_y == s->start_mb_y + 2 ? TOP_EDGE : 0;
497 for (i = 0; i < block_count; i++)
498 vc1_p_v_loop_filter(v,
499 i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 8 : dest,
503 &s->current_picture.motion_val[0][s->block_index[i] - 4 * s->b8_stride - 2 + v->blocks_off],
504 i > 3 ? &v->mv_f[0][s->block_index[i] - 2 * s->mb_stride - 1 + v->mb_off] :
505 &v->mv_f[0][s->block_index[i] - 4 * s->b8_stride - 2 + v->blocks_off],
/* V pass, row mb_y - 2, column mb_x (last column only). */
510 if (s->mb_x == s->mb_width - 1) {
511 dest = s->dest[0] - 32 * s->linesize;
512 cbp = &v->cbp[s->mb_x - 2 * s->mb_stride];
513 is_intra = &v->is_intra[s->mb_x - 2 * s->mb_stride];
514 uvmv = &v->luma_mv[s->mb_x - 2 * s->mb_stride];
515 ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride];
516 flags = s->mb_y == s->start_mb_y + 2 ? TOP_EDGE : 0;
517 for (i = 0; i < block_count; i++)
518 vc1_p_v_loop_filter(v,
519 i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize : dest,
523 &s->current_picture.motion_val[0][s->block_index[i] - 4 * s->b8_stride + v->blocks_off],
524 i > 3 ? &v->mv_f[0][s->block_index[i] - 2 * s->mb_stride + v->mb_off] :
525 &v->mv_f[0][s->block_index[i] - 4 * s->b8_stride + v->blocks_off],
/* Last row: V pass for row mb_y - 1 (when it exists) and row mb_y, first at
 * column mb_x - 1. */
531 if (s->mb_y == s->end_mb_y - 1) {
533 if (s->mb_y >= s->start_mb_y + 1) {
534 dest = s->dest[0] - 16 * s->linesize - 16;
535 cbp = &v->cbp[s->mb_x - s->mb_stride - 1];
536 is_intra = &v->is_intra[s->mb_x - s->mb_stride - 1];
537 uvmv = &v->luma_mv[s->mb_x - s->mb_stride - 1];
538 ttblk = &v->ttblk[s->mb_x - s->mb_stride - 1];
539 flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
540 for (i = 0; i < block_count; i++)
541 vc1_p_v_loop_filter(v,
542 i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest,
546 &s->current_picture.motion_val[0][s->block_index[i] - 2 * s->b8_stride - 2 + v->blocks_off],
547 i > 3 ? &v->mv_f[0][s->block_index[i] - s->mb_stride - 1 + v->mb_off] :
548 &v->mv_f[0][s->block_index[i] - 2 * s->b8_stride - 2 + v->blocks_off],
/* Row mb_y, column mb_x - 1; BOTTOM_EDGE is always set for this row. */
553 dest = s->dest[0] - 16;
554 cbp = &v->cbp[s->mb_x - 1];
555 is_intra = &v->is_intra[s->mb_x - 1];
556 uvmv = &v->luma_mv[s->mb_x - 1];
557 ttblk = &v->ttblk[s->mb_x - 1];
558 flags = s->mb_y == s->start_mb_y ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;
559 for (i = 0; i < block_count; i++)
560 vc1_p_v_loop_filter(v,
561 i > 3 ? s->dest[i - 3] - 8 : dest,
565 &s->current_picture.motion_val[0][s->block_index[i] - 2 + v->blocks_off],
566 i > 3 ? &v->mv_f[0][s->block_index[i] - 1 + v->mb_off] :
567 &v->mv_f[0][s->block_index[i] - 2 + v->blocks_off],
/* Last row and last column: the same two rows at column mb_x. */
572 if (s->mb_x == s->mb_width - 1) {
573 if (s->mb_y >= s->start_mb_y + 1) {
574 dest = s->dest[0] - 16 * s->linesize;
575 cbp = &v->cbp[s->mb_x - s->mb_stride];
576 is_intra = &v->is_intra[s->mb_x - s->mb_stride];
577 uvmv = &v->luma_mv[s->mb_x - s->mb_stride];
578 ttblk = &v->ttblk[s->mb_x - s->mb_stride];
579 flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
580 for (i = 0; i < block_count; i++)
581 vc1_p_v_loop_filter(v,
582 i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest,
586 &s->current_picture.motion_val[0][s->block_index[i] - 2 * s->b8_stride + v->blocks_off],
587 i > 3 ? &v->mv_f[0][s->block_index[i] - s->mb_stride + v->mb_off] :
588 &v->mv_f[0][s->block_index[i] - 2 * s->b8_stride + v->blocks_off],
594 cbp = &v->cbp[s->mb_x];
595 is_intra = &v->is_intra[s->mb_x];
596 uvmv = &v->luma_mv[s->mb_x];
597 ttblk = &v->ttblk[s->mb_x];
598 flags = s->mb_y == s->start_mb_y ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;
599 for (i = 0; i < block_count; i++)
600 vc1_p_v_loop_filter(v,
601 i > 3 ? s->dest[i - 3] : dest,
605 &s->current_picture.motion_val[0][s->block_index[i] + v->blocks_off],
606 i > 3 ? &v->mv_f[0][s->block_index[i] + v->mb_off] :
607 &v->mv_f[0][s->block_index[i] + v->blocks_off],
/* H pass, row mb_y - 2, column mb_x - 2. LEFT_EDGE is set when that column
 * is 0. */
614 if (s->mb_y >= s->start_mb_y + 2) {
616 dest = s->dest[0] - 32 * s->linesize - 32;
617 cbp = &v->cbp[s->mb_x - 2 * s->mb_stride - 2];
618 is_intra = &v->is_intra[s->mb_x - 2 * s->mb_stride - 2];
619 uvmv = &v->luma_mv[s->mb_x - 2 * s->mb_stride - 2];
620 ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride - 2];
621 flags = s->mb_x == 2 ? LEFT_EDGE : 0;
622 for (i = 0; i < block_count; i++)
623 vc1_p_h_loop_filter(v,
624 i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 16 : dest,
628 &s->current_picture.motion_val[0][s->block_index[i] - 4 * s->b8_stride - 4 + v->blocks_off],
629 i > 3 ? &v->mv_f[0][s->block_index[i] - 2 * s->mb_stride - 2 + v->mb_off] :
630 &v->mv_f[0][s->block_index[i] - 4 * s->b8_stride - 4 + v->blocks_off],
/* Last column: row mb_y - 2 at columns mb_x - 1 and mb_x; the latter always
 * carries RIGHT_EDGE. */
635 if (s->mb_x == s->mb_width - 1) {
637 dest = s->dest[0] - 32 * s->linesize - 16;
638 cbp = &v->cbp[s->mb_x - 2 * s->mb_stride - 1];
639 is_intra = &v->is_intra[s->mb_x - 2 * s->mb_stride - 1];
640 uvmv = &v->luma_mv[s->mb_x - 2 * s->mb_stride - 1];
641 ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride - 1];
642 flags = s->mb_x == 1 ? LEFT_EDGE : 0;
643 for (i = 0; i < block_count; i++)
644 vc1_p_h_loop_filter(v,
645 i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 8 : dest,
649 &s->current_picture.motion_val[0][s->block_index[i] - 4 * s->b8_stride - 2 + v->blocks_off],
650 i > 3 ? &v->mv_f[0][s->block_index[i] - 2 * s->mb_stride - 1 + v->mb_off] :
651 &v->mv_f[0][s->block_index[i] - 4 * s->b8_stride - 2 + v->blocks_off],
656 dest = s->dest[0] - 32 * s->linesize;
657 cbp = &v->cbp[s->mb_x - 2 * s->mb_stride];
658 is_intra = &v->is_intra[s->mb_x - 2 * s->mb_stride];
659 uvmv = &v->luma_mv[s->mb_x - 2 * s->mb_stride];
660 ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride];
661 flags = s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE;
662 for (i = 0; i < block_count; i++)
663 vc1_p_h_loop_filter(v,
664 i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize : dest,
668 &s->current_picture.motion_val[0][s->block_index[i] - 4 * s->b8_stride + v->blocks_off],
669 i > 3 ? &v->mv_f[0][s->block_index[i] - 2 * s->mb_stride + v->mb_off] :
670 &v->mv_f[0][s->block_index[i] - 4 * s->b8_stride + v->blocks_off],
/* Last row: H pass for row mb_y - 1 (when it exists), with the same column
 * pattern as above. */
676 if (s->mb_y == s->end_mb_y - 1) {
677 if (s->mb_y >= s->start_mb_y + 1) {
679 dest = s->dest[0] - 16 * s->linesize - 32;
680 cbp = &v->cbp[s->mb_x - s->mb_stride - 2];
681 is_intra = &v->is_intra[s->mb_x - s->mb_stride - 2];
682 uvmv = &v->luma_mv[s->mb_x - s->mb_stride - 2];
683 ttblk = &v->ttblk[s->mb_x - s->mb_stride - 2];
684 flags = s->mb_x == 2 ? LEFT_EDGE : 0;
685 for (i = 0; i < block_count; i++)
686 vc1_p_h_loop_filter(v,
687 i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 16 : dest,
691 &s->current_picture.motion_val[0][s->block_index[i] - 2 * s->b8_stride - 4 + v->blocks_off],
692 i > 3 ? &v->mv_f[0][s->block_index[i] - s->mb_stride - 2 + v->mb_off] :
693 &v->mv_f[0][s->block_index[i] - 2 * s->b8_stride - 4 + v->blocks_off],
698 if (s->mb_x == s->mb_width - 1) {
700 dest = s->dest[0] - 16 * s->linesize - 16;
701 cbp = &v->cbp[s->mb_x - s->mb_stride - 1];
702 is_intra = &v->is_intra[s->mb_x - s->mb_stride - 1];
703 uvmv = &v->luma_mv[s->mb_x - s->mb_stride - 1];
704 ttblk = &v->ttblk[s->mb_x - s->mb_stride - 1];
705 flags = s->mb_x == 1 ? LEFT_EDGE : 0;
706 for (i = 0; i < block_count; i++)
707 vc1_p_h_loop_filter(v,
708 i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest,
712 &s->current_picture.motion_val[0][s->block_index[i] - 2 * s->b8_stride - 2 + v->blocks_off],
713 i > 3 ? &v->mv_f[0][s->block_index[i] - s->mb_stride - 1 + v->mb_off] :
714 &v->mv_f[0][s->block_index[i] - 2 * s->b8_stride - 2 + v->blocks_off],
719 dest = s->dest[0] - 16 * s->linesize;
720 cbp = &v->cbp[s->mb_x - s->mb_stride];
721 is_intra = &v->is_intra[s->mb_x - s->mb_stride];
722 uvmv = &v->luma_mv[s->mb_x - s->mb_stride];
723 ttblk = &v->ttblk[s->mb_x - s->mb_stride];
724 flags = s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE;
725 for (i = 0; i < block_count; i++)
726 vc1_p_h_loop_filter(v,
727 i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest,
731 &s->current_picture.motion_val[0][s->block_index[i] - 2 * s->b8_stride + v->blocks_off],
732 i > 3 ? &v->mv_f[0][s->block_index[i] - s->mb_stride + v->mb_off] :
733 &v->mv_f[0][s->block_index[i] - 2 * s->b8_stride + v->blocks_off],
/* Last row: H pass for row mb_y itself, column mb_x - 2 and, in the last
 * column, columns mb_x - 1 and mb_x. */
740 dest = s->dest[0] - 32;
741 cbp = &v->cbp[s->mb_x - 2];
742 is_intra = &v->is_intra[s->mb_x - 2];
743 uvmv = &v->luma_mv[s->mb_x - 2];
744 ttblk = &v->ttblk[s->mb_x - 2];
745 flags = s->mb_x == 2 ? LEFT_EDGE : 0;
746 for (i = 0; i < block_count; i++)
747 vc1_p_h_loop_filter(v,
748 i > 3 ? s->dest[i - 3] - 16 : dest,
752 &s->current_picture.motion_val[0][s->block_index[i] - 4 + v->blocks_off],
753 i > 3 ? &v->mv_f[0][s->block_index[i] - 2 + v->mb_off] :
754 &v->mv_f[0][s->block_index[i] - 4 + v->blocks_off],
759 if (s->mb_x == s->mb_width - 1) {
761 dest = s->dest[0] - 16;
762 cbp = &v->cbp[s->mb_x - 1];
763 is_intra = &v->is_intra[s->mb_x - 1];
764 uvmv = &v->luma_mv[s->mb_x - 1];
765 ttblk = &v->ttblk[s->mb_x - 1];
766 flags = s->mb_x == 1 ? LEFT_EDGE : 0;
767 for (i = 0; i < block_count; i++)
768 vc1_p_h_loop_filter(v,
769 i > 3 ? s->dest[i - 3] - 8 : dest,
773 &s->current_picture.motion_val[0][s->block_index[i] - 2 + v->blocks_off],
774 i > 3 ? &v->mv_f[0][s->block_index[i] - 1 + v->mb_off] :
775 &v->mv_f[0][s->block_index[i] - 2 + v->blocks_off],
781 cbp = &v->cbp[s->mb_x];
782 is_intra = &v->is_intra[s->mb_x];
783 uvmv = &v->luma_mv[s->mb_x];
784 ttblk = &v->ttblk[s->mb_x];
785 flags = s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE;
786 for (i = 0; i < block_count; i++)
787 vc1_p_h_loop_filter(v,
788 i > 3 ? s->dest[i - 3] : dest,
792 &s->current_picture.motion_val[0][s->block_index[i] + v->blocks_off],
793 i > 3 ? &v->mv_f[0][s->block_index[i] + v->mb_off] :
794 &v->mv_f[0][s->block_index[i] + v->blocks_off],
/* Horizontal loop filter for one block in the "intfr" variant (presumably
 * interlaced-frame pictures - TODO confirm). Every call passes a doubled
 * line stride (2 * linesize).
 *
 * v:         decoder context (pq, line sizes, vc1dsp function table).
 * dest:      top-left sample of the macroblock plane holding the block.
 * ttblk:     per-MB values with 4 bits per block; the TT_4X4 / TT_4X8 values
 *            enable filtering of the line through the block's middle
 *            (dst + 4).
 * flags:     combination of the *_EDGE bits; RIGHT_EDGE suppresses the
 *            filter on the block's right side (dst + 8) except where noted.
 * fieldtx:   fieldtx_plane entry of the macroblock.
 * block_num: block index within the macroblock.
 *
 * NOTE(review): the declarations of pq, tt and dst and the branch heads that
 * select between the groups below (from block_num and fieldtx) are not
 * present in this listing. */
802 static av_always_inline void vc1_p_h_intfr_loop_filter(VC1Context *v, uint8_t *dest, int *ttblk,
803 uint32_t flags, uint8_t fieldtx, int block_num)
805 MpegEncContext *s = &v->s;
808 int linesize = block_num > 3 ? s->uvlinesize : s->linesize;
814 dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
/* Extract this block's 4-bit entry. */
816 tt = ttblk[0] >> (block_num * 4) & 0xf;
/* Group 1: 8-line filters starting at dst; on the right edge only block 0
 * keeps its right-side filter. */
820 if (tt == TT_4X4 || tt == TT_4X8)
821 v->vc1dsp.vc1_h_loop_filter8(dst + 4, 2 * linesize, pq);
822 if (!(flags & RIGHT_EDGE) || block_num == 0)
823 v->vc1dsp.vc1_h_loop_filter8(dst + 8, 2 * linesize, pq);
/* Group 2: 8-line filters starting 7 lines above dst; on the right edge only
 * block 2 keeps its right-side filter. */
825 if (tt == TT_4X4 || tt == TT_4X8)
826 v->vc1dsp.vc1_h_loop_filter8(dst - 7 * linesize + 4, 2 * linesize, pq);
827 if (!(flags & RIGHT_EDGE) || block_num == 2)
828 v->vc1dsp.vc1_h_loop_filter8(dst - 7 * linesize + 8, 2 * linesize, pq);
/* Group 3: pairs of 4-line filters on even and odd lines; on the right edge
 * only blocks with (block_num & 5) == 0 keep the right-side filter. */
831 if(tt == TT_4X4 || tt == TT_4X8) {
832 v->vc1dsp.vc1_h_loop_filter4(dst + 4, 2 * linesize, pq);
833 v->vc1dsp.vc1_h_loop_filter4(dst + linesize + 4, 2 * linesize, pq);
835 if (!(flags & RIGHT_EDGE) || !(block_num & 5)) {
836 v->vc1dsp.vc1_h_loop_filter4(dst + 8, 2 * linesize, pq);
837 v->vc1dsp.vc1_h_loop_filter4(dst + linesize + 8, 2 * linesize, pq);
/* Group 4: as group 3, but the right side is never filtered on the right
 * edge. */
841 if (tt == TT_4X4 || tt == TT_4X8) {
842 v->vc1dsp.vc1_h_loop_filter4(dst + 4, 2 * linesize, pq);
843 v->vc1dsp.vc1_h_loop_filter4(dst + linesize + 4, 2 * linesize, pq);
845 if (!(flags & RIGHT_EDGE)) {
846 v->vc1dsp.vc1_h_loop_filter4(dst + 8, 2 * linesize, pq);
847 v->vc1dsp.vc1_h_loop_filter4(dst + linesize + 8, 2 * linesize, pq);
/* Vertical loop filter for one block in the "intfr" variant (presumably
 * interlaced-frame pictures - TODO confirm). Every call is
 * vc1_v_loop_filter8 with a doubled line stride (2 * linesize).
 *
 * v:         decoder context (pq, line sizes, vc1dsp function table).
 * dest:      top-left sample of the macroblock plane holding the block.
 * ttblk:     per-MB values with 4 bits per block; the TT_4X4 / TT_8X4 values
 *            enable the additional filters listed first in each group.
 * flags:     combination of the *_EDGE bits; TOP_EDGE and BOTTOM_EDGE
 *            suppress some of the calls below.
 * fieldtx:   fieldtx_plane entry of the macroblock.
 * block_num: block index within the macroblock.
 *
 * NOTE(review): the declarations of pq, tt and dst and the branch heads that
 * select between the groups below (from block_num and fieldtx) are not
 * present in this listing. */
852 static av_always_inline void vc1_p_v_intfr_loop_filter(VC1Context *v, uint8_t *dest, int *ttblk,
853 uint32_t flags, uint8_t fieldtx, int block_num)
855 MpegEncContext *s = &v->s;
858 int linesize = block_num > 3 ? s->uvlinesize : s->linesize;
864 dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
/* Extract this block's 4-bit entry. */
866 tt = ttblk[0] >> (block_num * 4) & 0xf;
/* Group 1: single calls at line offsets 8 and 16; the second is skipped on
 * the bottom edge. */
870 if (tt == TT_4X4 || tt == TT_8X4)
871 v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, 2 * linesize, pq);
872 if (!(flags & BOTTOM_EDGE))
873 v->vc1dsp.vc1_v_loop_filter8(dst + 16 * linesize, 2 * linesize, pq);
/* Group 2: the same at line offsets 1 and 9. */
875 if (tt == TT_4X4 || tt == TT_8X4)
876 v->vc1dsp.vc1_v_loop_filter8(dst + linesize, 2 * linesize, pq);
877 if (!(flags & BOTTOM_EDGE))
878 v->vc1dsp.vc1_v_loop_filter8(dst + 9 * linesize, 2 * linesize, pq);
/* Group 3: pairs of calls on even and odd lines. Offsets 4/5 run only away
 * from the top edge and for the listed tt values; offsets 8/9 follow
 * unconditionally. */
882 if (!(flags & TOP_EDGE) && (tt == TT_4X4 || tt == TT_8X4)) {
883 v->vc1dsp.vc1_v_loop_filter8(dst + 4 * linesize, 2 * linesize, pq);
884 v->vc1dsp.vc1_v_loop_filter8(dst + 5 * linesize, 2 * linesize, pq);
886 v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, 2 * linesize, pq);
887 v->vc1dsp.vc1_v_loop_filter8(dst + 9 * linesize, 2 * linesize, pq);
/* Alternative to group 3, skipped entirely on the bottom edge. */
888 } else if (!(flags & BOTTOM_EDGE)) {
889 if (tt == TT_4X4 || tt == TT_8X4) {
890 v->vc1dsp.vc1_v_loop_filter8(dst + 4 * linesize, 2 * linesize, pq);
891 v->vc1dsp.vc1_v_loop_filter8(dst + 5 * linesize, 2 * linesize, pq);
893 v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, 2 * linesize, pq);
894 v->vc1dsp.vc1_v_loop_filter8(dst + 9 * linesize, 2 * linesize, pq);
/* Group 4: skipped entirely on the bottom edge; offsets 4/5 additionally
 * require not being on the top edge. */
898 if (!(flags & BOTTOM_EDGE)) {
899 if (!(flags & TOP_EDGE) && (tt == TT_4X4 || tt == TT_8X4)) {
900 v->vc1dsp.vc1_v_loop_filter8(dst + 4 * linesize, 2 * linesize, pq);
901 v->vc1dsp.vc1_v_loop_filter8(dst + 5 * linesize, 2 * linesize, pq);
903 v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, 2 * linesize, pq);
904 v->vc1dsp.vc1_v_loop_filter8(dst + 9 * linesize, 2 * linesize, pq);
/* Loop-filter driver built on vc1_p_v_intfr_loop_filter() and
 * vc1_p_h_intfr_loop_filter().
 *
 * For every filtered macroblock it sets up the luma pointer (s->dest[0]
 * moved by 16 lines / 16 samples per MB), a pointer into v->ttblk, the edge
 * flags and the macroblock's v->fieldtx_plane entry, then calls the helper
 * for each of the block_count blocks. Chroma blocks (i > 3) use
 * s->dest[i - 3] moved by 8 lines / 8 samples per MB.
 *
 * V pass: row mb_y - 1 at column mb_x - 1, plus column mb_x in the last
 * column (mb_x == mb_width - 1); on the last row (mb_y == end_mb_y - 1) the
 * current row is filtered as well, with BOTTOM_EDGE set.
 * H pass: row mb_y - 2 at column mb_x - 2, plus columns mb_x - 1 and mb_x in
 * the last column; on the last row, rows mb_y - 1 and mb_y follow with the
 * same column pattern. LEFT_EDGE / RIGHT_EDGE are set when the filtered
 * column is 0 / the current column in the last-column case.
 *
 * NOTE(review): the local declarations, several "if" heads, some dest
 * assignments, the trailing arguments of each helper call and the closing
 * line of the block comment below are not present in this listing. */
909 void ff_vc1_p_intfr_loop_filter(VC1Context *v)
911 MpegEncContext *s = &v->s;
/* 4 blocks when CONFIG_GRAY is enabled and AV_CODEC_FLAG_GRAY is set,
 * otherwise all 6. */
912 int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
/* Index of the current MB in fieldtx_plane (row stride is mb_stride). */
913 int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
920 /* Within a MB, the vertical loop filter always runs before the horizontal.
921 * To accomplish that, we run the V loop filter on all applicable
922 * horizontal borders of the MB above the last overlap filtered MB. Then,
923 * we wait for the loop filter iteration on the next row and next column to
924 * do H loop filter on all applicable vertical borders of this MB.
925 * Therefore, the loop filter trails by two rows and one column relative to
926 * the overlap filter and two rows and two columns relative to the decoding
929 if (s->mb_y >= s->start_mb_y + 1) {
930 dest = s->dest[0] - 16 * s->linesize - 16;
931 ttblk = &v->ttblk[s->mb_x - s->mb_stride - 1];
932 flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
933 fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride - 1];
934 for (i = 0; i < block_count; i++)
935 vc1_p_v_intfr_loop_filter(v,
936 i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest,
943 if (s->mb_x == s->mb_width - 1) {
944 if (s->mb_y >= s->start_mb_y + 1) {
945 dest = s->dest[0] - 16 * s->linesize;
946 ttblk = &v->ttblk[s->mb_x - s->mb_stride];
947 flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
948 fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride];
949 for (i = 0; i < block_count; i++)
950 vc1_p_v_intfr_loop_filter(v,
951 i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest,
958 if (s->mb_y == s->end_mb_y - 1) {
960 dest = s->dest[0] - 16;
961 ttblk = &v->ttblk[s->mb_x - 1];
962 flags = s->mb_y == s->start_mb_y ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;
963 fieldtx = v->fieldtx_plane[mb_pos - 1];
964 for (i = 0; i < block_count; i++)
965 vc1_p_v_intfr_loop_filter(v,
966 i > 3 ? s->dest[i - 3] - 8 : dest,
972 if (s->mb_x == s->mb_width - 1) {
974 ttblk = &v->ttblk[s->mb_x];
975 flags = s->mb_y == s->start_mb_y ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;
976 fieldtx = v->fieldtx_plane[mb_pos];
977 for (i = 0; i < block_count; i++)
978 vc1_p_v_intfr_loop_filter(v,
979 i > 3 ? s->dest[i - 3] : dest,
987 if (s->mb_y >= s->start_mb_y + 2) {
989 dest = s->dest[0] - 32 * s->linesize - 32;
990 ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride - 2];
991 flags = s->mb_x == 2 ? LEFT_EDGE : 0;
992 fieldtx = v->fieldtx_plane[mb_pos - 2 * s->mb_stride - 2];
993 for (i = 0; i < block_count; i++)
994 vc1_p_h_intfr_loop_filter(v,
995 i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 16 : dest,
1001 if (s->mb_x == s->mb_width - 1) {
1003 dest = s->dest[0] - 32 * s->linesize - 16;
1004 ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride - 1];
1005 flags = s->mb_x == 1 ? LEFT_EDGE : 0;
1006 fieldtx = v->fieldtx_plane[mb_pos - 2 * s->mb_stride - 1];
1007 for (i = 0; i < block_count; i++)
1008 vc1_p_h_intfr_loop_filter(v,
1009 i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 8 : dest,
1015 dest = s->dest[0] - 32 * s->linesize;
1016 ttblk = &v->ttblk[s->mb_x - 2 * s->mb_stride];
1017 flags = s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE;
1018 fieldtx = v->fieldtx_plane[mb_pos - 2 * s->mb_stride];
1019 for (i = 0; i < block_count; i++)
1020 vc1_p_h_intfr_loop_filter(v,
1021 i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize : dest,
1028 if (s->mb_y == s->end_mb_y - 1) {
1029 if (s->mb_y >= s->start_mb_y + 1) {
1031 dest = s->dest[0] - 16 * s->linesize - 32;
1032 ttblk = &v->ttblk[s->mb_x - s->mb_stride - 2];
1033 flags = s->mb_x == 2 ? LEFT_EDGE : 0;
1034 fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride - 2];
1035 for (i = 0; i < block_count; i++)
1036 vc1_p_h_intfr_loop_filter(v,
1037 i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 16 : dest,
1043 if (s->mb_x == s->mb_width - 1) {
1045 dest = s->dest[0] - 16 * s->linesize - 16;
1046 ttblk = &v->ttblk[s->mb_x - s->mb_stride - 1];
1047 flags = s->mb_x == 1 ? LEFT_EDGE : 0;
1048 fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride - 1];
1049 for (i = 0; i < block_count; i++)
1050 vc1_p_h_intfr_loop_filter(v,
1051 i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest,
1057 dest = s->dest[0] - 16 * s->linesize;
1058 ttblk = &v->ttblk[s->mb_x - s->mb_stride];
1059 flags = s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE;
1060 fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride];
1061 for (i = 0; i < block_count; i++)
1062 vc1_p_h_intfr_loop_filter(v,
1063 i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest,
1071 dest = s->dest[0] - 32;
1072 ttblk = &v->ttblk[s->mb_x - 2];
1073 flags = s->mb_x == 2 ? LEFT_EDGE : 0;
1074 fieldtx = v->fieldtx_plane[mb_pos - 2];
1075 for (i = 0; i < block_count; i++)
1076 vc1_p_h_intfr_loop_filter(v,
1077 i > 3 ? s->dest[i - 3] - 16 : dest,
1083 if (s->mb_x == s->mb_width - 1) {
1085 dest = s->dest[0] - 16;
1086 ttblk = &v->ttblk[s->mb_x - 1];
1087 flags = s->mb_x == 1 ? LEFT_EDGE : 0;
1088 fieldtx = v->fieldtx_plane[mb_pos - 1];
1089 for (i = 0; i < block_count; i++)
1090 vc1_p_h_intfr_loop_filter(v,
1091 i > 3 ? s->dest[i - 3] - 8 : dest,
1098 ttblk = &v->ttblk[s->mb_x];
1099 flags = s->mb_x ? RIGHT_EDGE : LEFT_EDGE | RIGHT_EDGE;
1100 fieldtx = v->fieldtx_plane[mb_pos];
1101 for (i = 0; i < block_count; i++)
1102 vc1_p_h_intfr_loop_filter(v,
1103 i > 3 ? s->dest[i - 3] : dest,
/**
 * Apply the H loop filter to one block of a macroblock (presumably for
 * B pictures in interlaced-field mode, judging by the "b"/"intfi" name -
 * TODO confirm).
 *
 * NOTE(review): the line numbers embedded in this listing skip values, so
 * some statements (local declarations, guarding conditions, braces) are not
 * shown here. The comments below describe only the visible lines.
 *
 * @param v         decoder context; v->s and v->vc1dsp are used
 * @param dest      pointer from which the block address dst is derived
 * @param cbp       coded block pattern word; cbp[0] is shifted right by
 *                  4 * block_num
 * @param ttblk     transform type word; the 4-bit field starting at bit
 *                  4 * block_num of ttblk[0] is used
 * @param flags     edge flags; only RIGHT_EDGE is tested in the visible code
 * @param block_num block index; values above 3 select s->uvlinesize
 */
1112 static av_always_inline void vc1_b_h_intfi_loop_filter(VC1Context *v, uint8_t *dest, uint32_t *cbp,
1113 int *ttblk, uint32_t flags, int block_num)
1115 MpegEncContext *s = &v->s;
/* Bring this block's bits of the coded block pattern down to bit 0. */
1118 uint32_t block_cbp = cbp[0] >> (block_num * 4);
/* Blocks above index 3 use the uvlinesize stride, blocks 0-3 use linesize. */
1120 int idx, linesize = block_num > 3 ? s->uvlinesize : s->linesize;
/* Position inside the macroblock: bit 1 of block_num moves 8 rows down,
 * bit 0 moves 8 columns right. Note that this expression uses s->linesize,
 * not the local linesize. */
1125 dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
/* Filter at dst + 8 unless RIGHT_EDGE is set and block_num has bit 0 or
 * bit 2 set (i.e. blocks 1, 3, 4 and 5). */
1127 if (!(flags & RIGHT_EDGE) || !(block_num & 5)) {
/* NOTE(review): the two calls below are textually identical; whatever
 * condition selects between them is not visible in this listing. */
1129 v->vc1dsp.vc1_h_loop_filter8(dst + 8, linesize, pq);
1131 v->vc1dsp.vc1_h_loop_filter8(dst + 8, linesize, pq);
/* Extract the 4-bit transform type of this block. */
1134 tt = ttblk[0] >> (block_num * 4) & 0xf;
/* Only TT_4X4 and TT_4X8 blocks get the additional filtering at column
 * offset 4. */
1135 if (tt == TT_4X4 || tt == TT_4X8) {
/* bit 0 of idx = cbp bit 0 | cbp bit 1, bit 2 of idx = cbp bit 2 | cbp bit 3 */
1136 idx = (block_cbp | (block_cbp >> 1)) & 5;
/* First call starts 4 rows down, second call starts at the block's first
 * row; the idx tests guarding each call are not visible in this listing. */
1138 v->vc1dsp.vc1_h_loop_filter4(dst + 4 * linesize + 4, linesize, pq);
1140 v->vc1dsp.vc1_h_loop_filter4(dst + 4, linesize, pq);
/**
 * Apply the V loop filter to one block of a macroblock (presumably for
 * B pictures in interlaced-field mode, judging by the "b"/"intfi" name -
 * TODO confirm).
 *
 * NOTE(review): the line numbers embedded in this listing skip values, so
 * some statements (local declarations, guarding conditions, braces) are not
 * shown here. The comments below describe only the visible lines.
 *
 * @param v         decoder context; v->s and v->vc1dsp are used
 * @param dest      pointer from which the block address dst is derived
 * @param cbp       coded block pattern word; cbp[0] is shifted right by
 *                  4 * block_num
 * @param ttblk     transform type word; the 4-bit field starting at bit
 *                  4 * block_num of ttblk[0] is used
 * @param flags     edge flags; only BOTTOM_EDGE is tested in the visible code
 * @param block_num block index; values above 3 select s->uvlinesize
 */
1144 static av_always_inline void vc1_b_v_intfi_loop_filter(VC1Context *v, uint8_t *dest, uint32_t *cbp,
1145 int *ttblk, uint32_t flags, int block_num)
1147 MpegEncContext *s = &v->s;
/* Bring this block's bits of the coded block pattern down to bit 0. */
1150 uint32_t block_cbp = cbp[0] >> (block_num * 4);
/* Blocks above index 3 use the uvlinesize stride, blocks 0-3 use linesize. */
1152 int idx, linesize = block_num > 3 ? s->uvlinesize : s->linesize;
/* Position inside the macroblock: bit 1 of block_num moves 8 rows down,
 * bit 0 moves 8 columns right. Note that this expression uses s->linesize,
 * not the local linesize. */
1157 dst = dest + (block_num & 2) * 4 * s->linesize + (block_num & 1) * 8;
/* Filter 8 rows below dst unless BOTTOM_EDGE is set and block_num is 2 or
 * higher. */
1159 if(!(flags & BOTTOM_EDGE) || block_num < 2)
1160 v->vc1dsp.vc1_v_loop_filter8(dst + 8 * linesize, linesize, pq);
/* Extract the 4-bit transform type of this block. */
1162 tt = ttblk[0] >> (block_num * 4) & 0xf;
/* Only TT_4X4 and TT_8X4 blocks get the additional filtering at row
 * offset 4. */
1163 if (tt == TT_4X4 || tt == TT_8X4) {
/* bit 0 of idx = cbp bit 0 | cbp bit 2, bit 1 of idx = cbp bit 1 | cbp bit 3 */
1164 idx = (block_cbp | (block_cbp >> 2)) & 3;
/* First call starts 4 columns to the right, second call starts at the
 * block's first column; the idx tests guarding each call are not visible in
 * this listing. */
1166 v->vc1dsp.vc1_v_loop_filter4(dst + 4 * linesize + 4, linesize, pq);
1168 v->vc1dsp.vc1_v_loop_filter4(dst + 4 * linesize, linesize, pq);
/**
 * Run the loop filter for the macroblock position currently described by
 * v->s (s->mb_x, s->mb_y, s->dest[]); presumably used for B pictures in
 * interlaced-field mode, judging by the name - TODO confirm.
 *
 * The V filter passes come first, followed by the H filter passes. Each pass
 * loops over block_count blocks and calls vc1_b_v_intfi_loop_filter() or
 * vc1_b_h_intfi_loop_filter() once per block.
 *
 * NOTE(review): the line numbers embedded in this listing skip values, so
 * some statements (local declarations, a few assignments and guarding
 * conditions, braces) are not shown here. The comments below describe only
 * the visible lines.
 *
 * @param v decoder context; v->s, v->cbp and v->ttblk are read
 */
1172 void ff_vc1_b_intfi_loop_filter(VC1Context *v)
1174 MpegEncContext *s = &v->s;
/* 4 blocks when built with CONFIG_GRAY and AV_CODEC_FLAG_GRAY is set,
 * otherwise 6. */
1175 int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6;
1182 /* Within a MB, the vertical loop filter always runs before the horizontal.
1183 * To accomplish that, we run the V loop filter on all applicable
1184 * horizontal borders of the MB above the currently decoded MB. Then,
1185 * we wait for the next loop filter iteration to do H loop filter on all
1186 * applicable vertical borders of this MB. Therefore, the loop filter
1187 * trails by one row and one column relative to the decoding loop. */
/* V filter, macroblock one row above the current one. */
1188 if (!s->first_slice_line) {
1189 dest = s->dest[0] - 16 * s->linesize;
1190 cbp = &v->cbp[s->mb_x - s->mb_stride];
1191 ttblk = &v->ttblk[s->mb_x - s->mb_stride];
/* The filtered row is s->mb_y - 1, so TOP_EDGE is set when that row is
 * s->start_mb_y. */
1192 flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0;
/* Blocks 4 and 5 take their base from s->dest[1] and s->dest[2], moved up
 * by 8 rows of s->uvlinesize. */
1193 for (i = 0; i < block_count; i++)
1194 vc1_b_v_intfi_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest, cbp, ttblk, flags, i);
/* V filter, current macroblock; only done on the last row (s->end_mb_y - 1).
 * The assignment of dest for this pass is not visible in this listing. */
1196 if (s->mb_y == s->end_mb_y - 1) {
1198 cbp = &v->cbp[s->mb_x];
1199 ttblk = &v->ttblk[s->mb_x];
1200 flags = s->first_slice_line ? TOP_EDGE | BOTTOM_EDGE : BOTTOM_EDGE;
1201 for (i = 0; i < block_count; i++)
1202 vc1_b_v_intfi_loop_filter(v, i > 3 ? s->dest[i - 3] : dest, cbp, ttblk, flags, i);
/* H filter, row above: macroblock one row up and one column to the left. */
1205 if (!s->first_slice_line) {
1206 dest = s->dest[0] - 16 * s->linesize - 16;
1207 cbp = &v->cbp[s->mb_x - s->mb_stride - 1];
1208 ttblk = &v->ttblk[s->mb_x - s->mb_stride - 1];
/* The filtered column is s->mb_x - 1, so LEFT_EDGE is set when that column
 * is 0. */
1210 flags = s->mb_x == 1 ? LEFT_EDGE : 0;
1211 for (i = 0; i < block_count; i++)
1212 vc1_b_h_intfi_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest, cbp, ttblk, flags, i);
/* In the last column, an extra pass is made with RIGHT_EDGE set. How dest,
 * cbp and ttblk are advanced for this pass is not visible in this listing. */
1214 if (s->mb_x == s->mb_width - 1) {
1218 flags = s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE;
1219 for (i = 0; i < block_count; i++)
1220 vc1_b_h_intfi_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize : dest, cbp, ttblk, flags, i);
/* H filter, current row; only done on the last row (s->end_mb_y - 1). This
 * pass handles the macroblock one column to the left. */
1223 if (s->mb_y == s->end_mb_y - 1) {
1224 dest = s->dest[0] - 16;
1225 cbp = &v->cbp[s->mb_x - 1];
1226 ttblk = &v->ttblk[s->mb_x - 1];
1228 flags = s->mb_x == 1 ? LEFT_EDGE : 0;
1229 for (i = 0; i < block_count; i++)
1230 vc1_b_h_intfi_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 : dest, cbp, ttblk, flags, i);
/* In the last column, an extra pass is made with RIGHT_EDGE set. How dest,
 * cbp and ttblk are advanced for this pass is not visible in this listing. */
1232 if (s->mb_x == s->mb_width - 1) {
1236 flags = s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE;
1237 for (i = 0; i < block_count; i++)
1238 vc1_b_h_intfi_loop_filter(v, i > 3 ? s->dest[i - 3] : dest, cbp, ttblk, flags, i);