X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Fvc1_loopfilter.c;h=0f990cccefea9265a5256b160fe4069dfde53cd7;hb=c950beb68dee016e0e0a1b729d40abf700d32d1a;hp=4c0de7c025557b812e037331cae375dbb0e9807f;hpb=02e4970bc9d3215f862a5d64ec48922d98eb17c1;p=ffmpeg diff --git a/libavcodec/vc1_loopfilter.c b/libavcodec/vc1_loopfilter.c index 4c0de7c0255..0f990cccefe 100644 --- a/libavcodec/vc1_loopfilter.c +++ b/libavcodec/vc1_loopfilter.c @@ -31,59 +31,74 @@ #include "vc1.h" #include "vc1dsp.h" -void ff_vc1_loop_filter_iblk(VC1Context *v, int pq) +static av_always_inline void vc1_h_overlap_filter(VC1Context *v, int16_t (*left_block)[64], + int16_t (*right_block)[64], int left_fieldtx, + int right_fieldtx, int block_num) { - MpegEncContext *s = &v->s; - int j; - if (!s->first_slice_line) { - v->vc1dsp.vc1_v_loop_filter16(s->dest[0], s->linesize, pq); - if (s->mb_x) - v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize, s->linesize, pq); - v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16 * s->linesize + 8, s->linesize, pq); - if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) - for (j = 0; j < 2; j++) { - v->vc1dsp.vc1_v_loop_filter8(s->dest[j + 1], s->uvlinesize, pq); - if (s->mb_x) - v->vc1dsp.vc1_h_loop_filter8(s->dest[j + 1] - 8 * s->uvlinesize, s->uvlinesize, pq); - } - } - v->vc1dsp.vc1_v_loop_filter16(s->dest[0] + 8 * s->linesize, s->linesize, pq); + switch (block_num) { + case 0: + v->vc1dsp.vc1_h_s_overlap(left_block[2], + right_block[0], + left_fieldtx ^ right_fieldtx ? 16 - 8 * left_fieldtx : 8, + left_fieldtx ^ right_fieldtx ? 16 - 8 * right_fieldtx : 8, + left_fieldtx || right_fieldtx ? 0 : 1); + break; - if (s->mb_y == s->end_mb_y - 1) { - if (s->mb_x) { - v->vc1dsp.vc1_h_loop_filter16(s->dest[0], s->linesize, pq); - if (!CONFIG_GRAY || !(s->avctx->flags & AV_CODEC_FLAG_GRAY)) { - v->vc1dsp.vc1_h_loop_filter8(s->dest[1], s->uvlinesize, pq); - v->vc1dsp.vc1_h_loop_filter8(s->dest[2], s->uvlinesize, pq); - } - } - v->vc1dsp.vc1_h_loop_filter16(s->dest[0] + 8, s->linesize, pq); - } -} + case 1: + v->vc1dsp.vc1_h_s_overlap(right_block[0], + right_block[2], + 8, + 8, + right_fieldtx ? 0 : 1); + break; -static av_always_inline void vc1_h_overlap_filter(VC1Context *v, int16_t (*left_block)[64], - int16_t (*right_block)[64], int block_num) -{ - if (left_block != right_block || (block_num & 5) == 1) { - if (block_num > 3) - v->vc1dsp.vc1_h_s_overlap(left_block[block_num], right_block[block_num]); - else if (block_num & 1) - v->vc1dsp.vc1_h_s_overlap(right_block[block_num - 1], right_block[block_num]); - else - v->vc1dsp.vc1_h_s_overlap(left_block[block_num + 1], right_block[block_num]); + case 2: + v->vc1dsp.vc1_h_s_overlap(!left_fieldtx && right_fieldtx ? left_block[2] + 8 : left_block[3], + left_fieldtx && !right_fieldtx ? right_block[0] + 8 : right_block[1], + left_fieldtx ^ right_fieldtx ? 16 - 8 * left_fieldtx : 8, + left_fieldtx ^ right_fieldtx ? 16 - 8 * right_fieldtx : 8, + left_fieldtx || right_fieldtx ? 2 : 1); + break; + + case 3: + v->vc1dsp.vc1_h_s_overlap(right_block[1], + right_block[3], + 8, + 8, + right_fieldtx ? 2 : 1); + break; + + case 4: + case 5: + v->vc1dsp.vc1_h_s_overlap(left_block[block_num], right_block[block_num], 8, 8, 1); + break; } } static av_always_inline void vc1_v_overlap_filter(VC1Context *v, int16_t (*top_block)[64], int16_t (*bottom_block)[64], int block_num) { - if (top_block != bottom_block || block_num & 2) { - if (block_num > 3) - v->vc1dsp.vc1_v_s_overlap(top_block[block_num], bottom_block[block_num]); - else if (block_num & 2) - v->vc1dsp.vc1_v_s_overlap(bottom_block[block_num - 2], bottom_block[block_num]); - else - v->vc1dsp.vc1_v_s_overlap(top_block[block_num + 2], bottom_block[block_num]); + switch (block_num) { + case 0: + v->vc1dsp.vc1_v_s_overlap(top_block[1], bottom_block[0]); + break; + + case 1: + v->vc1dsp.vc1_v_s_overlap(top_block[3], bottom_block[2]); + break; + + case 2: + v->vc1dsp.vc1_v_s_overlap(bottom_block[0], bottom_block[1]); + break; + + case 3: + v->vc1dsp.vc1_v_s_overlap(bottom_block[2], bottom_block[3]); + break; + + case 4: + case 5: + v->vc1dsp.vc1_v_s_overlap(top_block[block_num], bottom_block[block_num]); + break; } } @@ -108,22 +123,38 @@ void ff_vc1_i_overlap_filter(VC1Context *v) * borders. Therefore, the H overlap trails by one MB col and the * V overlap trails by one MB row. This is reflected in the time at which * we run the put_pixels loop, i.e. delayed by one row and one column. */ - for (i = 0; i < block_count; i++) - if (v->pq >= 9 || v->condover == CONDOVER_ALL || - (v->over_flags_plane[mb_pos] && ((i & 5) == 1 || (s->mb_x && v->over_flags_plane[mb_pos - 1])))) - vc1_h_overlap_filter(v, s->mb_x ? left_blk : cur_blk, cur_blk, i); + for (i = 0; i < block_count; i++) { + if (s->mb_x == 0 && (i & 5) != 1) + continue; + + if (v->pq >= 9 || (v->profile == PROFILE_ADVANCED && + (v->condover == CONDOVER_ALL || + (v->over_flags_plane[mb_pos] && + ((i & 5) == 1 || v->over_flags_plane[mb_pos - 1]))))) + vc1_h_overlap_filter(v, + s->mb_x ? left_blk : cur_blk, cur_blk, + v->fcm == ILACE_FRAME && s->mb_x && v->fieldtx_plane[mb_pos - 1], + v->fcm == ILACE_FRAME && v->fieldtx_plane[mb_pos], + i); + } if (v->fcm != ILACE_FRAME) for (i = 0; i < block_count; i++) { - if (s->mb_x && (v->pq >= 9 || v->condover == CONDOVER_ALL || - (v->over_flags_plane[mb_pos - 1] && - ((i & 2) || (!s->first_slice_line && v->over_flags_plane[mb_pos - 1 - s->mb_stride]))))) + if (s->first_slice_line && !(i & 2)) + continue; + + if (s->mb_x && + (v->pq >= 9 || (v->profile == PROFILE_ADVANCED && + (v->condover == CONDOVER_ALL || + (v->over_flags_plane[mb_pos - 1] && + ((i & 2) || v->over_flags_plane[mb_pos - 1 - s->mb_stride])))))) vc1_v_overlap_filter(v, s->first_slice_line ? left_blk : topleft_blk, left_blk, i); - if (s->mb_x == s->mb_width - 1) - if (v->pq >= 9 || v->condover == CONDOVER_ALL || - (v->over_flags_plane[mb_pos] && - ((i & 2) || (!s->first_slice_line && v->over_flags_plane[mb_pos - s->mb_stride])))) - vc1_v_overlap_filter(v, s->first_slice_line ? cur_blk : top_blk, cur_blk, i); + if (s->mb_x == s->mb_width - 1 && + (v->pq >= 9 || (v->profile == PROFILE_ADVANCED && + (v->condover == CONDOVER_ALL || + (v->over_flags_plane[mb_pos] && + ((i & 2) || v->over_flags_plane[mb_pos - s->mb_stride])))))) + vc1_v_overlap_filter(v, s->first_slice_line ? cur_blk : top_blk, cur_blk, i); } } @@ -132,6 +163,7 @@ void ff_vc1_p_overlap_filter(VC1Context *v) MpegEncContext *s = &v->s; int16_t (*topleft_blk)[64], (*top_blk)[64], (*left_blk)[64], (*cur_blk)[64]; int block_count = CONFIG_GRAY && (s->avctx->flags & AV_CODEC_FLAG_GRAY) ? 4 : 6; + int mb_pos = s->mb_x + s->mb_y * s->mb_stride; int i; topleft_blk = v->block[v->topleft_blk_idx]; @@ -139,18 +171,29 @@ void ff_vc1_p_overlap_filter(VC1Context *v) left_blk = v->block[v->left_blk_idx]; cur_blk = v->block[v->cur_blk_idx]; - for (i = 0; i < block_count; i++) - if (v->mb_type[0][s->block_index[i]] && (s->mb_x == 0 || v->mb_type[0][s->block_index[i] - 1])) - vc1_h_overlap_filter(v, s->mb_x ? left_blk : cur_blk, cur_blk, i); + for (i = 0; i < block_count; i++) { + if (s->mb_x == 0 && (i & 5) != 1) + continue; + + if (v->mb_type[0][s->block_index[i]] && v->mb_type[0][s->block_index[i] - 1]) + vc1_h_overlap_filter(v, + s->mb_x ? left_blk : cur_blk, cur_blk, + v->fcm == ILACE_FRAME && s->mb_x && v->fieldtx_plane[mb_pos - 1], + v->fcm == ILACE_FRAME && v->fieldtx_plane[mb_pos], + i); + } if (v->fcm != ILACE_FRAME) for (i = 0; i < block_count; i++) { - if (s->mb_x && v->mb_type[0][s->block_index[i] - 1] && - (s->first_slice_line || v->mb_type[0][s->block_index[i] - s->block_wrap[i] - 1])) + if (s->first_slice_line && !(i & 2)) + continue; + + if (s->mb_x && v->mb_type[0][s->block_index[i] - 2 + (i > 3)] && + v->mb_type[0][s->block_index[i] - s->block_wrap[i] - 2 + (i > 3)]) vc1_v_overlap_filter(v, s->first_slice_line ? left_blk : topleft_blk, left_blk, i); if (s->mb_x == s->mb_width - 1) if (v->mb_type[0][s->block_index[i]] && - (s->first_slice_line || v->mb_type[0][s->block_index[i] - s->block_wrap[i]])) + v->mb_type[0][s->block_index[i] - s->block_wrap[i]]) vc1_v_overlap_filter(v, s->first_slice_line ? cur_blk : top_blk, cur_blk, i); } } @@ -241,7 +284,7 @@ void ff_vc1_i_loop_filter(VC1Context *v) * bottom edge of this MB, before moving over and running the H loop * filter on the left and internal vertical borders. Therefore, the loop * filter trails by one row and one column relative to the overlap filter - * and two rows and two colums relative to the decoding loop. */ + * and two rows and two columns relative to the decoding loop. */ if (!s->first_slice_line) { dest = s->dest[0] - 16 * s->linesize - 16; flags = s->mb_y == s->start_mb_y + 1 ? TOP_EDGE : 0; @@ -250,7 +293,7 @@ void ff_vc1_i_loop_filter(VC1Context *v) for (i = 0; i < block_count; i++) vc1_i_v_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest, flags, fieldtx, i); } - if (s->mb_x == s->mb_width - 1) { + if (s->mb_x == v->end_mb_x - 1) { dest += 16; fieldtx = v->fieldtx_plane[mb_pos - s->mb_stride]; for (i = 0; i < block_count; i++) @@ -265,7 +308,7 @@ void ff_vc1_i_loop_filter(VC1Context *v) for (i = 0; i < block_count; i++) vc1_i_v_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 : dest, flags, fieldtx, i); } - if (s->mb_x == s->mb_width - 1) { + if (s->mb_x == v->end_mb_x - 1) { dest += 16; fieldtx = v->fieldtx_plane[mb_pos]; for (i = 0; i < block_count; i++) @@ -280,7 +323,7 @@ void ff_vc1_i_loop_filter(VC1Context *v) for (i = 0; i < block_count; i++) vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] - 16 * s->uvlinesize - 8 : dest, flags, i); } - if (s->mb_x == s->mb_width - 1) { + if (s->mb_x == v->end_mb_x - 1) { dest += 16; flags = s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE; for (i = 0; i < block_count; i++) @@ -295,7 +338,7 @@ void ff_vc1_i_loop_filter(VC1Context *v) for (i = 0; i < block_count; i++) vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 * s->uvlinesize - 8 : dest, flags, i); } - if (s->mb_x == s->mb_width - 1) { + if (s->mb_x == v->end_mb_x - 1) { flags = s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE; dest += 16; for (i = 0; i < block_count; i++) @@ -308,7 +351,7 @@ void ff_vc1_i_loop_filter(VC1Context *v) for (i = 0; i < block_count; i++) vc1_i_h_loop_filter(v, i > 3 ? s->dest[i - 3] - 8 : dest, flags, i); } - if (s->mb_x == s->mb_width - 1) { + if (s->mb_x == v->end_mb_x - 1) { dest += 16; flags = s->mb_x == 0 ? LEFT_EDGE | RIGHT_EDGE : RIGHT_EDGE; for (i = 0; i < block_count; i++) @@ -442,7 +485,7 @@ void ff_vc1_p_loop_filter(VC1Context *v) * we wait for the next loop filter iteration to do H loop filter on all * applicable vertical borders of this MB. Therefore, the loop filter * trails by one row and one column relative to the overlap filter and two - * rows and two colums relative to the decoding loop. */ + * rows and two columns relative to the decoding loop. */ if (s->mb_y >= s->start_mb_y + 2) { if (s->mb_x) { dest = s->dest[0] - 32 * s->linesize - 16; @@ -880,7 +923,7 @@ void ff_vc1_p_intfr_loop_filter(VC1Context *v) * we wait for the loop filter iteration on the next row and next column to * do H loop filter on all applicable vertical borders of this MB. * Therefore, the loop filter trails by two rows and one column relative to - * the overlap filter and two rows and two colums relative to the decoding + * the overlap filter and two rows and two columns relative to the decoding * loop. */ if (s->mb_x) { if (s->mb_y >= s->start_mb_y + 1) {