2 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
3 * Copyright (C) 2005 Nikolaj Poroshin <porosh3@psu.ru>
4 * Copyright (c) 2014 Arwa Arif <arwaarif1994@gmail.com>
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License along
19 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * Fast Simple Post-processing filter
26 * This implementation is based on an algorithm described in
27 * "Aria Nosratinia Embedded Post-Processing for
28 * Enhancement of Compressed Images (1999)"
29 * (http://www.utdallas.edu/~aria/papers/vlsisp99.pdf)
30 * Further, with splitting (I)DCT into horizontal/vertical passes, one of
31 * them can be performed once per block, not per pixel. This allows for much higher speed.
34 * Originally written by Michael Niedermayer and Nikolaj for the MPlayer
35 * project, and ported by Arwa Arif for FFmpeg.
38 #include "libavutil/avassert.h"
39 #include "libavutil/imgutils.h"
40 #include "libavutil/opt.h"
41 #include "libavutil/pixdesc.h"
/* Helpers for the option table below: OFFSET() gives the byte offset of a
 * field inside FSPPContext, FLAGS tags an option as a video filtering
 * parameter. */
45 #define OFFSET(x) offsetof(FSPPContext, x)
46 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
/* User-visible options of the filter. Each entry reads: name, help text,
 * target field, type, default value, minimum, maximum, flags. */
47 static const AVOption fspp_options[] = {
/* "quality" is stored in log2_count; default 4, accepted range 4..MAX_LEVEL */
48 { "quality", "set quality", OFFSET(log2_count), AV_OPT_TYPE_INT, {.i64 = 4}, 4, MAX_LEVEL, FLAGS },
/* "qp": 0 (the default) is treated elsewhere in this file as "no constant QP forced" */
49 { "qp", "force a constant quantizer parameter", OFFSET(qp), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 64, FLAGS },
/* "strength" is added to a base of 16 to form the threshold bias in filter_frame() */
50 { "strength", "set filter strength", OFFSET(strength), AV_OPT_TYPE_INT, {.i64 = 0}, -15, 32, FLAGS },
/* "use_bframe_qp" is an integer restricted to 0/1 and used as a boolean */
51 { "use_bframe_qp", "use B-frames' QP", OFFSET(use_bframe_qp), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, FLAGS },
/* Defines fspp_class, which ff_vf_fspp references through .priv_class */
55 AVFILTER_DEFINE_CLASS(fspp);
/* 8x8 dither matrix (32-byte aligned) with entries in the range 0..63.
 * The store_slice functions select a row with dither[y] and add the entry,
 * shifted right by log2_scale, before the final down-shift of each sample. */
57 DECLARE_ALIGNED(32, static const uint8_t, dither)[8][8] = {
58 { 0, 48, 12, 60, 3, 51, 15, 63, },
59 { 32, 16, 44, 28, 35, 19, 47, 31, },
60 { 8, 56, 4, 52, 11, 59, 7, 55, },
61 { 40, 24, 36, 20, 43, 27, 39, 23, },
62 { 2, 50, 14, 62, 1, 49, 13, 61, },
63 { 34, 18, 46, 30, 33, 17, 45, 29, },
64 { 10, 58, 6, 54, 9, 57, 5, 53, },
65 { 42, 26, 38, 22, 41, 25, 37, 21, },
/* Base thresholds for the 64 coefficients of an 8x8 block, stored as 8 rows
 * of 8 values. filter_frame() scales every entry by (bias / 71.0) before
 * packing them into the context's threshold matrix. */
68 static const short custom_threshold[64] = {
69 // The values (e.g. 296) must not be too high:
70 // that causes too strong a dependence on the quantizer,
71 // or possibly an overflow (to be checked), which results in some flashing.
72 71, 296, 295, 237, 71, 40, 38, 19,
73 245, 193, 185, 121, 102, 73, 53, 27,
74 158, 129, 141, 107, 97, 73, 50, 26,
75 102, 116, 109, 98, 82, 66, 45, 23,
76 71, 94, 95, 81, 70, 56, 38, 20,
77 56, 77, 74, 66, 56, 44, 30, 15,
78 38, 53, 50, 45, 38, 30, 21, 11,
79 20, 27, 26, 23, 20, 15, 11, 5
82 // Converts one slice of 16-bit accumulators to 8-bit output. Each sample is read from src, then that sample and the one 8 rows above it (src - 8 * src_stride) are cleared.
/*
 * dst, dst_stride  8-bit destination and its row pitch
 * src, src_stride  16-bit accumulator buffer and its row pitch (in elements)
 * width, height    size of the area; x advances in steps of 8 samples
 * log2_scale       controls both the dither scaling (d[pos] >> log2_scale)
 *                  and the final down-shift (6 - log2_scale)
 *
 * Per sample: add the scaled dither value, shift down, and when bit 8 of the
 * result is set replace it with ~(temp >> 31), i.e. 0 for negative values and
 * all ones otherwise, so that the stored byte saturates.
 * NOTE(review): the macro head, the local declarations and the part of the
 * loop body that writes dst are not visible in this excerpt.
 */
83 static void store_slice_c(uint8_t *dst, int16_t *src,
84 ptrdiff_t dst_stride, ptrdiff_t src_stride,
85 ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
89 temp = (src[x + pos] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
90 src[x + pos] = src[x + pos - 8 * src_stride] = 0; \
91 if (temp & 0x100) temp = ~(temp >> 31); \
94 for (y = 0; y < height; y++) {
/* dither row for this output line */
95 const uint8_t *d = dither[y];
96 for (x = 0; x < width; x += 8) {
112 // Converts the sum of two slices of 16-bit accumulators to 8-bit output: reads src and src + 16 * src_stride, and clears only the second of the two.
/*
 * Parameters have the same meaning as in store_slice_c().
 *
 * Per sample: add both accumulators and the scaled dither value, shift down
 * by (6 - log2_scale), and when bit 8 of the result is set replace it with
 * ~(temp >> 31) so that the stored byte saturates.
 * NOTE(review): the local declarations and the part of the loop body that
 * writes dst are not visible in this excerpt.
 */
113 static void store_slice2_c(uint8_t *dst, int16_t *src,
114 ptrdiff_t dst_stride, ptrdiff_t src_stride,
115 ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
118 #define STORE2(pos) \
119 temp = (src[x + pos] + src[x + pos + 16 * src_stride] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
120 src[x + pos + 16 * src_stride] = 0; \
121 if (temp & 0x100) temp = ~(temp >> 31); \
124 for (y = 0; y < height; y++) {
/* dither row for this output line */
125 const uint8_t *d = dither[y];
126 for (x = 0; x < width; x += 8) {
/*
 * Builds the quantizer-dependent threshold matrix.
 *
 * thr_adr_noq  64 base thresholds (read only)
 * thr_adr      64 output thresholds, each set to q times the base value
 * q            quantizer scale
 *
 * The product is stored into an int16_t, so it is truncated to 16 bits.
 */
142 static void mul_thrmat_c(int16_t *thr_adr_noq, int16_t *thr_adr, int q)
145 for (a = 0; a < 64; a++)
146 thr_adr[a] = q * thr_adr_noq[a];
/*
 * Filters a single plane.
 *
 * p                    filter context: scratch buffers (p->src, p->temp),
 *                      thresholds and the DSP function pointers
 * dst, dst_stride      destination plane and its row pitch
 * src, src_stride      source plane and its row pitch
 * width, height        plane size in pixels
 * qp_store, qp_stride  quantizer table and its row pitch
 * is_luma              non-zero for the luma plane; selects the scratch
 *                      stride and disables the chroma subsampling shifts
 *
 * Returns without touching dst when either src or dst is NULL.
 * NOTE(review): several lines of this function (braces and some if/else
 * lines) are not visible in this excerpt; comments that depend on them are
 * marked as assumptions.
 */
149 static void filter(FSPPContext *p, uint8_t *dst, uint8_t *src,
150 int dst_stride, int src_stride,
151 int width, int height,
152 uint8_t *qp_store, int qp_stride, int is_luma)
154 int x, x0, y, es, qy, t;
/* row pitch of the scratch buffers: context stride for luma, width + 16 otherwise */
156 const int stride = is_luma ? p->temp_stride : (width + 16);
/* row increment of the main loop, derived from the quality level */
157 const int step = 6 - p->log2_count;
/* shifts that map pixel coordinates to quantizer table coordinates:
 * 4 for luma, reduced by the chroma subsampling shifts for chroma */
158 const int qpsh = 4 - p->hsub * !is_luma;
159 const int qpsv = 4 - p->vsub * !is_luma;
/* one aligned buffer split in two halves: block (first half) and block3 (second half) */
161 DECLARE_ALIGNED(32, int32_t, block_align)[4 * 8 * BLOCKSZ + 4 * 8 * BLOCKSZ];
162 int16_t *block = (int16_t *)block_align;
163 int16_t *block3 = (int16_t *)(block_align + 4 * 8 * BLOCKSZ);
/* NOTE(review): the memset length is in bytes, so this clears 4 * 8 * BLOCKSZ
 * bytes, which is less than the 4 * 8 * BLOCKSZ int32_t elements that make up
 * the block3 half -- confirm that a partial clear is intended. */
165 memset(block3, 0, 4 * 8 * BLOCKSZ);
167 if (!src || !dst) return;
/* copy the plane into p->src at an offset of 8 rows and 8 columns, mirroring
 * 8 pixels outwards at the left and right edge of every row */
169 for (y = 0; y < height; y++) {
170 int index = 8 + 8 * stride + y * stride;
171 memcpy(p->src + index, src + y * src_stride, width);
172 for (x = 0; x < 8; x++) {
173 p->src[index - x - 1] = p->src[index + x ];
174 p->src[index + width + x ] = p->src[index + width - x - 1];
/* mirror 8 full rows above the first and below the last image row */
178 for (y = 0; y < 8; y++) {
179 memcpy(p->src + ( 7 - y ) * stride, p->src + ( y + 8 ) * stride, stride);
180 memcpy(p->src + (height + 8 + y) * stride, p->src + (height - y + 7) * stride, stride);
182 //FIXME (try edge emu)
/* clear rows 8..23 of the accumulator buffer (width samples per row, starting at column 8) */
184 for (y = 8; y < 24; y++)
185 memset(p->temp + 8 + y * stride, 0, width * sizeof(int16_t));
187 for (y = step; y < height + 8; y += step) { //step= 1,2
188 const int y1 = y - 8 + step; //l5-7 l4-6;
/* keep the row used for the quantizer lookup inside the image */
191 if (qy > height - 1) qy = height - 1;
/* turn the row into an offset into the quantizer table */
194 qy = (qy >> qpsv) * qp_stride;
/* forward transform of the first 2 columns of this row of blocks */
195 p->row_fdct(block, p->src + y * stride + 2 - (y&1), stride, 2);
/* process the row in groups of 8 * (BLOCKSZ - 1) columns */
197 for (x0 = 0; x0 < width + 8 - 8 * (BLOCKSZ - 1); x0 += 8 * (BLOCKSZ - 1)) {
198 p->row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + 2 - (y&1), stride, 2 * (BLOCKSZ - 1));
/* whole group handled with the current threshold matrix
 * (presumably the constant-QP path; the selecting condition is not visible here) */
201 p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + 0 * 8, block3 + 0 * 8, 8 * (BLOCKSZ - 1)); //yes, this is a HOTSPOT
/* per-block path: look up the quantizer of every 8-column block */
203 for (x = 0; x < 8 * (BLOCKSZ - 1); x += 8) {
204 t = x + x0 - 2; //correct t=x+x0-2-(y&1), but it's the same
206 if (t < 0) t = 0; //t always < width-2
208 t = qp_store[qy + (t >> qpsh)];
209 t = ff_norm_qscale(t, p->qscale_type);
/* rebuild the threshold matrix only when the quantizer changed */
211 if (t != p->prev_q) p->prev_q = t, p->mul_thrmat((int16_t *)(&p->threshold_mtx_noq[0]), (int16_t *)(&p->threshold_mtx[0]), t);
212 p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + x * 8, block3 + x * 8, 8); //yes, this is a HOTSPOT
/* inverse row transform, accumulated into row (y & 15) of p->temp */
214 p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, 2 * (BLOCKSZ - 1));
/* move the trailing data of both work buffers to their start for the next group */
215 memmove(block, block + (BLOCKSZ - 1) * 64, 8 * 8 * sizeof(int16_t)); //cycling
216 memmove(block3, block3 + (BLOCKSZ - 1) * 64, 6 * 8 * sizeof(int16_t));
/* columns left over after the last full group */
219 es = width + 8 - x0; // 8, ...
221 p->row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + 2 - (y & 1), stride, (es - 4) >> 2);
223 p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block, block3, es&(~1));
224 p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, es >> 2);
/* every time y1 reaches a non-zero multiple of 8, write 8 finished rows to dst;
 * the condition choosing between the two store variants is not visible here */
226 if (!(y1 & 7) && y1) {
228 p->store_slice(dst + (y1 - 8) * dst_stride, p->temp + 8 + 8 * stride,
229 dst_stride, stride, width, 8, 5 - p->log2_count);
231 p->store_slice2(dst + (y1 - 8) * dst_stride, p->temp + 8 + 0 * stride,
232 dst_stride, stride, width, 8, 5 - p->log2_count);
/* write the remaining (y & 7) rows of a final, partial slice */
236 if (y & 7) { // height % 8 != 0
238 p->store_slice(dst + ((y - 8) & ~7) * dst_stride, p->temp + 8 + 8 * stride,
239 dst_stride, stride, width, y&7, 5 - p->log2_count);
241 p->store_slice2(dst + ((y - 8) & ~7) * dst_stride, p->temp + 8 + 0 * stride,
242 dst_stride, stride, width, y&7, 5 - p->log2_count);
/*
 * Column pass: forward transform, thresholding and inverse transform.
 *
 * thr_adr  threshold matrix; entry [k * 8] is used for coefficient k
 * data     input coefficients, read with a pitch of DCTSIZE between rows
 * output   workspace that receives the result
 *          (presumably what wsptr points to -- its setup is not visible here)
 * cnt      amount of work; the outer loop consumes it in steps of 2
 *
 * For each start position, DCTSIZE columns are processed: an even/odd
 * butterfly on the 8 inputs, a THRESHOLD() on every coefficient, and the
 * inverse butterflies whose results are written to the workspace.
 */
246 static void column_fidct_c(int16_t *thr_adr, int16_t *data, int16_t *output, int cnt)
248 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
249 int_simd16_t tmp10, tmp11, tmp12, tmp13;
250 int_simd16_t z1,z2,z3,z4,z5, z10, z11, z12, z13;
251 int_simd16_t d0, d1, d2, d3, d4, d5, d6, d7;
261 for (; cnt > 0; cnt -= 2) { //start positions
262 threshold = (int16_t *)thr_adr;//threshold_mtx
263 for (ctr = DCTSIZE; ctr > 0; ctr--) {
264 // Process columns from input, add to output.
/* first butterfly stage: sums (tmp0..tmp3) and differences (tmp7..tmp4) of mirrored rows */
265 tmp0 = dataptr[DCTSIZE * 0] + dataptr[DCTSIZE * 7];
266 tmp7 = dataptr[DCTSIZE * 0] - dataptr[DCTSIZE * 7];
268 tmp1 = dataptr[DCTSIZE * 1] + dataptr[DCTSIZE * 6];
269 tmp6 = dataptr[DCTSIZE * 1] - dataptr[DCTSIZE * 6];
271 tmp2 = dataptr[DCTSIZE * 2] + dataptr[DCTSIZE * 5];
272 tmp5 = dataptr[DCTSIZE * 2] - dataptr[DCTSIZE * 5];
274 tmp3 = dataptr[DCTSIZE * 3] + dataptr[DCTSIZE * 4];
275 tmp4 = dataptr[DCTSIZE * 3] - dataptr[DCTSIZE * 4];
287 z1 = MULTIPLY16H((tmp12 + tmp13) << 2, FIX_0_707106781);
/* threshold the even coefficients d0, d2, d4, d6 */
293 THRESHOLD(tmp0, d0, threshold[0 * 8]);
294 THRESHOLD(tmp1, d2, threshold[2 * 8]);
295 THRESHOLD(tmp2, d4, threshold[4 * 8]);
296 THRESHOLD(tmp3, d6, threshold[6 * 8]);
/* inverse transform of the even part */
298 tmp10 = (tmp0 + tmp2) >> 2;
299 tmp11 = (tmp0 - tmp2) >> 2;
301 tmp13 = (tmp1 + tmp3) >>2; //+2 ! (psnr decides)
302 tmp12 = MULTIPLY16H((tmp1 - tmp3), FIX_1_414213562_A) - tmp13; //<<2
304 tmp0 = tmp10 + tmp13; //->temps
305 tmp3 = tmp10 - tmp13; //->temps
306 tmp1 = tmp11 + tmp12; //->temps
307 tmp2 = tmp11 - tmp12; //->temps
/* forward transform of the odd part */
315 z5 = MULTIPLY16H((tmp10 - tmp12) << 2, FIX_0_382683433);
316 z2 = MULTIPLY16H(tmp10 << 2, FIX_0_541196100) + z5;
317 z4 = MULTIPLY16H(tmp12 << 2, FIX_1_306562965) + z5;
318 z3 = MULTIPLY16H(tmp11 << 2, FIX_0_707106781);
/* threshold the odd coefficients d1, d3, d5, d7 */
330 THRESHOLD(tmp4, d1, threshold[1 * 8]);
331 THRESHOLD(tmp5, d3, threshold[3 * 8]);
332 THRESHOLD(tmp6, d5, threshold[5 * 8]);
333 THRESHOLD(tmp7, d7, threshold[7 * 8]);
335 // The SIMD version uses a shortcut here for the case tmp5, tmp6, tmp7 == 0
/* inverse transform of the odd part */
337 z10 = (tmp6 - tmp5) << 1;
339 z12 = (tmp4 - tmp7) << 1;
341 tmp7 = (z11 + z13) >> 2; //+2 !
342 tmp11 = MULTIPLY16H((z11 - z13) << 1, FIX_1_414213562);
343 z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
344 tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
345 tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - !!
/* rows 0..5 are accumulated into the workspace ... */
351 wsptr[DCTSIZE * 0] += (tmp0 + tmp7);
352 wsptr[DCTSIZE * 1] += (tmp1 + tmp6);
353 wsptr[DCTSIZE * 2] += (tmp2 + tmp5);
354 wsptr[DCTSIZE * 3] += (tmp3 - tmp4);
355 wsptr[DCTSIZE * 4] += (tmp3 + tmp4);
356 wsptr[DCTSIZE * 5] += (tmp2 - tmp5);
/* ... while rows 6 and 7 are overwritten instead of accumulated
 * NOTE(review): presumably because nothing has been added to them yet -- confirm */
357 wsptr[DCTSIZE * 6] = (tmp1 - tmp6);
358 wsptr[DCTSIZE * 7] = (tmp0 - tmp7);
360 dataptr++; //next column
364 dataptr += 8; //skip each second start pos
/*
 * Row pass of the inverse transform.
 *
 * workspace      input coefficients, DCTSIZE values consumed per iteration
 * output_adr     16-bit destination the results are accumulated into
 *                (presumably what outptr points to -- its setup is not visible here)
 * output_stride  pitch between the 8 output samples of one iteration
 * cnt            number of iterations
 *
 * Coefficients are read in the order produced by row_fdct_c(): positions 0..3
 * hold the even part, positions 4..7 the odd part.
 */
369 static void row_idct_c(int16_t *workspace, int16_t *output_adr, ptrdiff_t output_stride, int cnt)
371 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
372 int_simd16_t tmp10, tmp11, tmp12, tmp13;
373 int_simd16_t z5, z10, z11, z12, z13;
380 for (; cnt > 0; cnt--) {
/* even part */
382 // The SIMD version reads a 4x4 block and transposes it
383 tmp10 = wsptr[2] + wsptr[3];
384 tmp11 = wsptr[2] - wsptr[3];
386 tmp13 = wsptr[0] + wsptr[1];
387 tmp12 = (MULTIPLY16H(wsptr[0] - wsptr[1], FIX_1_414213562_A) << 2) - tmp13;//this shift order to avoid overflow
389 tmp0 = tmp10 + tmp13; //->temps
390 tmp3 = tmp10 - tmp13; //->temps
391 tmp1 = tmp11 + tmp12;
392 tmp2 = tmp11 - tmp12;
/* odd part */
395 //Also transpose, with previous:
397 // ---- ---- idct ||||
398 // ---- ---- ---> ||||
400 z13 = wsptr[4] + wsptr[5];
401 z10 = wsptr[4] - wsptr[5];
402 z11 = wsptr[6] + wsptr[7];
403 z12 = wsptr[6] - wsptr[7];
406 tmp11 = MULTIPLY16H(z11 - z13, FIX_1_414213562);
408 z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
409 tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
410 tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - FIX_
412 tmp6 = (tmp12 << 3) - tmp7;
413 tmp5 = (tmp11 << 3) - tmp6;
414 tmp4 = (tmp10 << 3) + tmp5;
416 // Final output stage: descale by 3 bits and accumulate into the output column
417 outptr[0 * output_stride] += DESCALE(tmp0 + tmp7, 3);
418 outptr[1 * output_stride] += DESCALE(tmp1 + tmp6, 3);
419 outptr[2 * output_stride] += DESCALE(tmp2 + tmp5, 3);
420 outptr[3 * output_stride] += DESCALE(tmp3 - tmp4, 3);
421 outptr[4 * output_stride] += DESCALE(tmp3 + tmp4, 3);
422 outptr[5 * output_stride] += DESCALE(tmp2 - tmp5, 3);
423 outptr[6 * output_stride] += DESCALE(tmp1 - tmp6, 3); //no += ?
424 outptr[7 * output_stride] += DESCALE(tmp0 - tmp7, 3); //no += ?
427 wsptr += DCTSIZE; // advance pointer to next row
/*
 * Row pass of the forward transform, reading 8-bit pixels directly.
 *
 * data       destination for the coefficients, 8 values written per iteration
 *            (presumably what dataptr points to -- its setup is not visible here)
 * pixels     source pixels; 8 samples spaced line_size apart are read per iteration
 * line_size  pitch between the 8 samples
 * cnt        number of iterations; pixels advances by one each time
 *
 * The even part is written to positions 0..3 and the odd part to positions
 * 4..7 of every output group.
 */
431 static void row_fdct_c(int16_t *data, const uint8_t *pixels, ptrdiff_t line_size, int cnt)
433 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
434 int_simd16_t tmp10, tmp11, tmp12, tmp13;
435 int_simd16_t z1, z2, z3, z4, z5, z11, z13;
439 // Pass 1: process rows.
442 for (; cnt > 0; cnt--) {
/* first butterfly stage: sums and differences of mirrored samples */
443 tmp0 = pixels[line_size * 0] + pixels[line_size * 7];
444 tmp7 = pixels[line_size * 0] - pixels[line_size * 7];
445 tmp1 = pixels[line_size * 1] + pixels[line_size * 6];
446 tmp6 = pixels[line_size * 1] - pixels[line_size * 6];
447 tmp2 = pixels[line_size * 2] + pixels[line_size * 5];
448 tmp5 = pixels[line_size * 2] - pixels[line_size * 5];
449 tmp3 = pixels[line_size * 3] + pixels[line_size * 4];
450 tmp4 = pixels[line_size * 3] - pixels[line_size * 4];
458 // The even columns are written first. This leads to a different column order
459 // in column_fidct(), but the columns are processed independently, so that is fine.
460 // Later, row_idct() reads the columns in the same order.
461 dataptr[2] = tmp10 + tmp11;
462 dataptr[3] = tmp10 - tmp11;
464 z1 = MULTIPLY16H((tmp12 + tmp13) << 2, FIX_0_707106781);
465 dataptr[0] = tmp13 + z1;
466 dataptr[1] = tmp13 - z1;
/* odd part */
470 tmp10 = (tmp4 + tmp5) << 2;
471 tmp11 = (tmp5 + tmp6) << 2;
472 tmp12 = (tmp6 + tmp7) << 2;
474 z5 = MULTIPLY16H(tmp10 - tmp12, FIX_0_382683433);
475 z2 = MULTIPLY16H(tmp10, FIX_0_541196100) + z5;
476 z4 = MULTIPLY16H(tmp12, FIX_1_306562965) + z5;
477 z3 = MULTIPLY16H(tmp11, FIX_0_707106781);
482 dataptr[4] = z13 + z2;
483 dataptr[5] = z13 - z2;
484 dataptr[6] = z11 + z4;
485 dataptr[7] = z11 - z4;
487 pixels++; // advance pointer to next column
/*
 * Announces the pixel formats accepted by the filter: 8-bit planar YUV and
 * full-range YUVJ variants, planar GBR and 8-bit gray.
 */
492 static int query_formats(AVFilterContext *ctx)
494 static const enum PixelFormat pix_fmts[] = {
495 AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P,
496 AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV411P,
497 AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV440P,
498 AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ422P,
499 AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ440P,
500 AV_PIX_FMT_GBRP, AV_PIX_FMT_GRAY8,
/* NOTE(review): on this line neither the result of ff_make_format_list() nor
 * that of ff_set_common_formats() is checked, so an allocation failure would
 * go unreported -- confirm against the rest of the function. */
503 ff_set_common_formats(ctx, ff_make_format_list(pix_fmts));
/*
 * Input link configuration: records the chroma subsampling, allocates the
 * scratch buffers and installs the C implementations of the DSP functions.
 *
 * Returns AVERROR(ENOMEM) when an allocation fails. Buffers that were already
 * allocated at that point stay in the context; uninit() frees them.
 */
507 static int config_input(AVFilterLink *inlink)
509 AVFilterContext *ctx = inlink->dst;
510 FSPPContext *fspp = ctx->priv;
/* buffer height: image height plus 16 rows, rounded up to a multiple of 16 */
511 const int h = FFALIGN(inlink->h + 16, 16);
512 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
514 fspp->hsub = desc->log2_chroma_w;
515 fspp->vsub = desc->log2_chroma_h;
/* buffer row pitch: image width plus 16 columns, rounded up to a multiple of 16 */
517 fspp->temp_stride = FFALIGN(inlink->w + 16, 16);
/* both scratch buffers hold temp_stride * h elements of their respective type */
518 fspp->temp = av_malloc_array(fspp->temp_stride, h * sizeof(*fspp->temp));
519 fspp->src = av_malloc_array(fspp->temp_stride, h * sizeof(*fspp->src));
521 if (!fspp->temp || !fspp->src)
522 return AVERROR(ENOMEM);
/* a private copy of the quantizer table is only needed when B-frame QPs are
 * not used and no constant QP is forced; one entry per 16x16 pixel block */
524 if (!fspp->use_bframe_qp && !fspp->qp) {
525 fspp->non_b_qp_alloc_size = FF_CEIL_RSHIFT(inlink->w, 4) * FF_CEIL_RSHIFT(inlink->h, 4);
526 fspp->non_b_qp_table = av_calloc(fspp->non_b_qp_alloc_size, sizeof(*fspp->non_b_qp_table));
527 if (!fspp->non_b_qp_table)
528 return AVERROR(ENOMEM);
/* default (plain C) implementations */
531 fspp->store_slice = store_slice_c;
532 fspp->store_slice2 = store_slice2_c;
533 fspp->mul_thrmat = mul_thrmat_c;
534 fspp->column_fidct = column_fidct_c;
535 fspp->row_idct = row_idct_c;
536 fspp->row_fdct = row_fdct_c;
/* x86 specific initialisation; presumably replaces some of the pointers above
 * and is guarded by an architecture check not visible here -- confirm */
539 ff_fspp_init_x86(fspp);
/*
 * Processes one input frame: prepares the threshold matrix, obtains the
 * quantizer table to use, filters the first three planes and passes the
 * result to the output link.
 *
 * NOTE(review): several lines of this function (declarations, braces and some
 * if/else lines) are not visible in this excerpt; comments that depend on
 * them are marked as assumptions.
 */
544 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
546 AVFilterContext *ctx = inlink->dst;
547 FSPPContext *fspp = ctx->priv;
548 AVFilterLink *outlink = ctx->outputs[0];
552 uint8_t *qp_table = NULL;
554 int custom_threshold_m[64];
/* bias is 16 plus the user supplied strength */
556 bias = (1 << 4) + fspp->strength;
/* scale the base thresholds by bias / 71, rounding to nearest */
558 for (i = 0; i < 64; i++) //FIXME: tune custom_threshold[] and remove this !
559 custom_threshold_m[i] = (int)(custom_threshold[i] * (bias / 71.0) + 0.5);
/* pack each row of 8 thresholds into two 64-bit words of four 16-bit fields;
 * the column order is 2, 6, 0, 4 for the first word and 5, 3, 1, 7 for the second */
561 for (i = 0; i < 8; i++) {
562 fspp->threshold_mtx_noq[2 * i] = (uint64_t)custom_threshold_m[i * 8 + 2]
563 |(((uint64_t)custom_threshold_m[i * 8 + 6]) << 16)
564 |(((uint64_t)custom_threshold_m[i * 8 + 0]) << 32)
565 |(((uint64_t)custom_threshold_m[i * 8 + 4]) << 48);
567 fspp->threshold_mtx_noq[2 * i + 1] = (uint64_t)custom_threshold_m[i * 8 + 5]
568 |(((uint64_t)custom_threshold_m[i * 8 + 3]) << 16)
569 |(((uint64_t)custom_threshold_m[i * 8 + 1]) << 32)
570 |(((uint64_t)custom_threshold_m[i * 8 + 7]) << 48);
/* build the threshold matrix for the constant quantizer and remember that
 * quantizer as the current one (presumably only when qp is set -- the guarding
 * condition is not visible here) */
574 fspp->prev_q = fspp->qp, fspp->mul_thrmat((int16_t *)(&fspp->threshold_mtx_noq[0]), (int16_t *)(&fspp->threshold_mtx[0]), fspp->qp);
576 /* if we are not in a constant user quantizer mode and we don't want to use
577 * the quantizers from the B-frames (B-frames often have a higher QP), we
578 * need to save the qp table from the last non B-frame; this is what the
579 * following code block does */
581 qp_table = av_frame_get_qp_table(in, &qp_stride, &fspp->qscale_type);
583 if (qp_table && !fspp->use_bframe_qp && in->pict_type != AV_PICTURE_TYPE_B) {
586 /* if the qp stride is not set, it means the QP are only defined on
/* table size in units of 16x16 pixel blocks */
589 w = FF_CEIL_RSHIFT(inlink->w, 4);
593 h = FF_CEIL_RSHIFT(inlink->h, 4);
/* grow the saved table when the current one is too small */
595 if (w * h > fspp->non_b_qp_alloc_size) {
596 int ret = av_reallocp_array(&fspp->non_b_qp_table, w, h);
/* presumably the failure path of the reallocation -- its condition is not visible here */
598 fspp->non_b_qp_alloc_size = 0;
601 fspp->non_b_qp_alloc_size = w * h;
604 av_assert0(w * h <= fspp->non_b_qp_alloc_size);
605 memcpy(fspp->non_b_qp_table, qp_table, w * h);
/* filtering is skipped when log2_count is 0 or the filter is disabled */
609 if (fspp->log2_count && !ctx->is_disabled) {
/* prefer the saved non-B-frame table when B-frame QPs are not wanted */
610 if (!fspp->use_bframe_qp && fspp->non_b_qp_table)
611 qp_table = fspp->non_b_qp_table;
613 if (qp_table || fspp->qp) {
/* chroma plane dimensions, rounded up */
614 const int cw = FF_CEIL_RSHIFT(inlink->w, fspp->hsub);
615 const int ch = FF_CEIL_RSHIFT(inlink->h, fspp->vsub);
617 /* get a new frame if in-place is not possible or if the dimensions
618 * are not multiple of 8 */
619 if (!av_frame_is_writable(in) || (inlink->w & 7) || (inlink->h & 7)) {
620 const int aligned_w = FFALIGN(inlink->w, 8);
621 const int aligned_h = FFALIGN(inlink->h, 8);
623 out = ff_get_video_buffer(outlink, aligned_w, aligned_h);
/* NOTE(review): presumably taken when out is NULL; confirm that the input
 * frame is released on this path (not visible here) */
626 return AVERROR(ENOMEM);
628 av_frame_copy_props(out, in);
/* plane 0 is filtered at full size with is_luma = 1, planes 1 and 2 at the
 * chroma size with is_luma = 0 */
631 filter(fspp, out->data[0], in->data[0], out->linesize[0], in->linesize[0],
632 inlink->w, inlink->h, qp_table, qp_stride, 1);
633 filter(fspp, out->data[1], in->data[1], out->linesize[1], in->linesize[1],
634 cw, ch, qp_table, qp_stride, 0);
635 filter(fspp, out->data[2], in->data[2], out->linesize[2], in->linesize[2],
636 cw, ch, qp_table, qp_stride, 0);
/* plane 3 is copied unfiltered at full size (presumably only when a fourth
 * plane exists and out differs from in -- the condition is not visible here) */
643 av_image_copy_plane(out->data[3], out->linesize[3],
644 in ->data[3], in ->linesize[3],
645 inlink->w, inlink->h);
/* hand the frame to the next filter and return its result */
648 return ff_filter_frame(outlink, out);
/*
 * Releases the buffers owned by the filter context: the two scratch buffers
 * and the saved quantizer table.
 */
651 static av_cold void uninit(AVFilterContext *ctx)
653 FSPPContext *fspp = ctx->priv;
654 av_freep(&fspp->temp);
655 av_freep(&fspp->src);
656 av_freep(&fspp->non_b_qp_table);
/* Input pad: a video pad that is configured by config_input() and receives
 * its frames through filter_frame(). */
659 static const AVFilterPad fspp_inputs[] = {
662 .type = AVMEDIA_TYPE_VIDEO,
663 .config_props = config_input,
664 .filter_frame = filter_frame,
/* Output pad: a video pad; no callbacks are set in the lines visible here. */
669 static const AVFilterPad fspp_outputs[] = {
672 .type = AVMEDIA_TYPE_VIDEO,
/* Filter definition: ties together the private context size, the format
 * query, the pads and the option class declared in this file. */
677 AVFilter ff_vf_fspp = {
679 .description = NULL_IF_CONFIG_SMALL("Apply Fast Simple Post-processing filter."),
680 .priv_size = sizeof(FSPPContext),
682 .query_formats = query_formats,
683 .inputs = fspp_inputs,
684 .outputs = fspp_outputs,
685 .priv_class = &fspp_class,
/* timeline support is handled inside the filter: filter_frame() checks ctx->is_disabled itself */
686 .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,