2 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
3 * Copyright (C) 2005 Nikolaj Poroshin <porosh3@psu.ru>
4 * Copyright (c) 2014 Arwa Arif <arwaarif1994@gmail.com>
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License along
19 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * Fast Simple Post-processing filter
26 * This implementation is based on an algorithm described in
27 * "Aria Nosratinia Embedded Post-Processing for
28 * Enhancement of Compressed Images (1999)"
29 * (http://www.utdallas.edu/~aria/papers/vlsisp99.pdf)
30 * Further, with splitting (I)DCT into horizontal/vertical passes, one of
31 * them can be performed once per block, not per pixel. This allows for much higher speed.
34 * Originally written by Michael Niedermayer and Nikolaj for the MPlayer
35 * project, and ported by Arwa Arif for FFmpeg.
38 #include "libavutil/avassert.h"
39 #include "libavutil/imgutils.h"
40 #include "libavutil/opt.h"
41 #include "libavutil/pixdesc.h"
45 #define OFFSET(x) offsetof(FSPPContext, x)
46 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
47 static const AVOption fspp_options[] = {
48 { "quality", "set quality", OFFSET(log2_count), AV_OPT_TYPE_INT, {.i64 = 4}, 4, MAX_LEVEL, FLAGS },
49 { "qp", "force a constant quantizer parameter", OFFSET(qp), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 64, FLAGS },
50 { "strength", "set filter strength", OFFSET(strength), AV_OPT_TYPE_INT, {.i64 = 0}, -15, 32, FLAGS },
51 { "use_bframe_qp", "use B-frames' QP", OFFSET(use_bframe_qp), AV_OPT_TYPE_BOOL,{.i64 = 0}, 0, 1, FLAGS },
55 AVFILTER_DEFINE_CLASS(fspp);
57 DECLARE_ALIGNED(32, static const uint8_t, dither)[8][8] = {
58 { 0, 48, 12, 60, 3, 51, 15, 63, },
59 { 32, 16, 44, 28, 35, 19, 47, 31, },
60 { 8, 56, 4, 52, 11, 59, 7, 55, },
61 { 40, 24, 36, 20, 43, 27, 39, 23, },
62 { 2, 50, 14, 62, 1, 49, 13, 61, },
63 { 34, 18, 46, 30, 33, 17, 45, 29, },
64 { 10, 58, 6, 54, 9, 57, 5, 53, },
65 { 42, 26, 38, 22, 41, 25, 37, 21, },
68 static const short custom_threshold[64] = {
69 // values (296) can't be too high
70 // -it causes too big quant dependence
71 // or maybe overflow(check), which results in some flashing
72 71, 296, 295, 237, 71, 40, 38, 19,
73 245, 193, 185, 121, 102, 73, 53, 27,
74 158, 129, 141, 107, 97, 73, 50, 26,
75 102, 116, 109, 98, 82, 66, 45, 23,
76 71, 94, 95, 81, 70, 56, 38, 20,
77 56, 77, 74, 66, 56, 44, 30, 15,
78 38, 53, 50, 45, 38, 30, 21, 11,
79 20, 27, 26, 23, 20, 15, 11, 5
82 //This func reads from 1 slice, 1 and clears 0 & 1
83 static void store_slice_c(uint8_t *dst, int16_t *src,
84 ptrdiff_t dst_stride, ptrdiff_t src_stride,
85 ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
89 temp = (src[x + pos] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
90 src[x + pos] = src[x + pos - 8 * src_stride] = 0; \
91 if (temp & 0x100) temp = ~(temp >> 31); \
94 for (y = 0; y < height; y++) {
95 const uint8_t *d = dither[y];
96 for (x = 0; x < width; x += 8) {
112 //This func reads from 2 slices, 0 & 2 and clears 2-nd
113 static void store_slice2_c(uint8_t *dst, int16_t *src,
114 ptrdiff_t dst_stride, ptrdiff_t src_stride,
115 ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
118 #define STORE2(pos) \
119 temp = (src[x + pos] + src[x + pos + 16 * src_stride] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
120 src[x + pos + 16 * src_stride] = 0; \
121 if (temp & 0x100) temp = ~(temp >> 31); \
124 for (y = 0; y < height; y++) {
125 const uint8_t *d = dither[y];
126 for (x = 0; x < width; x += 8) {
142 static void mul_thrmat_c(int16_t *thr_adr_noq, int16_t *thr_adr, int q)
145 for (a = 0; a < 64; a++)
146 thr_adr[a] = q * thr_adr_noq[a];
149 static void filter(FSPPContext *p, uint8_t *dst, uint8_t *src,
150 int dst_stride, int src_stride,
151 int width, int height,
152 uint8_t *qp_store, int qp_stride, int is_luma)
154 int x, x0, y, es, qy, t;
156 const int stride = is_luma ? p->temp_stride : (width + 16);
157 const int step = 6 - p->log2_count;
158 const int qpsh = 4 - p->hsub * !is_luma;
159 const int qpsv = 4 - p->vsub * !is_luma;
161 DECLARE_ALIGNED(32, int32_t, block_align)[4 * 8 * BLOCKSZ + 4 * 8 * BLOCKSZ];
162 int16_t *block = (int16_t *)block_align;
163 int16_t *block3 = (int16_t *)(block_align + 4 * 8 * BLOCKSZ);
165 memset(block3, 0, 4 * 8 * BLOCKSZ);
167 if (!src || !dst) return;
169 for (y = 0; y < height; y++) {
170 int index = 8 + 8 * stride + y * stride;
171 memcpy(p->src + index, src + y * src_stride, width);
172 for (x = 0; x < 8; x++) {
173 p->src[index - x - 1] = p->src[index + x ];
174 p->src[index + width + x ] = p->src[index + width - x - 1];
178 for (y = 0; y < 8; y++) {
179 memcpy(p->src + ( 7 - y ) * stride, p->src + ( y + 8 ) * stride, stride);
180 memcpy(p->src + (height + 8 + y) * stride, p->src + (height - y + 7) * stride, stride);
182 //FIXME (try edge emu)
184 for (y = 8; y < 24; y++)
185 memset(p->temp + 8 + y * stride, 0, width * sizeof(int16_t));
187 for (y = step; y < height + 8; y += step) { //step= 1,2
188 const int y1 = y - 8 + step; //l5-7 l4-6;
191 if (qy > height - 1) qy = height - 1;
194 qy = (qy >> qpsv) * qp_stride;
195 p->row_fdct(block, p->src + y * stride + 2 - (y&1), stride, 2);
197 for (x0 = 0; x0 < width + 8 - 8 * (BLOCKSZ - 1); x0 += 8 * (BLOCKSZ - 1)) {
198 p->row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + 2 - (y&1), stride, 2 * (BLOCKSZ - 1));
201 p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + 0 * 8, block3 + 0 * 8, 8 * (BLOCKSZ - 1)); //yes, this is a HOTSPOT
203 for (x = 0; x < 8 * (BLOCKSZ - 1); x += 8) {
204 t = x + x0 - 2; //correct t=x+x0-2-(y&1), but its the same
206 if (t < 0) t = 0; //t always < width-2
208 t = qp_store[qy + (t >> qpsh)];
209 t = ff_norm_qscale(t, p->qscale_type);
211 if (t != p->prev_q) p->prev_q = t, p->mul_thrmat((int16_t *)(&p->threshold_mtx_noq[0]), (int16_t *)(&p->threshold_mtx[0]), t);
212 p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + x * 8, block3 + x * 8, 8); //yes, this is a HOTSPOT
214 p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, 2 * (BLOCKSZ - 1));
215 memmove(block, block + (BLOCKSZ - 1) * 64, 8 * 8 * sizeof(int16_t)); //cycling
216 memmove(block3, block3 + (BLOCKSZ - 1) * 64, 6 * 8 * sizeof(int16_t));
219 es = width + 8 - x0; // 8, ...
221 p->row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + 2 - (y & 1), stride, (es - 4) >> 2);
223 p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block, block3, es&(~1));
225 p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, es >> 2);
227 if (!(y1 & 7) && y1) {
229 p->store_slice(dst + (y1 - 8) * dst_stride, p->temp + 8 + 8 * stride,
230 dst_stride, stride, width, 8, 5 - p->log2_count);
232 p->store_slice2(dst + (y1 - 8) * dst_stride, p->temp + 8 + 0 * stride,
233 dst_stride, stride, width, 8, 5 - p->log2_count);
237 if (y & 7) { // height % 8 != 0
239 p->store_slice(dst + ((y - 8) & ~7) * dst_stride, p->temp + 8 + 8 * stride,
240 dst_stride, stride, width, y&7, 5 - p->log2_count);
242 p->store_slice2(dst + ((y - 8) & ~7) * dst_stride, p->temp + 8 + 0 * stride,
243 dst_stride, stride, width, y&7, 5 - p->log2_count);
247 static void column_fidct_c(int16_t *thr_adr, int16_t *data, int16_t *output, int cnt)
249 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
250 int_simd16_t tmp10, tmp11, tmp12, tmp13;
251 int_simd16_t z1,z2,z3,z4,z5, z10, z11, z12, z13;
252 int_simd16_t d0, d1, d2, d3, d4, d5, d6, d7;
262 for (; cnt > 0; cnt -= 2) { //start positions
263 threshold = (int16_t *)thr_adr;//threshold_mtx
264 for (ctr = DCTSIZE; ctr > 0; ctr--) {
265 // Process columns from input, add to output.
266 tmp0 = dataptr[DCTSIZE * 0] + dataptr[DCTSIZE * 7];
267 tmp7 = dataptr[DCTSIZE * 0] - dataptr[DCTSIZE * 7];
269 tmp1 = dataptr[DCTSIZE * 1] + dataptr[DCTSIZE * 6];
270 tmp6 = dataptr[DCTSIZE * 1] - dataptr[DCTSIZE * 6];
272 tmp2 = dataptr[DCTSIZE * 2] + dataptr[DCTSIZE * 5];
273 tmp5 = dataptr[DCTSIZE * 2] - dataptr[DCTSIZE * 5];
275 tmp3 = dataptr[DCTSIZE * 3] + dataptr[DCTSIZE * 4];
276 tmp4 = dataptr[DCTSIZE * 3] - dataptr[DCTSIZE * 4];
288 z1 = MULTIPLY16H((tmp12 + tmp13) << 2, FIX_0_707106781);
294 THRESHOLD(tmp0, d0, threshold[0 * 8]);
295 THRESHOLD(tmp1, d2, threshold[2 * 8]);
296 THRESHOLD(tmp2, d4, threshold[4 * 8]);
297 THRESHOLD(tmp3, d6, threshold[6 * 8]);
299 tmp10 = (tmp0 + tmp2) >> 2;
300 tmp11 = (tmp0 - tmp2) >> 2;
302 tmp13 = (tmp1 + tmp3) >>2; //+2 ! (psnr decides)
303 tmp12 = MULTIPLY16H((tmp1 - tmp3), FIX_1_414213562_A) - tmp13; //<<2
305 tmp0 = tmp10 + tmp13; //->temps
306 tmp3 = tmp10 - tmp13; //->temps
307 tmp1 = tmp11 + tmp12; //->temps
308 tmp2 = tmp11 - tmp12; //->temps
316 z5 = MULTIPLY16H((tmp10 - tmp12) << 2, FIX_0_382683433);
317 z2 = MULTIPLY16H(tmp10 << 2, FIX_0_541196100) + z5;
318 z4 = MULTIPLY16H(tmp12 << 2, FIX_1_306562965) + z5;
319 z3 = MULTIPLY16H(tmp11 << 2, FIX_0_707106781);
331 THRESHOLD(tmp4, d1, threshold[1 * 8]);
332 THRESHOLD(tmp5, d3, threshold[3 * 8]);
333 THRESHOLD(tmp6, d5, threshold[5 * 8]);
334 THRESHOLD(tmp7, d7, threshold[7 * 8]);
336 //Simd version uses here a shortcut for the tmp5,tmp6,tmp7 == 0
338 z10 = (tmp6 - tmp5) << 1;
340 z12 = (tmp4 - tmp7) << 1;
342 tmp7 = (z11 + z13) >> 2; //+2 !
343 tmp11 = MULTIPLY16H((z11 - z13) << 1, FIX_1_414213562);
344 z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
345 tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
346 tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - !!
352 wsptr[DCTSIZE * 0] += (tmp0 + tmp7);
353 wsptr[DCTSIZE * 1] += (tmp1 + tmp6);
354 wsptr[DCTSIZE * 2] += (tmp2 + tmp5);
355 wsptr[DCTSIZE * 3] += (tmp3 - tmp4);
356 wsptr[DCTSIZE * 4] += (tmp3 + tmp4);
357 wsptr[DCTSIZE * 5] += (tmp2 - tmp5);
358 wsptr[DCTSIZE * 6] = (tmp1 - tmp6);
359 wsptr[DCTSIZE * 7] = (tmp0 - tmp7);
361 dataptr++; //next column
365 dataptr += 8; //skip each second start pos
370 static void row_idct_c(int16_t *workspace, int16_t *output_adr, ptrdiff_t output_stride, int cnt)
372 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
373 int_simd16_t tmp10, tmp11, tmp12, tmp13;
374 int_simd16_t z5, z10, z11, z12, z13;
381 for (; cnt > 0; cnt--) {
383 //Simd version reads 4x4 block and transposes it
384 tmp10 = wsptr[2] + wsptr[3];
385 tmp11 = wsptr[2] - wsptr[3];
387 tmp13 = wsptr[0] + wsptr[1];
388 tmp12 = (MULTIPLY16H(wsptr[0] - wsptr[1], FIX_1_414213562_A) << 2) - tmp13;//this shift order to avoid overflow
390 tmp0 = tmp10 + tmp13; //->temps
391 tmp3 = tmp10 - tmp13; //->temps
392 tmp1 = tmp11 + tmp12;
393 tmp2 = tmp11 - tmp12;
396 //Also transpose, with previous:
398 // ---- ---- idct ||||
399 // ---- ---- ---> ||||
401 z13 = wsptr[4] + wsptr[5];
402 z10 = wsptr[4] - wsptr[5];
403 z11 = wsptr[6] + wsptr[7];
404 z12 = wsptr[6] - wsptr[7];
407 tmp11 = MULTIPLY16H(z11 - z13, FIX_1_414213562);
409 z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
410 tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
411 tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - FIX_
413 tmp6 = (tmp12 << 3) - tmp7;
414 tmp5 = (tmp11 << 3) - tmp6;
415 tmp4 = (tmp10 << 3) + tmp5;
417 // Final output stage: descale and write column
418 outptr[0 * output_stride] += DESCALE(tmp0 + tmp7, 3);
419 outptr[1 * output_stride] += DESCALE(tmp1 + tmp6, 3);
420 outptr[2 * output_stride] += DESCALE(tmp2 + tmp5, 3);
421 outptr[3 * output_stride] += DESCALE(tmp3 - tmp4, 3);
422 outptr[4 * output_stride] += DESCALE(tmp3 + tmp4, 3);
423 outptr[5 * output_stride] += DESCALE(tmp2 - tmp5, 3);
424 outptr[6 * output_stride] += DESCALE(tmp1 - tmp6, 3); //no += ?
425 outptr[7 * output_stride] += DESCALE(tmp0 - tmp7, 3); //no += ?
428 wsptr += DCTSIZE; // advance pointer to next row
432 static void row_fdct_c(int16_t *data, const uint8_t *pixels, ptrdiff_t line_size, int cnt)
434 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
435 int_simd16_t tmp10, tmp11, tmp12, tmp13;
436 int_simd16_t z1, z2, z3, z4, z5, z11, z13;
440 // Pass 1: process rows.
443 for (; cnt > 0; cnt--) {
444 tmp0 = pixels[line_size * 0] + pixels[line_size * 7];
445 tmp7 = pixels[line_size * 0] - pixels[line_size * 7];
446 tmp1 = pixels[line_size * 1] + pixels[line_size * 6];
447 tmp6 = pixels[line_size * 1] - pixels[line_size * 6];
448 tmp2 = pixels[line_size * 2] + pixels[line_size * 5];
449 tmp5 = pixels[line_size * 2] - pixels[line_size * 5];
450 tmp3 = pixels[line_size * 3] + pixels[line_size * 4];
451 tmp4 = pixels[line_size * 3] - pixels[line_size * 4];
459 //Even columns are written first, this leads to different order of columns
460 //in column_fidct(), but they are processed independently, so all ok.
461 //Later in the row_idct() columns readed at the same order.
462 dataptr[2] = tmp10 + tmp11;
463 dataptr[3] = tmp10 - tmp11;
465 z1 = MULTIPLY16H((tmp12 + tmp13) << 2, FIX_0_707106781);
466 dataptr[0] = tmp13 + z1;
467 dataptr[1] = tmp13 - z1;
471 tmp10 = (tmp4 + tmp5) << 2;
472 tmp11 = (tmp5 + tmp6) << 2;
473 tmp12 = (tmp6 + tmp7) << 2;
475 z5 = MULTIPLY16H(tmp10 - tmp12, FIX_0_382683433);
476 z2 = MULTIPLY16H(tmp10, FIX_0_541196100) + z5;
477 z4 = MULTIPLY16H(tmp12, FIX_1_306562965) + z5;
478 z3 = MULTIPLY16H(tmp11, FIX_0_707106781);
483 dataptr[4] = z13 + z2;
484 dataptr[5] = z13 - z2;
485 dataptr[6] = z11 + z4;
486 dataptr[7] = z11 - z4;
488 pixels++; // advance pointer to next column
493 static int query_formats(AVFilterContext *ctx)
495 static const enum AVPixelFormat pix_fmts[] = {
496 AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P,
497 AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV411P,
498 AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV440P,
499 AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ422P,
500 AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ440P,
501 AV_PIX_FMT_GBRP, AV_PIX_FMT_GRAY8,
505 AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
507 return AVERROR(ENOMEM);
508 return ff_set_common_formats(ctx, fmts_list);
511 static int config_input(AVFilterLink *inlink)
513 AVFilterContext *ctx = inlink->dst;
514 FSPPContext *fspp = ctx->priv;
515 const int h = FFALIGN(inlink->h + 16, 16);
516 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
518 fspp->hsub = desc->log2_chroma_w;
519 fspp->vsub = desc->log2_chroma_h;
521 fspp->temp_stride = FFALIGN(inlink->w + 16, 16);
522 fspp->temp = av_malloc_array(fspp->temp_stride, h * sizeof(*fspp->temp));
523 fspp->src = av_malloc_array(fspp->temp_stride, h * sizeof(*fspp->src));
525 if (!fspp->temp || !fspp->src)
526 return AVERROR(ENOMEM);
528 if (!fspp->use_bframe_qp && !fspp->qp) {
529 fspp->non_b_qp_alloc_size = AV_CEIL_RSHIFT(inlink->w, 4) * AV_CEIL_RSHIFT(inlink->h, 4);
530 fspp->non_b_qp_table = av_calloc(fspp->non_b_qp_alloc_size, sizeof(*fspp->non_b_qp_table));
531 if (!fspp->non_b_qp_table)
532 return AVERROR(ENOMEM);
535 fspp->store_slice = store_slice_c;
536 fspp->store_slice2 = store_slice2_c;
537 fspp->mul_thrmat = mul_thrmat_c;
538 fspp->column_fidct = column_fidct_c;
539 fspp->row_idct = row_idct_c;
540 fspp->row_fdct = row_fdct_c;
543 ff_fspp_init_x86(fspp);
548 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
550 AVFilterContext *ctx = inlink->dst;
551 FSPPContext *fspp = ctx->priv;
552 AVFilterLink *outlink = ctx->outputs[0];
556 uint8_t *qp_table = NULL;
558 int custom_threshold_m[64];
560 bias = (1 << 4) + fspp->strength;
562 for (i = 0; i < 64; i++) //FIXME: tune custom_threshold[] and remove this !
563 custom_threshold_m[i] = (int)(custom_threshold[i] * (bias / 71.0) + 0.5);
565 for (i = 0; i < 8; i++) {
566 fspp->threshold_mtx_noq[2 * i] = (uint64_t)custom_threshold_m[i * 8 + 2]
567 |(((uint64_t)custom_threshold_m[i * 8 + 6]) << 16)
568 |(((uint64_t)custom_threshold_m[i * 8 + 0]) << 32)
569 |(((uint64_t)custom_threshold_m[i * 8 + 4]) << 48);
571 fspp->threshold_mtx_noq[2 * i + 1] = (uint64_t)custom_threshold_m[i * 8 + 5]
572 |(((uint64_t)custom_threshold_m[i * 8 + 3]) << 16)
573 |(((uint64_t)custom_threshold_m[i * 8 + 1]) << 32)
574 |(((uint64_t)custom_threshold_m[i * 8 + 7]) << 48);
578 fspp->prev_q = fspp->qp, fspp->mul_thrmat((int16_t *)(&fspp->threshold_mtx_noq[0]), (int16_t *)(&fspp->threshold_mtx[0]), fspp->qp);
580 /* if we are not in a constant user quantizer mode and we don't want to use
581 * the quantizers from the B-frames (B-frames often have a higher QP), we
582 * need to save the qp table from the last non B-frame; this is what the
583 * following code block does */
585 qp_table = av_frame_get_qp_table(in, &qp_stride, &fspp->qscale_type);
587 if (qp_table && !fspp->use_bframe_qp && in->pict_type != AV_PICTURE_TYPE_B) {
590 /* if the qp stride is not set, it means the QP are only defined on
593 w = AV_CEIL_RSHIFT(inlink->w, 4);
597 h = AV_CEIL_RSHIFT(inlink->h, 4);
599 if (w * h > fspp->non_b_qp_alloc_size) {
600 int ret = av_reallocp_array(&fspp->non_b_qp_table, w, h);
602 fspp->non_b_qp_alloc_size = 0;
605 fspp->non_b_qp_alloc_size = w * h;
608 av_assert0(w * h <= fspp->non_b_qp_alloc_size);
609 memcpy(fspp->non_b_qp_table, qp_table, w * h);
613 if (fspp->log2_count && !ctx->is_disabled) {
614 if (!fspp->use_bframe_qp && fspp->non_b_qp_table)
615 qp_table = fspp->non_b_qp_table;
617 if (qp_table || fspp->qp) {
618 const int cw = AV_CEIL_RSHIFT(inlink->w, fspp->hsub);
619 const int ch = AV_CEIL_RSHIFT(inlink->h, fspp->vsub);
621 /* get a new frame if in-place is not possible or if the dimensions
622 * are not multiple of 8 */
623 if (!av_frame_is_writable(in) || (inlink->w & 7) || (inlink->h & 7)) {
624 const int aligned_w = FFALIGN(inlink->w, 8);
625 const int aligned_h = FFALIGN(inlink->h, 8);
627 out = ff_get_video_buffer(outlink, aligned_w, aligned_h);
630 return AVERROR(ENOMEM);
632 av_frame_copy_props(out, in);
633 out->width = in->width;
634 out->height = in->height;
637 filter(fspp, out->data[0], in->data[0], out->linesize[0], in->linesize[0],
638 inlink->w, inlink->h, qp_table, qp_stride, 1);
639 filter(fspp, out->data[1], in->data[1], out->linesize[1], in->linesize[1],
640 cw, ch, qp_table, qp_stride, 0);
641 filter(fspp, out->data[2], in->data[2], out->linesize[2], in->linesize[2],
642 cw, ch, qp_table, qp_stride, 0);
649 av_image_copy_plane(out->data[3], out->linesize[3],
650 in ->data[3], in ->linesize[3],
651 inlink->w, inlink->h);
654 return ff_filter_frame(outlink, out);
657 static av_cold void uninit(AVFilterContext *ctx)
659 FSPPContext *fspp = ctx->priv;
660 av_freep(&fspp->temp);
661 av_freep(&fspp->src);
662 av_freep(&fspp->non_b_qp_table);
665 static const AVFilterPad fspp_inputs[] = {
668 .type = AVMEDIA_TYPE_VIDEO,
669 .config_props = config_input,
670 .filter_frame = filter_frame,
675 static const AVFilterPad fspp_outputs[] = {
678 .type = AVMEDIA_TYPE_VIDEO,
683 AVFilter ff_vf_fspp = {
685 .description = NULL_IF_CONFIG_SMALL("Apply Fast Simple Post-processing filter."),
686 .priv_size = sizeof(FSPPContext),
688 .query_formats = query_formats,
689 .inputs = fspp_inputs,
690 .outputs = fspp_outputs,
691 .priv_class = &fspp_class,
692 .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,