2 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
3 * Copyright (C) 2005 Nikolaj Poroshin <porosh3@psu.ru>
4 * Copyright (c) 2014 Arwa Arif <arwaarif1994@gmail.com>
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License along
19 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * Fast Simple Post-processing filter
26 * This implementation is based on an algorithm described in
27 * "Aria Nosratinia Embedded Post-Processing for
28 * Enhancement of Compressed Images (1999)"
29 * (http://www.utdallas.edu/~aria/papers/vlsisp99.pdf)
30 * Further, by splitting the (I)DCT into horizontal and vertical passes, one of
31 * them can be performed once per block instead of once per pixel. This allows for much higher speed.
34 * Originally written by Michael Niedermayer and Nikolaj for the MPlayer
35 * project, and ported by Arwa Arif for FFmpeg.
38 #include "libavutil/avassert.h"
39 #include "libavutil/imgutils.h"
40 #include "libavutil/mem_internal.h"
41 #include "libavutil/opt.h"
42 #include "libavutil/pixdesc.h"
/*
 * User-visible options of the fspp filter.
 *
 * OFFSET(x) is the byte offset of field x inside FSPPContext; FLAGS marks an
 * option as both a filtering and a video parameter.
 *
 * Each table entry lists, in order: option name, help text, destination field,
 * value type, default value, minimum, maximum and flags.
 *   quality       -> log2_count,    default 4, range 4..MAX_LEVEL
 *   qp            -> qp,            default 0, range 0..64
 *   strength      -> strength,      default 0, range -15..32
 *   use_bframe_qp -> use_bframe_qp, boolean, default 0
 *
 * NOTE(review): the embedded line numbers jump from 53 to 57, so the table
 * terminator and the closing of the initializer are not visible in this
 * listing -- confirm against the real file.
 */
47 #define OFFSET(x) offsetof(FSPPContext, x)
48 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
49 static const AVOption fspp_options[] = {
50 { "quality", "set quality", OFFSET(log2_count), AV_OPT_TYPE_INT, {.i64 = 4}, 4, MAX_LEVEL, FLAGS },
51 { "qp", "force a constant quantizer parameter", OFFSET(qp), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 64, FLAGS },
52 { "strength", "set filter strength", OFFSET(strength), AV_OPT_TYPE_INT, {.i64 = 0}, -15, 32, FLAGS },
53 { "use_bframe_qp", "use B-frames' QP", OFFSET(use_bframe_qp), AV_OPT_TYPE_BOOL,{.i64 = 0}, 0, 1, FLAGS },
/* Declares the option class for the "fspp" filter; it is referenced as
 * fspp_class in the filter definition at the end of the file. */
57 AVFILTER_DEFINE_CLASS(fspp);
/*
 * 8x8 table of dither offsets, declared with 32-byte alignment.
 * The entries are the values 0..63. The store_slice functions pick one row
 * per output line (dither[y]) and add the entry, shifted right by log2_scale,
 * to each sample before the final down-shift.
 */
59 DECLARE_ALIGNED(32, static const uint8_t, dither)[8][8] = {
60 { 0, 48, 12, 60, 3, 51, 15, 63, },
61 { 32, 16, 44, 28, 35, 19, 47, 31, },
62 { 8, 56, 4, 52, 11, 59, 7, 55, },
63 { 40, 24, 36, 20, 43, 27, 39, 23, },
64 { 2, 50, 14, 62, 1, 49, 13, 61, },
65 { 34, 18, 46, 30, 33, 17, 45, 29, },
66 { 10, 58, 6, 54, 9, 57, 5, 53, },
67 { 42, 26, 38, 22, 41, 25, 37, 21, },
/*
 * Base threshold table with 64 entries, written out as 8 rows of 8 values.
 * filter_frame() rescales every entry by (16 + strength) / 71.0 and packs the
 * results into threshold_mtx_noq before they are multiplied by the quantizer.
 */
70 static const short custom_threshold[64] = {
71 // values (296) can't be too high
72 // -it causes too big quant dependence
73 // or maybe overflow(check), which results in some flashing
74 71, 296, 295, 237, 71, 40, 38, 19,
75 245, 193, 185, 121, 102, 73, 53, 27,
76 158, 129, 141, 107, 97, 73, 50, 26,
77 102, 116, 109, 98, 82, 66, 45, 23,
78 71, 94, 95, 81, 70, 56, 38, 20,
79 56, 77, 74, 66, 56, 44, 30, 15,
80 38, 53, 50, 45, 38, 30, 21, 11,
81 20, 27, 26, 23, 20, 15, 11, 5
/*
 * Convert accumulated 16-bit samples to 8-bit output and reset the
 * accumulator.
 *
 * For every sample the visible macro lines compute
 *     temp = (src[x + pos] + (d[pos] >> log2_scale)) >> (6 - log2_scale)
 * where d is the dither row of the current line, then zero both src[x + pos]
 * and the sample 8 * src_stride before it. When bit 8 of temp is set, temp is
 * replaced by ~(temp >> 31): 0 for a negative value, -1 (0xFF in the low
 * byte) otherwise.
 *
 * dst/dst_stride : output plane and its stride
 * src/src_stride : int16_t accumulation rows and their stride
 * width          : samples per row, walked in groups of 8
 * height         : number of rows; also indexes dither[], which has 8 rows
 * log2_scale     : scaling shift, see the formula above
 *
 * NOTE(review): the embedded line numbers skip 88-90, 94-95 and 99-113, so
 * the macro header, the write to dst and the body of the inner loop are not
 * visible in this listing.
 */
84 //This func reads from 1 slice, 1 and clears 0 & 1
85 static void store_slice_c(uint8_t *dst, int16_t *src,
86 ptrdiff_t dst_stride, ptrdiff_t src_stride,
87 ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
91 temp = (src[x + pos] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
92 src[x + pos] = src[x + pos - 8 * src_stride] = 0; \
93 if (temp & 0x100) temp = ~(temp >> 31); \
96 for (y = 0; y < height; y++) {
97 const uint8_t *d = dither[y];
98 for (x = 0; x < width; x += 8) {
/*
 * Variant of store_slice_c that combines two accumulator rows per output row.
 *
 * For every sample the macro computes
 *     temp = (src[x + pos] + src[x + pos + 16 * src_stride]
 *             + (d[pos] >> log2_scale)) >> (6 - log2_scale)
 * and then zeroes only the sample 16 * src_stride further down. The bit-8
 * test and the ~(temp >> 31) replacement are the same as in store_slice_c.
 *
 * Parameters have the same meaning as in store_slice_c; height indexes the
 * 8-row dither table.
 *
 * NOTE(review): the embedded line numbers skip 118-119, 124-125 and 129-143,
 * so the write to dst and the body of the inner loop are not visible in this
 * listing.
 */
114 //This func reads from 2 slices, 0 & 2 and clears 2-nd
115 static void store_slice2_c(uint8_t *dst, int16_t *src,
116 ptrdiff_t dst_stride, ptrdiff_t src_stride,
117 ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
120 #define STORE2(pos) \
121 temp = (src[x + pos] + src[x + pos + 16 * src_stride] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
122 src[x + pos + 16 * src_stride] = 0; \
123 if (temp & 0x100) temp = ~(temp >> 31); \
126 for (y = 0; y < height; y++) {
127 const uint8_t *d = dither[y];
128 for (x = 0; x < width; x += 8) {
/**
 * Build the quantizer-scaled threshold matrix.
 *
 * Multiplies each of the 64 entries of thr_adr_noq by q and stores the
 * product, converted to int16_t, in the matching slot of thr_adr.
 *
 * @param thr_adr_noq  64 unscaled thresholds (only read)
 * @param thr_adr      receives the 64 scaled thresholds
 * @param q            quantizer value used as the multiplier
 */
static void mul_thrmat_c(int16_t *thr_adr_noq, int16_t *thr_adr, int q)
{
    int a;

    for (a = 0; a < 64; a++)
        thr_adr[a] = q * thr_adr_noq[a];
}
/**
 * Run the post-processing filter over one plane.
 *
 * The plane is copied into p->src with an 8-sample mirrored border on every
 * side, then processed in steps of (6 - log2_count) rows through the
 * row_fdct / column_fidct / row_idct function pointers held in p. Results
 * accumulate in p->temp and are written to dst by store_slice / store_slice2.
 *
 * @param p          filter context (scratch buffers, thresholds, DSP pointers)
 * @param dst        destination plane; the function returns early when NULL
 * @param src        source plane; the function returns early when NULL
 * @param dst_stride stride of dst
 * @param src_stride stride of src
 * @param width      plane width in samples
 * @param height     plane height in rows
 * @param qp_store   quantizer table, read at qy + (t >> qpsh)
 * @param qp_stride  row stride of qp_store
 * @param is_luma    non-zero for luma: selects p->temp_stride as the scratch
 *                   stride and leaves the QP coordinate shifts at 4; for
 *                   chroma the shifts are reduced by hsub / vsub
 *
 * NOTE(review): the embedded line numbers skip values inside this function
 * (for example 191, 194, 202, 204, 222, 226, 230, 233, 240 and 243). The
 * assignment that seeds qy and any conditions selecting between the paired
 * calls below are therefore not visible in this listing.
 */
151 static void filter(FSPPContext *p, uint8_t *dst, uint8_t *src,
152 int dst_stride, int src_stride,
153 int width, int height,
154 uint8_t *qp_store, int qp_stride, int is_luma)
156 int x, x0, y, es, qy, t;
158 const int stride = is_luma ? p->temp_stride : (width + 16);
159 const int step = 6 - p->log2_count;
160 const int qpsh = 4 - p->hsub * !is_luma;
161 const int qpsv = 4 - p->vsub * !is_luma;
/* One aligned int32_t buffer, viewed as int16_t: block holds the row-DCT
 * coefficients, block3 (second half) collects the column pass output. */
163 DECLARE_ALIGNED(32, int32_t, block_align)[4 * 8 * BLOCKSZ + 4 * 8 * BLOCKSZ];
164 int16_t *block = (int16_t *)block_align;
165 int16_t *block3 = (int16_t *)(block_align + 4 * 8 * BLOCKSZ);
/* Clears the first 4 * 8 * BLOCKSZ bytes of block3 (the count is in bytes). */
167 memset(block3, 0, 4 * 8 * BLOCKSZ);
169 if (!src || !dst) return;
/* Copy each source row into p->src at offset (8, 8) and mirror 8 samples
 * beyond the left and right edges. */
171 for (y = 0; y < height; y++) {
172 int index = 8 + 8 * stride + y * stride;
173 memcpy(p->src + index, src + y * src_stride, width);
174 for (x = 0; x < 8; x++) {
175 p->src[index - x - 1] = p->src[index + x ];
176 p->src[index + width + x ] = p->src[index + width - x - 1];
/* Mirror 8 full rows above the first and below the last image row. */
180 for (y = 0; y < 8; y++) {
181 memcpy(p->src + ( 7 - y ) * stride, p->src + ( y + 8 ) * stride, stride);
182 memcpy(p->src + (height + 8 + y) * stride, p->src + (height - y + 7) * stride, stride);
184 //FIXME (try edge emu)
/* Zero rows 8..23 of the accumulation buffer before the first pass. */
186 for (y = 8; y < 24; y++)
187 memset(p->temp + 8 + y * stride, 0, width * sizeof(int16_t));
189 for (y = step; y < height + 8; y += step) { //step= 1,2
190 const int y1 = y - 8 + step; //l5-7 l4-6;
/* qy is clamped to the last row and turned into a row offset in qp_store. */
193 if (qy > height - 1) qy = height - 1;
196 qy = (qy >> qpsv) * qp_stride;
197 p->row_fdct(block, p->src + y * stride + 2 - (y&1), stride, 2);
/* Walk the row in chunks of 8 * (BLOCKSZ - 1) samples. */
199 for (x0 = 0; x0 < width + 8 - 8 * (BLOCKSZ - 1); x0 += 8 * (BLOCKSZ - 1)) {
200 p->row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + 2 - (y&1), stride, 2 * (BLOCKSZ - 1));
203 p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + 0 * 8, block3 + 0 * 8, 8 * (BLOCKSZ - 1)); //yes, this is a HOTSPOT
/* Per-8-sample path: look up the quantizer, normalise it, and rebuild the
 * threshold matrix only when the value differs from the previous one. */
205 for (x = 0; x < 8 * (BLOCKSZ - 1); x += 8) {
206 t = x + x0 - 2; //correct t=x+x0-2-(y&1), but its the same
208 if (t < 0) t = 0; //t always < width-2
210 t = qp_store[qy + (t >> qpsh)];
211 t = ff_norm_qscale(t, p->qscale_type);
213 if (t != p->prev_q) p->prev_q = t, p->mul_thrmat((int16_t *)(&p->threshold_mtx_noq[0]), (int16_t *)(&p->threshold_mtx[0]), t);
214 p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + x * 8, block3 + x * 8, 8); //yes, this is a HOTSPOT
216 p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, 2 * (BLOCKSZ - 1));
/* Carry the tail of both work buffers over to the start for the next chunk. */
217 memmove(block, block + (BLOCKSZ - 1) * 64, 8 * 8 * sizeof(int16_t)); //cycling
218 memmove(block3, block3 + (BLOCKSZ - 1) * 64, 6 * 8 * sizeof(int16_t));
/* Remainder of the row that did not fill a whole chunk. */
221 es = width + 8 - x0; // 8, ...
223 p->row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + 2 - (y & 1), stride, (es - 4) >> 2);
225 p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block, block3, es&(~1));
227 p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, es >> 2);
/* Every time y1 reaches a non-zero multiple of 8, flush 8 finished rows. */
229 if (!(y1 & 7) && y1) {
231 p->store_slice(dst + (y1 - 8) * dst_stride, p->temp + 8 + 8 * stride,
232 dst_stride, stride, width, 8, 5 - p->log2_count);
234 p->store_slice2(dst + (y1 - 8) * dst_stride, p->temp + 8 + 0 * stride,
235 dst_stride, stride, width, 8, 5 - p->log2_count);
/* Flush the remaining (y & 7) rows when the loop ended off an 8-row boundary. */
239 if (y & 7) { // height % 8 != 0
241 p->store_slice(dst + ((y - 8) & ~7) * dst_stride, p->temp + 8 + 8 * stride,
242 dst_stride, stride, width, y&7, 5 - p->log2_count);
244 p->store_slice2(dst + ((y - 8) & ~7) * dst_stride, p->temp + 8 + 0 * stride,
245 dst_stride, stride, width, y&7, 5 - p->log2_count);
/**
 * Column pass: forward transform, thresholding and inverse transform.
 *
 * Works on groups of DCTSIZE adjacent columns. For every column the eight
 * values dataptr[DCTSIZE * 0..7] are combined with butterflies, the resulting
 * terms are passed through THRESHOLD() against the matching threshold row,
 * and the inverse stage is added into wsptr rows 0..5 and assigned to rows 6
 * and 7.
 *
 * @param thr_adr threshold matrix, re-read at the start of every group
 * @param data    input coefficients
 * @param output  workspace receiving the result
 * @param cnt     loop budget; it is decreased by 2 per group of columns
 *
 * NOTE(review): the embedded line numbers skip many values inside this
 * function (for example 255-263, 279-289, 322-332, 349-353), so the pointer
 * setup and a number of intermediate assignments are not visible in this
 * listing.
 */
249 static void column_fidct_c(int16_t *thr_adr, int16_t *data, int16_t *output, int cnt)
251 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
252 int_simd16_t tmp10, tmp11, tmp12, tmp13;
253 int_simd16_t z1,z2,z3,z4,z5, z10, z11, z12, z13;
254 int_simd16_t d0, d1, d2, d3, d4, d5, d6, d7;
264 for (; cnt > 0; cnt -= 2) { //start positions
265 threshold = (int16_t *)thr_adr;//threshold_mtx
266 for (ctr = DCTSIZE; ctr > 0; ctr--) {
267 // Process columns from input, add to output.
/* First butterfly stage: sums and differences of mirrored rows. */
268 tmp0 = dataptr[DCTSIZE * 0] + dataptr[DCTSIZE * 7];
269 tmp7 = dataptr[DCTSIZE * 0] - dataptr[DCTSIZE * 7];
271 tmp1 = dataptr[DCTSIZE * 1] + dataptr[DCTSIZE * 6];
272 tmp6 = dataptr[DCTSIZE * 1] - dataptr[DCTSIZE * 6];
274 tmp2 = dataptr[DCTSIZE * 2] + dataptr[DCTSIZE * 5];
275 tmp5 = dataptr[DCTSIZE * 2] - dataptr[DCTSIZE * 5];
277 tmp3 = dataptr[DCTSIZE * 3] + dataptr[DCTSIZE * 4];
278 tmp4 = dataptr[DCTSIZE * 3] - dataptr[DCTSIZE * 4];
290 z1 = MULTIPLY16H((tmp12 + tmp13) << 2, FIX_0_707106781);
/* Threshold the even-indexed terms d0, d2, d4, d6. */
296 THRESHOLD(tmp0, d0, threshold[0 * 8]);
297 THRESHOLD(tmp1, d2, threshold[2 * 8]);
298 THRESHOLD(tmp2, d4, threshold[4 * 8]);
299 THRESHOLD(tmp3, d6, threshold[6 * 8]);
/* Inverse stage for the even part. */
301 tmp10 = (tmp0 + tmp2) >> 2;
302 tmp11 = (tmp0 - tmp2) >> 2;
304 tmp13 = (tmp1 + tmp3) >>2; //+2 ! (psnr decides)
305 tmp12 = MULTIPLY16H((tmp1 - tmp3), FIX_1_414213562_A) - tmp13; //<<2
307 tmp0 = tmp10 + tmp13; //->temps
308 tmp3 = tmp10 - tmp13; //->temps
309 tmp1 = tmp11 + tmp12; //->temps
310 tmp2 = tmp11 - tmp12; //->temps
318 z5 = MULTIPLY16H((tmp10 - tmp12) << 2, FIX_0_382683433);
319 z2 = MULTIPLY16H(tmp10 << 2, FIX_0_541196100) + z5;
320 z4 = MULTIPLY16H(tmp12 << 2, FIX_1_306562965) + z5;
321 z3 = MULTIPLY16H(tmp11 << 2, FIX_0_707106781);
/* Threshold the odd-indexed terms d1, d3, d5, d7. */
333 THRESHOLD(tmp4, d1, threshold[1 * 8]);
334 THRESHOLD(tmp5, d3, threshold[3 * 8]);
335 THRESHOLD(tmp6, d5, threshold[5 * 8]);
336 THRESHOLD(tmp7, d7, threshold[7 * 8]);
338 //Simd version uses here a shortcut for the tmp5,tmp6,tmp7 == 0
340 z10 = (tmp6 - tmp5) << 1;
342 z12 = (tmp4 - tmp7) << 1;
344 tmp7 = (z11 + z13) >> 2; //+2 !
345 tmp11 = MULTIPLY16H((z11 - z13) << 1, FIX_1_414213562);
346 z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
347 tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
348 tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - !!
/* Rows 0..5 are accumulated into the workspace; rows 6 and 7 are plainly
 * assigned. */
354 wsptr[DCTSIZE * 0] += (tmp0 + tmp7);
355 wsptr[DCTSIZE * 1] += (tmp1 + tmp6);
356 wsptr[DCTSIZE * 2] += (tmp2 + tmp5);
357 wsptr[DCTSIZE * 3] += (tmp3 - tmp4);
358 wsptr[DCTSIZE * 4] += (tmp3 + tmp4);
359 wsptr[DCTSIZE * 5] += (tmp2 - tmp5);
360 wsptr[DCTSIZE * 6] = (tmp1 - tmp6);
361 wsptr[DCTSIZE * 7] = (tmp0 - tmp7);
363 dataptr++; //next column
367 dataptr += 8; //skip each second start pos
/**
 * Row pass of the inverse transform.
 *
 * Each iteration reads eight coefficients wsptr[0..7], runs the inverse
 * butterflies and adds the results, descaled by 3 bits, to eight output
 * samples spaced output_stride apart. wsptr then advances by DCTSIZE.
 *
 * @param workspace     coefficients produced by the column pass
 * @param output_adr    accumulation buffer that receives the samples
 * @param output_stride distance between consecutive output samples
 * @param cnt           number of iterations
 *
 * NOTE(review): the embedded line numbers skip values inside this function
 * (for example 377-382, 407-408, 414, 428-429), so the pointer setup, the
 * assignment of tmp7 and the advance of outptr are not visible in this
 * listing.
 */
372 static void row_idct_c(int16_t *workspace, int16_t *output_adr, ptrdiff_t output_stride, int cnt)
374 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
375 int_simd16_t tmp10, tmp11, tmp12, tmp13;
376 int_simd16_t z5, z10, z11, z12, z13;
383 for (; cnt > 0; cnt--) {
385 //Simd version reads 4x4 block and transposes it
/* Even part, built from wsptr[0..3]. */
386 tmp10 = wsptr[2] + wsptr[3];
387 tmp11 = wsptr[2] - wsptr[3];
389 tmp13 = wsptr[0] + wsptr[1];
390 tmp12 = (MULTIPLY16H(wsptr[0] - wsptr[1], FIX_1_414213562_A) << 2) - tmp13;//this shift order to avoid overflow
392 tmp0 = tmp10 + tmp13; //->temps
393 tmp3 = tmp10 - tmp13; //->temps
394 tmp1 = tmp11 + tmp12;
395 tmp2 = tmp11 - tmp12;
398 //Also transpose, with previous:
400 // ---- ---- idct ||||
401 // ---- ---- ---> ||||
/* Odd part, built from wsptr[4..7]. */
403 z13 = wsptr[4] + wsptr[5];
404 z10 = wsptr[4] - wsptr[5];
405 z11 = wsptr[6] + wsptr[7];
406 z12 = wsptr[6] - wsptr[7];
409 tmp11 = MULTIPLY16H(z11 - z13, FIX_1_414213562);
411 z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
412 tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
413 tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - FIX_
415 tmp6 = (tmp12 << 3) - tmp7;
416 tmp5 = (tmp11 << 3) - tmp6;
417 tmp4 = (tmp10 << 3) + tmp5;
419 // Final output stage: descale and write column
420 outptr[0 * output_stride] += DESCALE(tmp0 + tmp7, 3);
421 outptr[1 * output_stride] += DESCALE(tmp1 + tmp6, 3);
422 outptr[2 * output_stride] += DESCALE(tmp2 + tmp5, 3);
423 outptr[3 * output_stride] += DESCALE(tmp3 - tmp4, 3);
424 outptr[4 * output_stride] += DESCALE(tmp3 + tmp4, 3);
425 outptr[5 * output_stride] += DESCALE(tmp2 - tmp5, 3);
426 outptr[6 * output_stride] += DESCALE(tmp1 - tmp6, 3); //no += ?
427 outptr[7 * output_stride] += DESCALE(tmp0 - tmp7, 3); //no += ?
430 wsptr += DCTSIZE; // advance pointer to next row
/**
 * Row pass of the forward transform, reading 8-bit pixels.
 *
 * Each iteration reads eight pixels spaced line_size apart, runs the forward
 * butterflies and writes eight coefficients to dataptr[0..7]. The even part
 * is stored in slots 2, 3, 0, 1 and the odd part in slots 4..7. The pixel
 * pointer then moves one sample to the right.
 *
 * @param data      destination for the coefficients
 * @param pixels    first of the eight input pixels of the first column
 * @param line_size distance between consecutive input pixels
 * @param cnt       number of iterations
 *
 * NOTE(review): the embedded line numbers skip values inside this function
 * (for example 439-441, 454-460, 481-484, 491-492), so the pointer setup,
 * several intermediate assignments and the advance of dataptr are not visible
 * in this listing.
 */
434 static void row_fdct_c(int16_t *data, const uint8_t *pixels, ptrdiff_t line_size, int cnt)
436 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
437 int_simd16_t tmp10, tmp11, tmp12, tmp13;
438 int_simd16_t z1, z2, z3, z4, z5, z11, z13;
442 // Pass 1: process rows.
445 for (; cnt > 0; cnt--) {
/* First butterfly stage: sums and differences of mirrored pixels. */
446 tmp0 = pixels[line_size * 0] + pixels[line_size * 7];
447 tmp7 = pixels[line_size * 0] - pixels[line_size * 7];
448 tmp1 = pixels[line_size * 1] + pixels[line_size * 6];
449 tmp6 = pixels[line_size * 1] - pixels[line_size * 6];
450 tmp2 = pixels[line_size * 2] + pixels[line_size * 5];
451 tmp5 = pixels[line_size * 2] - pixels[line_size * 5];
452 tmp3 = pixels[line_size * 3] + pixels[line_size * 4];
453 tmp4 = pixels[line_size * 3] - pixels[line_size * 4];
461 //Even columns are written first, this leads to different order of columns
462 //in column_fidct(), but they are processed independently, so all ok.
463 //Later, row_idct() reads the columns in the same order.
464 dataptr[2] = tmp10 + tmp11;
465 dataptr[3] = tmp10 - tmp11;
467 z1 = MULTIPLY16H((tmp12 + tmp13) << 2, FIX_0_707106781);
468 dataptr[0] = tmp13 + z1;
469 dataptr[1] = tmp13 - z1;
/* Odd part. */
473 tmp10 = (tmp4 + tmp5) << 2;
474 tmp11 = (tmp5 + tmp6) << 2;
475 tmp12 = (tmp6 + tmp7) << 2;
477 z5 = MULTIPLY16H(tmp10 - tmp12, FIX_0_382683433);
478 z2 = MULTIPLY16H(tmp10, FIX_0_541196100) + z5;
479 z4 = MULTIPLY16H(tmp12, FIX_1_306562965) + z5;
480 z3 = MULTIPLY16H(tmp11, FIX_0_707106781);
485 dataptr[4] = z13 + z2;
486 dataptr[5] = z13 - z2;
487 dataptr[6] = z11 + z4;
488 dataptr[7] = z11 - z4;
490 pixels++; // advance pointer to next column
/**
 * Advertise the pixel formats the filter accepts.
 *
 * Builds a format list from the static pix_fmts table (planar YUV and YUVJ
 * variants, GBRP and GRAY8 in the visible lines) and installs it as the
 * common format set of the filter.
 *
 * @return the result of ff_set_common_formats(), or AVERROR(ENOMEM)
 *
 * NOTE(review): the embedded line numbers skip 504-506 and 508, so the end
 * of the table and the condition guarding the ENOMEM return are not visible
 * in this listing.
 */
495 static int query_formats(AVFilterContext *ctx)
497 static const enum AVPixelFormat pix_fmts[] = {
498 AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P,
499 AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV411P,
500 AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV440P,
501 AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ422P,
502 AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ440P,
503 AV_PIX_FMT_GBRP, AV_PIX_FMT_GRAY8,
507 AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
509 return AVERROR(ENOMEM);
510 return ff_set_common_formats(ctx, fmts_list);
/**
 * Configure the filter for the negotiated input link.
 *
 * Records the chroma subsampling shifts of the input format, sizes and
 * allocates the two scratch buffers (temp and src) from the link dimensions
 * padded by 16 and rounded up to a multiple of 16, and installs the C
 * implementations of the DSP function pointers before calling
 * ff_fspp_init_x86().
 *
 * @return AVERROR(ENOMEM) when either buffer could not be allocated
 *
 * NOTE(review): the embedded line numbers skip 536-537 and 539-541, so any
 * guard around the x86 init call and the success return are not visible in
 * this listing.
 * NOTE(review): temp and src are assigned without releasing earlier values;
 * confirm whether this callback can run more than once per instance.
 */
513 static int config_input(AVFilterLink *inlink)
515 AVFilterContext *ctx = inlink->dst;
516 FSPPContext *fspp = ctx->priv;
517 const int h = FFALIGN(inlink->h + 16, 16);
518 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
520 fspp->hsub = desc->log2_chroma_w;
521 fspp->vsub = desc->log2_chroma_h;
/* Both buffers hold temp_stride * h elements of their own element type. */
523 fspp->temp_stride = FFALIGN(inlink->w + 16, 16);
524 fspp->temp = av_malloc_array(fspp->temp_stride, h * sizeof(*fspp->temp));
525 fspp->src = av_malloc_array(fspp->temp_stride, h * sizeof(*fspp->src));
527 if (!fspp->temp || !fspp->src)
528 return AVERROR(ENOMEM);
/* Default to the portable C routines defined in this file. */
530 fspp->store_slice = store_slice_c;
531 fspp->store_slice2 = store_slice2_c;
532 fspp->mul_thrmat = mul_thrmat_c;
533 fspp->column_fidct = column_fidct_c;
534 fspp->row_idct = row_idct_c;
535 fspp->row_fdct = row_fdct_c;
538 ff_fspp_init_x86(fspp);
/**
 * Process one input frame and pass the result downstream.
 *
 * Steps visible in this listing:
 *  - rescale custom_threshold[] by (16 + strength) / 71.0 with rounding and
 *    pack the values, four 16-bit entries per 64-bit word, into
 *    threshold_mtx_noq;
 *  - seed prev_q and the quantized threshold matrix from the qp option;
 *  - when no constant qp is set, extract the frame's QP table, and keep it as
 *    the "last non-B" table when B-frame QPs are not to be used;
 *  - when filtering is enabled (log2_count set and the filter not disabled)
 *    and a QP source exists, obtain an output buffer if the input is not
 *    writable or its size is not a multiple of 8, then run filter() on
 *    planes 0, 1 and 2 and copy plane 3 unchanged;
 *  - hand the output frame to ff_filter_frame().
 *
 * @param inlink input link the frame arrived on
 * @param in     input frame
 * @return the value stored in ret (set by ff_qp_table_extract(), an ENOMEM
 *         assignment or ff_filter_frame() in the visible lines)
 *
 * NOTE(review): the embedded line numbers skip many values inside this
 * function (for example 548-550, 552-553, 573, 582-586, 611-612, 627-632,
 * 639, 641-643). The declaration of out, the error paths, the conditions
 * around several statements and the release of qp_table are not visible.
 * NOTE(review): qp_table is declared int8_t * here but filter() takes
 * uint8_t *qp_store -- confirm the intended signedness.
 */
543 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
545 AVFilterContext *ctx = inlink->dst;
546 FSPPContext *fspp = ctx->priv;
547 AVFilterLink *outlink = ctx->outputs[0];
551 int8_t *qp_table = NULL;
554 int custom_threshold_m[64];
/* strength shifts the scaling factor around its neutral value of 16. */
556 bias = (1 << 4) + fspp->strength;
558 for (i = 0; i < 64; i++) //FIXME: tune custom_threshold[] and remove this !
559 custom_threshold_m[i] = (int)(custom_threshold[i] * (bias / 71.0) + 0.5);
/* Pack each table row into two 64-bit words: columns 2, 6, 0, 4 and then
 * columns 5, 3, 1, 7, lowest 16 bits first. */
561 for (i = 0; i < 8; i++) {
562 fspp->threshold_mtx_noq[2 * i] = (uint64_t)custom_threshold_m[i * 8 + 2]
563 |(((uint64_t)custom_threshold_m[i * 8 + 6]) << 16)
564 |(((uint64_t)custom_threshold_m[i * 8 + 0]) << 32)
565 |(((uint64_t)custom_threshold_m[i * 8 + 4]) << 48);
567 fspp->threshold_mtx_noq[2 * i + 1] = (uint64_t)custom_threshold_m[i * 8 + 5]
568 |(((uint64_t)custom_threshold_m[i * 8 + 3]) << 16)
569 |(((uint64_t)custom_threshold_m[i * 8 + 1]) << 32)
570 |(((uint64_t)custom_threshold_m[i * 8 + 7]) << 48);
574 fspp->prev_q = fspp->qp, fspp->mul_thrmat((int16_t *)(&fspp->threshold_mtx_noq[0]), (int16_t *)(&fspp->threshold_mtx[0]), fspp->qp);
576 /* if we are not in a constant user quantizer mode and we don't want to use
577 * the quantizers from the B-frames (B-frames often have a higher QP), we
578 * need to save the qp table from the last non B-frame; this is what the
579 * following code block does */
580 if (!fspp->qp && (fspp->use_bframe_qp || in->pict_type != AV_PICTURE_TYPE_B)) {
581 ret = ff_qp_table_extract(in, &qp_table, &qp_stride, NULL, &fspp->qscale_type);
/* Replace the remembered table; the context now owns qp_table. */
587 if (!fspp->use_bframe_qp && in->pict_type != AV_PICTURE_TYPE_B) {
588 av_freep(&fspp->non_b_qp_table);
589 fspp->non_b_qp_table = qp_table;
590 fspp->non_b_qp_stride = qp_stride;
594 if (fspp->log2_count && !ctx->is_disabled) {
/* Prefer the remembered non-B table when B-frame QPs are not wanted. */
595 if (!fspp->use_bframe_qp && fspp->non_b_qp_table) {
596 qp_table = fspp->non_b_qp_table;
597 qp_stride = fspp->non_b_qp_stride;
600 if (qp_table || fspp->qp) {
/* Chroma plane dimensions, rounded up. */
601 const int cw = AV_CEIL_RSHIFT(inlink->w, fspp->hsub);
602 const int ch = AV_CEIL_RSHIFT(inlink->h, fspp->vsub);
604 /* get a new frame if in-place is not possible or if the dimensions
605 * are not multiple of 8 */
606 if (!av_frame_is_writable(in) || (inlink->w & 7) || (inlink->h & 7)) {
607 const int aligned_w = FFALIGN(inlink->w, 8);
608 const int aligned_h = FFALIGN(inlink->h, 8);
610 out = ff_get_video_buffer(outlink, aligned_w, aligned_h);
613 ret = AVERROR(ENOMEM);
/* The buffer was requested with padded dimensions; restore the real ones. */
616 av_frame_copy_props(out, in);
617 out->width = in->width;
618 out->height = in->height;
621 filter(fspp, out->data[0], in->data[0], out->linesize[0], in->linesize[0],
622 inlink->w, inlink->h, qp_table, qp_stride, 1);
623 filter(fspp, out->data[1], in->data[1], out->linesize[1], in->linesize[1],
624 cw, ch, qp_table, qp_stride, 0);
625 filter(fspp, out->data[2], in->data[2], out->linesize[2], in->linesize[2],
626 cw, ch, qp_table, qp_stride, 0);
/* Plane 3 is copied without filtering. */
633 av_image_copy_plane(out->data[3], out->linesize[3],
634 in ->data[3], in ->linesize[3],
635 inlink->w, inlink->h);
638 ret = ff_filter_frame(outlink, out);
/* A table that was not handed over to the context is local to this call. */
640 if (qp_table != fspp->non_b_qp_table)
645 static av_cold void uninit(AVFilterContext *ctx)
647 FSPPContext *fspp = ctx->priv;
648 av_freep(&fspp->temp);
649 av_freep(&fspp->src);
650 av_freep(&fspp->non_b_qp_table);
/*
 * Input pads of the filter: a single video pad whose link is set up by
 * config_input() and whose frames are handled by filter_frame().
 *
 * NOTE(review): the embedded line numbers skip 654-655 and 659-661, so the
 * pad name and the array terminator are not visible in this listing.
 */
653 static const AVFilterPad fspp_inputs[] = {
656 .type = AVMEDIA_TYPE_VIDEO,
657 .config_props = config_input,
658 .filter_frame = filter_frame,
/*
 * Output pads of the filter: a single video pad with no callbacks in the
 * visible lines.
 *
 * NOTE(review): the embedded line numbers skip 664-665 and 667-669, so the
 * pad name and the array terminator are not visible in this listing.
 */
663 static const AVFilterPad fspp_outputs[] = {
666 .type = AVMEDIA_TYPE_VIDEO,
/*
 * Filter definition that ties the pieces of this file together: private
 * context size, format negotiation callback, pad tables and option class.
 * The flags field declares internal timeline support, which matches the
 * ctx->is_disabled check in filter_frame().
 *
 * NOTE(review): the embedded line numbers skip 672 and 675, so two
 * initializer entries are not visible in this listing.
 */
671 AVFilter ff_vf_fspp = {
673 .description = NULL_IF_CONFIG_SMALL("Apply Fast Simple Post-processing filter."),
674 .priv_size = sizeof(FSPPContext),
676 .query_formats = query_formats,
677 .inputs = fspp_inputs,
678 .outputs = fspp_outputs,
679 .priv_class = &fspp_class,
680 .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,