2 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
3 * Copyright (C) 2005 Nikolaj Poroshin <porosh3@psu.ru>
4 * Copyright (c) 2014 Arwa Arif <arwaarif1994@gmail.com>
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License along
19 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * Fast Simple Post-processing filter
26 * This implementation is based on an algorithm described in
27 * "Aria Nosratinia Embedded Post-Processing for
28 * Enhancement of Compressed Images (1999)"
29 * (http://www.utdallas.edu/~aria/papers/vlsisp99.pdf)
30 * Further, with splitting (I)DCT into horizontal/vertical passes, one of
31 * them can be performed once per block, not per pixel. This allows for much higher speed.
34 * Originally written by Michael Niedermayer and Nikolaj for the MPlayer
35 * project, and ported by Arwa Arif for FFmpeg.
38 #include "libavutil/avassert.h"
39 #include "libavutil/imgutils.h"
40 #include "libavutil/mem_internal.h"
41 #include "libavutil/opt.h"
42 #include "libavutil/pixdesc.h"
/* Byte offset of member x inside FSPPContext; each option entry below uses it to name its backing field. */
46 #define OFFSET(x) offsetof(FSPPContext, x)
/* Flag set attached to every option in this table (filtering parameter | video parameter). */
47 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
/*
 * User-visible options of the fspp filter.
 * Each entry lists: name, help text, backing field, type, default value,
 * then two numeric bounds (presumably min and max per the AVOption layout
 * -- confirm against libavutil/opt.h) and the flags.
 */
48 static const AVOption fspp_options[] = {
/* quality -> log2_count; filter() derives its row step as 6 - log2_count. */
49 { "quality", "set quality", OFFSET(log2_count), AV_OPT_TYPE_INT, {.i64 = 4}, 4, MAX_LEVEL, FLAGS },
/* qp -> qp; a non-zero value is used by filter_frame() to scale the threshold matrix directly. */
50 { "qp", "force a constant quantizer parameter", OFFSET(qp), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 64, FLAGS },
/* strength -> strength; filter_frame() adds it to (1 << 4) to form the threshold bias. */
51 { "strength", "set filter strength", OFFSET(strength), AV_OPT_TYPE_INT, {.i64 = 0}, -15, 32, FLAGS },
/* use_bframe_qp -> use_bframe_qp; when 0, filter_frame() caches the QP table of non-B frames. */
52 { "use_bframe_qp", "use B-frames' QP", OFFSET(use_bframe_qp), AV_OPT_TYPE_BOOL,{.i64 = 0}, 0, 1, FLAGS },
/* Presumably defines fspp_class, which ff_vf_fspp references as &fspp_class -- confirm in the macro definition. */
56 AVFILTER_DEFINE_CLASS(fspp);
/*
 * 8x8 dither table with entries in the range 0..63, declared with 32-byte
 * alignment. store_slice_c() and store_slice2_c() select one row per output
 * line via dither[y] and add the entry, shifted right by log2_scale, to each
 * sample before the final down-shift.
 */
58 DECLARE_ALIGNED(32, static const uint8_t, dither)[8][8] = {
59 { 0, 48, 12, 60, 3, 51, 15, 63, },
60 { 32, 16, 44, 28, 35, 19, 47, 31, },
61 { 8, 56, 4, 52, 11, 59, 7, 55, },
62 { 40, 24, 36, 20, 43, 27, 39, 23, },
63 { 2, 50, 14, 62, 1, 49, 13, 61, },
64 { 34, 18, 46, 30, 33, 17, 45, 29, },
65 { 10, 58, 6, 54, 9, 57, 5, 53, },
66 { 42, 26, 38, 22, 41, 25, 37, 21, },
/*
 * Base threshold table, 64 entries laid out as 8 rows of 8.
 * filter_frame() scales every entry by (bias / 71.0), rounds it, and packs
 * the results four at a time into threshold_mtx_noq, addressing this table
 * as custom_threshold[i * 8 + column].
 */
69 static const short custom_threshold[64] = {
70 // values (296) can't be too high
71 // -it causes too big quant dependence
72 // or maybe overflow(check), which results in some flashing
73 71, 296, 295, 237, 71, 40, 38, 19,
74 245, 193, 185, 121, 102, 73, 53, 27,
75 158, 129, 141, 107, 97, 73, 50, 26,
76 102, 116, 109, 98, 82, 66, 45, 23,
77 71, 94, 95, 81, 70, 56, 38, 20,
78 56, 77, 74, 66, 56, 44, 30, 15,
79 38, 53, 50, 45, 38, 30, 21, 11,
80 20, 27, 26, 23, 20, 15, 11, 5
/*
 * Emit one slice of accumulated int16_t samples from src towards the
 * uint8_t plane dst.
 *
 * For each row y < height the dither row dither[y] is selected and x walks
 * across width in steps of 8. The per-sample macro body:
 *   - adds the dither entry, shifted right by log2_scale, to src[x + pos]
 *     and shifts the sum right by (6 - log2_scale);
 *   - zeroes src[x + pos] and the sample 8 * src_stride elements before it;
 *   - if bit 8 of the result is set, replaces it with ~(temp >> 31)
 *     (assuming temp is a 32-bit int: 0 for negative values, all ones
 *     otherwise -- presumably a cheap clamp before the store to dst).
 *
 * dither has 8 rows, so height must not exceed 8; filter() passes either 8
 * or (y & 7).
 */
83 //Reads from slice 1 and clears slices 0 and 1
84 static void store_slice_c(uint8_t *dst, int16_t *src,
85 ptrdiff_t dst_stride, ptrdiff_t src_stride,
86 ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
90 temp = (src[x + pos] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
91 src[x + pos] = src[x + pos - 8 * src_stride] = 0; \
92 if (temp & 0x100) temp = ~(temp >> 31); \
95 for (y = 0; y < height; y++) {
96 const uint8_t *d = dither[y];
97 for (x = 0; x < width; x += 8) {
/*
 * Variant of store_slice_c() that combines two slices.
 *
 * For each row y < height and each x in steps of 8, the per-sample macro
 * body sums src[x + pos] with the sample 16 * src_stride elements further
 * on, adds the dither entry shifted right by log2_scale, and shifts the
 * total right by (6 - log2_scale). Only the sample at +16 * src_stride is
 * zeroed afterwards. The bit-8 test and ~(temp >> 31) replacement are the
 * same as in store_slice_c().
 *
 * dither has 8 rows, so height must not exceed 8; filter() passes either 8
 * or (y & 7).
 */
113 //Reads from slices 0 and 2, and clears slice 2
114 static void store_slice2_c(uint8_t *dst, int16_t *src,
115 ptrdiff_t dst_stride, ptrdiff_t src_stride,
116 ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
119 #define STORE2(pos) \
120 temp = (src[x + pos] + src[x + pos + 16 * src_stride] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
121 src[x + pos + 16 * src_stride] = 0; \
122 if (temp & 0x100) temp = ~(temp >> 31); \
125 for (y = 0; y < height; y++) {
126 const uint8_t *d = dither[y];
127 for (x = 0; x < width; x += 8) {
/**
 * Scale a 64-entry threshold matrix by a quantizer value.
 *
 * @param thr_adr_noq  unscaled thresholds, 64 entries; only read
 * @param thr_adr      destination, 64 entries; receives q * thr_adr_noq[i]
 * @param q            quantizer multiplier
 *
 * The product is formed in int and narrowed to int16_t on store, so callers
 * must keep q * threshold within the int16_t range.
 */
static void mul_thrmat_c(int16_t *thr_adr_noq, int16_t *thr_adr, int q)
{
    int a;

    for (a = 0; a < 64; a++) {
        thr_adr[a] = q * thr_adr_noq[a];
    }
}
/*
 * Filter one plane of a frame.
 *
 * p          filter context (work buffers p->src / p->temp, function pointers)
 * dst, src   destination / source plane; the function returns at once if
 *            either is NULL
 * width, height   plane size in pixels
 * qp_store, qp_stride   per-block quantizer table and its row stride
 * is_luma    non-zero for plane 0; chroma planes use a stride of width + 16
 *            and reduce the QP-lookup shifts by the chroma subsampling
 *
 * Outline of the visible code: the plane is copied into p->src with an
 * 8-pixel mirrored border on all sides, rows 8..23 of p->temp are cleared,
 * and then rows are processed every `step` lines through
 * row_fdct -> column_fidct -> row_idct, accumulating into p->temp. Each time
 * 8 rows are complete they are written out with store_slice() or
 * store_slice2(); a final partial slice handles heights that are not a
 * multiple of 8.
 */
150 static void filter(FSPPContext *p, uint8_t *dst, uint8_t *src,
151 int dst_stride, int src_stride,
152 int width, int height,
153 uint8_t *qp_store, int qp_stride, int is_luma)
155 int x, x0, y, es, qy, t;
/* Luma uses the stride configured in config_input(); chroma uses width + 16. */
157 const int stride = is_luma ? p->temp_stride : (width + 16);
158 const int step = 6 - p->log2_count;
/* Right-shifts turning a pixel position into a QP-table index (4, less the chroma shift for chroma planes). */
159 const int qpsh = 4 - p->hsub * !is_luma;
160 const int qpsv = 4 - p->vsub * !is_luma;
/* One aligned scratch array split into two halves: block (first) and block3 (second). */
162 DECLARE_ALIGNED(32, int32_t, block_align)[4 * 8 * BLOCKSZ + 4 * 8 * BLOCKSZ];
163 int16_t *block = (int16_t *)block_align;
164 int16_t *block3 = (int16_t *)(block_align + 4 * 8 * BLOCKSZ);
/* NOTE(review): the size argument is in bytes, so this clears 4 * 8 * BLOCKSZ bytes,
 * which is less than the 4 * 8 * BLOCKSZ int32_t elements reserved for block3 -- confirm this is intended. */
166 memset(block3, 0, 4 * 8 * BLOCKSZ);
168 if (!src || !dst) return;
/* Copy every source row into p->src at offset (8, 8) and mirror 8 pixels past the left and right edges. */
170 for (y = 0; y < height; y++) {
171 int index = 8 + 8 * stride + y * stride;
172 memcpy(p->src + index, src + y * src_stride, width);
173 for (x = 0; x < 8; x++) {
174 p->src[index - x - 1] = p->src[index + x ];
175 p->src[index + width + x ] = p->src[index + width - x - 1];
/* Mirror 8 full rows above the first and below the last image row. */
179 for (y = 0; y < 8; y++) {
180 memcpy(p->src + ( 7 - y ) * stride, p->src + ( y + 8 ) * stride, stride);
181 memcpy(p->src + (height + 8 + y) * stride, p->src + (height - y + 7) * stride, stride);
183 //FIXME (try edge emu)
/* Clear rows 8..23 of the accumulation buffer before the first pass. */
185 for (y = 8; y < 24; y++)
186 memset(p->temp + 8 + y * stride, 0, width * sizeof(int16_t));
188 for (y = step; y < height + 8; y += step) { //step= 1,2
189 const int y1 = y - 8 + step; //l5-7 l4-6;
192 if (qy > height - 1) qy = height - 1;
/* Convert the clamped row into a row offset inside the QP table. */
195 qy = (qy >> qpsv) * qp_stride;
196 p->row_fdct(block, p->src + y * stride + 2 - (y&1), stride, 2);
/* Walk the row in chunks of 8 * (BLOCKSZ - 1) columns. */
198 for (x0 = 0; x0 < width + 8 - 8 * (BLOCKSZ - 1); x0 += 8 * (BLOCKSZ - 1)) {
199 p->row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + 2 - (y&1), stride, 2 * (BLOCKSZ - 1));
202 p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + 0 * 8, block3 + 0 * 8, 8 * (BLOCKSZ - 1)); //yes, this is a HOTSPOT
/* Per-8-column path: look up the QP for each position and rescale the thresholds only when it changes. */
204 for (x = 0; x < 8 * (BLOCKSZ - 1); x += 8) {
205 t = x + x0 - 2; //correct t=x+x0-2-(y&1), but it's the same
207 if (t < 0) t = 0; //t always < width-2
209 t = qp_store[qy + (t >> qpsh)];
210 t = ff_norm_qscale(t, p->qscale_type);
212 if (t != p->prev_q) p->prev_q = t, p->mul_thrmat((int16_t *)(&p->threshold_mtx_noq[0]), (int16_t *)(&p->threshold_mtx[0]), t);
213 p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + x * 8, block3 + x * 8, 8); //yes, this is a HOTSPOT
215 p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, 2 * (BLOCKSZ - 1));
/* Move the trailing data of both scratch buffers to the front for the next chunk. */
216 memmove(block, block + (BLOCKSZ - 1) * 64, 8 * 8 * sizeof(int16_t)); //cycling
217 memmove(block3, block3 + (BLOCKSZ - 1) * 64, 6 * 8 * sizeof(int16_t));
/* Columns left over after the last full chunk. */
220 es = width + 8 - x0; // 8, ...
222 p->row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + 2 - (y & 1), stride, (es - 4) >> 2);
224 p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block, block3, es&(~1));
226 p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, es >> 2);
/* Every time y1 reaches a non-zero multiple of 8, flush a finished 8-row slice to dst. */
228 if (!(y1 & 7) && y1) {
230 p->store_slice(dst + (y1 - 8) * dst_stride, p->temp + 8 + 8 * stride,
231 dst_stride, stride, width, 8, 5 - p->log2_count);
233 p->store_slice2(dst + (y1 - 8) * dst_stride, p->temp + 8 + 0 * stride,
234 dst_stride, stride, width, 8, 5 - p->log2_count);
/* Flush the remaining (y & 7) rows when the loop did not end on a slice boundary. */
238 if (y & 7) { // height % 8 != 0
240 p->store_slice(dst + ((y - 8) & ~7) * dst_stride, p->temp + 8 + 8 * stride,
241 dst_stride, stride, width, y&7, 5 - p->log2_count);
243 p->store_slice2(dst + ((y - 8) & ~7) * dst_stride, p->temp + 8 + 0 * stride,
244 dst_stride, stride, width, y&7, 5 - p->log2_count);
/*
 * Column pass: forward transform, thresholding and inverse transform in one
 * function, with the result accumulated into `output`.
 *
 * thr_adr  threshold matrix, addressed as threshold[k * 8] for k = 0..7
 * data     input coefficients; each column is read at dataptr[DCTSIZE * k]
 * output   accumulation workspace, addressed as wsptr[DCTSIZE * k]
 * cnt      amount of work; the outer loop consumes it two at a time
 *
 * The THRESHOLD and MULTIPLY16H macros and the FIX_* constants are defined
 * outside this function.
 */
248 static void column_fidct_c(int16_t *thr_adr, int16_t *data, int16_t *output, int cnt)
250 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
251 int_simd16_t tmp10, tmp11, tmp12, tmp13;
252 int_simd16_t z1,z2,z3,z4,z5, z10, z11, z12, z13;
253 int_simd16_t d0, d1, d2, d3, d4, d5, d6, d7;
263 for (; cnt > 0; cnt -= 2) { //start positions
264 threshold = (int16_t *)thr_adr;//threshold_mtx
265 for (ctr = DCTSIZE; ctr > 0; ctr--) {
266 // Process columns from input, add to output.
/* Pair the column's samples symmetrically (0/7, 1/6, 2/5, 3/4) into sums and differences. */
267 tmp0 = dataptr[DCTSIZE * 0] + dataptr[DCTSIZE * 7];
268 tmp7 = dataptr[DCTSIZE * 0] - dataptr[DCTSIZE * 7];
270 tmp1 = dataptr[DCTSIZE * 1] + dataptr[DCTSIZE * 6];
271 tmp6 = dataptr[DCTSIZE * 1] - dataptr[DCTSIZE * 6];
273 tmp2 = dataptr[DCTSIZE * 2] + dataptr[DCTSIZE * 5];
274 tmp5 = dataptr[DCTSIZE * 2] - dataptr[DCTSIZE * 5];
276 tmp3 = dataptr[DCTSIZE * 3] + dataptr[DCTSIZE * 4];
277 tmp4 = dataptr[DCTSIZE * 3] - dataptr[DCTSIZE * 4];
289 z1 = MULTIPLY16H((tmp12 + tmp13) << 2, FIX_0_707106781);
/* Threshold the coefficients d0, d2, d4, d6 against threshold rows 0, 2, 4, 6. */
295 THRESHOLD(tmp0, d0, threshold[0 * 8]);
296 THRESHOLD(tmp1, d2, threshold[2 * 8]);
297 THRESHOLD(tmp2, d4, threshold[4 * 8]);
298 THRESHOLD(tmp3, d6, threshold[6 * 8]);
/* Recombine the thresholded terms into tmp0..tmp3. */
300 tmp10 = (tmp0 + tmp2) >> 2;
301 tmp11 = (tmp0 - tmp2) >> 2;
303 tmp13 = (tmp1 + tmp3) >>2; //+2 ! (psnr decides)
304 tmp12 = MULTIPLY16H((tmp1 - tmp3), FIX_1_414213562_A) - tmp13; //<<2
306 tmp0 = tmp10 + tmp13; //->temps
307 tmp3 = tmp10 - tmp13; //->temps
308 tmp1 = tmp11 + tmp12; //->temps
309 tmp2 = tmp11 - tmp12; //->temps
317 z5 = MULTIPLY16H((tmp10 - tmp12) << 2, FIX_0_382683433);
318 z2 = MULTIPLY16H(tmp10 << 2, FIX_0_541196100) + z5;
319 z4 = MULTIPLY16H(tmp12 << 2, FIX_1_306562965) + z5;
320 z3 = MULTIPLY16H(tmp11 << 2, FIX_0_707106781);
/* Threshold the coefficients d1, d3, d5, d7 against threshold rows 1, 3, 5, 7. */
332 THRESHOLD(tmp4, d1, threshold[1 * 8]);
333 THRESHOLD(tmp5, d3, threshold[3 * 8]);
334 THRESHOLD(tmp6, d5, threshold[5 * 8]);
335 THRESHOLD(tmp7, d7, threshold[7 * 8]);
337 //Simd version uses here a shortcut for the tmp5,tmp6,tmp7 == 0
339 z10 = (tmp6 - tmp5) << 1;
341 z12 = (tmp4 - tmp7) << 1;
343 tmp7 = (z11 + z13) >> 2; //+2 !
344 tmp11 = MULTIPLY16H((z11 - z13) << 1, FIX_1_414213562);
345 z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
346 tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
347 tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - !!
/* Rows 0..5 of the workspace are accumulated (+=); rows 6 and 7 are overwritten (=). */
353 wsptr[DCTSIZE * 0] += (tmp0 + tmp7);
354 wsptr[DCTSIZE * 1] += (tmp1 + tmp6);
355 wsptr[DCTSIZE * 2] += (tmp2 + tmp5);
356 wsptr[DCTSIZE * 3] += (tmp3 - tmp4);
357 wsptr[DCTSIZE * 4] += (tmp3 + tmp4);
358 wsptr[DCTSIZE * 5] += (tmp2 - tmp5);
359 wsptr[DCTSIZE * 6] = (tmp1 - tmp6);
360 wsptr[DCTSIZE * 7] = (tmp0 - tmp7);
362 dataptr++; //next column
366 dataptr += 8; //skip each second start pos
/*
 * Row pass of the inverse transform.
 *
 * workspace      coefficients, consumed DCTSIZE entries per iteration
 *                (wsptr[0..7])
 * output_adr     accumulation target; each iteration adds 8 values at
 *                outptr[k * output_stride], k = 0..7
 * output_stride  distance in elements between successive output rows
 * cnt            number of iterations
 *
 * Results are rounded with DESCALE(..., 3) and added to the existing output.
 * MULTIPLY16H, DESCALE and the FIX_* constants are defined outside this
 * function.
 */
371 static void row_idct_c(int16_t *workspace, int16_t *output_adr, ptrdiff_t output_stride, int cnt)
373 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
374 int_simd16_t tmp10, tmp11, tmp12, tmp13;
375 int_simd16_t z5, z10, z11, z12, z13;
382 for (; cnt > 0; cnt--) {
384 //Simd version reads 4x4 block and transposes it
/* First half: combines workspace entries 0..3 into tmp0..tmp3. */
385 tmp10 = wsptr[2] + wsptr[3];
386 tmp11 = wsptr[2] - wsptr[3];
388 tmp13 = wsptr[0] + wsptr[1];
389 tmp12 = (MULTIPLY16H(wsptr[0] - wsptr[1], FIX_1_414213562_A) << 2) - tmp13;//this shift order to avoid overflow
391 tmp0 = tmp10 + tmp13; //->temps
392 tmp3 = tmp10 - tmp13; //->temps
393 tmp1 = tmp11 + tmp12;
394 tmp2 = tmp11 - tmp12;
397 //Also transpose, with previous:
399 // ---- ---- idct ||||
400 // ---- ---- ---> ||||
/* Second half: combines workspace entries 4..7 into tmp4..tmp7. */
402 z13 = wsptr[4] + wsptr[5];
403 z10 = wsptr[4] - wsptr[5];
404 z11 = wsptr[6] + wsptr[7];
405 z12 = wsptr[6] - wsptr[7];
408 tmp11 = MULTIPLY16H(z11 - z13, FIX_1_414213562);
410 z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
411 tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
412 tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - FIX_
/* Each term depends on the previous one, so the order tmp6 -> tmp5 -> tmp4 matters. */
414 tmp6 = (tmp12 << 3) - tmp7;
415 tmp5 = (tmp11 << 3) - tmp6;
416 tmp4 = (tmp10 << 3) + tmp5;
418 // Final output stage: descale and write column
419 outptr[0 * output_stride] += DESCALE(tmp0 + tmp7, 3);
420 outptr[1 * output_stride] += DESCALE(tmp1 + tmp6, 3);
421 outptr[2 * output_stride] += DESCALE(tmp2 + tmp5, 3);
422 outptr[3 * output_stride] += DESCALE(tmp3 - tmp4, 3);
423 outptr[4 * output_stride] += DESCALE(tmp3 + tmp4, 3);
424 outptr[5 * output_stride] += DESCALE(tmp2 - tmp5, 3);
425 outptr[6 * output_stride] += DESCALE(tmp1 - tmp6, 3); //no += ?
426 outptr[7 * output_stride] += DESCALE(tmp0 - tmp7, 3); //no += ?
429 wsptr += DCTSIZE; // advance pointer to next row
/*
 * Row pass of the forward transform, reading 8-bit pixels directly.
 *
 * data       destination for the coefficients (written through dataptr[0..7])
 * pixels     top pixel of the first column to transform
 * line_size  distance in bytes between vertically adjacent pixels
 * cnt        number of columns to process; pixels advances by one per
 *            iteration
 *
 * Each iteration reads the 8 pixels pixels[line_size * k], k = 0..7, pairs
 * them symmetrically (0/7, 1/6, 2/5, 3/4) into sums and differences, and
 * writes 8 coefficients. MULTIPLY16H and the FIX_* constants are defined
 * outside this function.
 */
433 static void row_fdct_c(int16_t *data, const uint8_t *pixels, ptrdiff_t line_size, int cnt)
435 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
436 int_simd16_t tmp10, tmp11, tmp12, tmp13;
437 int_simd16_t z1, z2, z3, z4, z5, z11, z13;
441 // Pass 1: process rows.
444 for (; cnt > 0; cnt--) {
445 tmp0 = pixels[line_size * 0] + pixels[line_size * 7];
446 tmp7 = pixels[line_size * 0] - pixels[line_size * 7];
447 tmp1 = pixels[line_size * 1] + pixels[line_size * 6];
448 tmp6 = pixels[line_size * 1] - pixels[line_size * 6];
449 tmp2 = pixels[line_size * 2] + pixels[line_size * 5];
450 tmp5 = pixels[line_size * 2] - pixels[line_size * 5];
451 tmp3 = pixels[line_size * 3] + pixels[line_size * 4];
452 tmp4 = pixels[line_size * 3] - pixels[line_size * 4];
460 //Even columns are written first, this leads to different order of columns
461 //in column_fidct(), but they are processed independently, so all ok.
462 //Later, row_idct() reads the columns in the same order.
463 dataptr[2] = tmp10 + tmp11;
464 dataptr[3] = tmp10 - tmp11;
466 z1 = MULTIPLY16H((tmp12 + tmp13) << 2, FIX_0_707106781);
467 dataptr[0] = tmp13 + z1;
468 dataptr[1] = tmp13 - z1;
/* Difference terms: pre-scaled by << 2 before the fixed-point multiplies. */
472 tmp10 = (tmp4 + tmp5) << 2;
473 tmp11 = (tmp5 + tmp6) << 2;
474 tmp12 = (tmp6 + tmp7) << 2;
476 z5 = MULTIPLY16H(tmp10 - tmp12, FIX_0_382683433);
477 z2 = MULTIPLY16H(tmp10, FIX_0_541196100) + z5;
478 z4 = MULTIPLY16H(tmp12, FIX_1_306562965) + z5;
479 z3 = MULTIPLY16H(tmp11, FIX_0_707106781);
484 dataptr[4] = z13 + z2;
485 dataptr[5] = z13 - z2;
486 dataptr[6] = z11 + z4;
487 dataptr[7] = z11 - z4;
489 pixels++; // advance pointer to next column
/*
 * Advertise the pixel formats this filter accepts.
 *
 * Builds a format list from pix_fmts[] with ff_make_format_list() and hands
 * it to ff_set_common_formats(). Returns AVERROR(ENOMEM) on the failure path
 * (presumably when the list could not be allocated), otherwise the result of
 * ff_set_common_formats().
 */
494 static int query_formats(AVFilterContext *ctx)
/* Planar 8-bit YUV / YUVJ variants plus GBRP and GRAY8. */
496 static const enum AVPixelFormat pix_fmts[] = {
497 AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P,
498 AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV411P,
499 AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV440P,
500 AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ422P,
501 AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ440P,
502 AV_PIX_FMT_GBRP, AV_PIX_FMT_GRAY8,
506 AVFilterFormats *fmts_list = ff_make_format_list(pix_fmts);
508 return AVERROR(ENOMEM);
509 return ff_set_common_formats(ctx, fmts_list);
/*
 * Input-link configuration callback.
 *
 * Records the chroma subsampling shifts of the negotiated format, allocates
 * the two work buffers (temp and src) sized temp_stride x h, optionally
 * allocates the cached QP table, and installs the C implementations of the
 * DSP function pointers before calling ff_fspp_init_x86().
 *
 * Returns AVERROR(ENOMEM) if an allocation fails. Buffers allocated before
 * the failure are left in the context; uninit() releases them with av_freep().
 */
512 static int config_input(AVFilterLink *inlink)
514 AVFilterContext *ctx = inlink->dst;
515 FSPPContext *fspp = ctx->priv;
/* Height plus 16 extra rows, rounded up to a multiple of 16. */
516 const int h = FFALIGN(inlink->h + 16, 16);
517 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
519 fspp->hsub = desc->log2_chroma_w;
520 fspp->vsub = desc->log2_chroma_h;
/* Width plus 16 extra columns, rounded up to a multiple of 16. */
522 fspp->temp_stride = FFALIGN(inlink->w + 16, 16);
523 fspp->temp = av_malloc_array(fspp->temp_stride, h * sizeof(*fspp->temp));
524 fspp->src = av_malloc_array(fspp->temp_stride, h * sizeof(*fspp->src));
526 if (!fspp->temp || !fspp->src)
527 return AVERROR(ENOMEM);
/* The cached QP table is only needed when neither a constant qp nor B-frame QPs are used. */
529 if (!fspp->use_bframe_qp && !fspp->qp) {
/* One entry per 16x16 area, rounding partial areas up. */
530 fspp->non_b_qp_alloc_size = AV_CEIL_RSHIFT(inlink->w, 4) * AV_CEIL_RSHIFT(inlink->h, 4);
531 fspp->non_b_qp_table = av_calloc(fspp->non_b_qp_alloc_size, sizeof(*fspp->non_b_qp_table));
532 if (!fspp->non_b_qp_table)
533 return AVERROR(ENOMEM);
/* Default to the portable C implementations. */
536 fspp->store_slice = store_slice_c;
537 fspp->store_slice2 = store_slice2_c;
538 fspp->mul_thrmat = mul_thrmat_c;
539 fspp->column_fidct = column_fidct_c;
540 fspp->row_idct = row_idct_c;
541 fspp->row_fdct = row_fdct_c;
/* Presumably replaces some of the pointers above with x86-optimised versions -- confirm in ff_fspp_init_x86(). */
544 ff_fspp_init_x86(fspp);
/*
 * Per-frame callback of the input pad.
 *
 * Steps visible here:
 *  - rebuild threshold_mtx_noq from custom_threshold[], scaled by
 *    bias / 71.0 where bias = (1 << 4) + strength;
 *  - fetch the frame's QP table; for non-B frames, when B-frame QPs are not
 *    to be used, copy it into non_b_qp_table (growing the table if needed);
 *  - if log2_count is non-zero, the filter is not disabled, and either a QP
 *    table or a constant qp is available, run filter() on planes 0..2
 *    (planes 1 and 2 use the subsampled size cw x ch);
 *  - forward the output frame with ff_filter_frame().
 */
549 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
551 AVFilterContext *ctx = inlink->dst;
552 FSPPContext *fspp = ctx->priv;
553 AVFilterLink *outlink = ctx->outputs[0];
557 uint8_t *qp_table = NULL;
559 int custom_threshold_m[64];
/* strength ranges over -15..32 in the option table, so bias stays positive. */
561 bias = (1 << 4) + fspp->strength;
563 for (i = 0; i < 64; i++) //FIXME: tune custom_threshold[] and remove this !
564 custom_threshold_m[i] = (int)(custom_threshold[i] * (bias / 71.0) + 0.5);
/* Pack row i of the scaled table into two 64-bit words, four 16-bit fields each:
 * word 2i holds columns 2, 6, 0, 4 and word 2i + 1 holds columns 5, 3, 1, 7
 * (listed from the lowest 16 bits upwards). */
566 for (i = 0; i < 8; i++) {
567 fspp->threshold_mtx_noq[2 * i] = (uint64_t)custom_threshold_m[i * 8 + 2]
568 |(((uint64_t)custom_threshold_m[i * 8 + 6]) << 16)
569 |(((uint64_t)custom_threshold_m[i * 8 + 0]) << 32)
570 |(((uint64_t)custom_threshold_m[i * 8 + 4]) << 48);
572 fspp->threshold_mtx_noq[2 * i + 1] = (uint64_t)custom_threshold_m[i * 8 + 5]
573 |(((uint64_t)custom_threshold_m[i * 8 + 3]) << 16)
574 |(((uint64_t)custom_threshold_m[i * 8 + 1]) << 32)
575 |(((uint64_t)custom_threshold_m[i * 8 + 7]) << 48);
/* Prime threshold_mtx with the user-supplied qp and remember it as the last QP used. */
579 fspp->prev_q = fspp->qp, fspp->mul_thrmat((int16_t *)(&fspp->threshold_mtx_noq[0]), (int16_t *)(&fspp->threshold_mtx[0]), fspp->qp);
581 /* if we are not in a constant user quantizer mode and we don't want to use
582 * the quantizers from the B-frames (B-frames often have a higher QP), we
583 * need to save the qp table from the last non B-frame; this is what the
584 * following code block does */
586 qp_table = av_frame_get_qp_table(in, &qp_stride, &fspp->qscale_type);
588 if (qp_table && !fspp->use_bframe_qp && in->pict_type != AV_PICTURE_TYPE_B) {
591 /* if the qp stride is not set, it means the QP are only defined on
594 w = AV_CEIL_RSHIFT(inlink->w, 4);
598 h = AV_CEIL_RSHIFT(inlink->h, 4);
/* Grow the cached table when the current one is too small. */
600 if (w * h > fspp->non_b_qp_alloc_size) {
601 int ret = av_reallocp_array(&fspp->non_b_qp_table, w, h);
/* Presumably the failure path: the recorded size is reset so a stale size is never trusted. */
603 fspp->non_b_qp_alloc_size = 0;
606 fspp->non_b_qp_alloc_size = w * h;
609 av_assert0(w * h <= fspp->non_b_qp_alloc_size);
610 memcpy(fspp->non_b_qp_table, qp_table, w * h);
614 if (fspp->log2_count && !ctx->is_disabled) {
/* Prefer the cached non-B QP table when B-frame QPs are not wanted. */
615 if (!fspp->use_bframe_qp && fspp->non_b_qp_table)
616 qp_table = fspp->non_b_qp_table;
618 if (qp_table || fspp->qp) {
/* Chroma plane dimensions, rounded up. */
619 const int cw = AV_CEIL_RSHIFT(inlink->w, fspp->hsub);
620 const int ch = AV_CEIL_RSHIFT(inlink->h, fspp->vsub);
622 /* get a new frame if in-place is not possible or if the dimensions
623 * are not multiple of 8 */
624 if (!av_frame_is_writable(in) || (inlink->w & 7) || (inlink->h & 7)) {
625 const int aligned_w = FFALIGN(inlink->w, 8);
626 const int aligned_h = FFALIGN(inlink->h, 8);
628 out = ff_get_video_buffer(outlink, aligned_w, aligned_h);
631 return AVERROR(ENOMEM);
/* The buffer was requested with padded dimensions; restore the real frame size. */
633 av_frame_copy_props(out, in);
634 out->width = in->width;
635 out->height = in->height;
/* Plane 0 at full size, planes 1 and 2 at chroma size. */
638 filter(fspp, out->data[0], in->data[0], out->linesize[0], in->linesize[0],
639 inlink->w, inlink->h, qp_table, qp_stride, 1);
640 filter(fspp, out->data[1], in->data[1], out->linesize[1], in->linesize[1],
641 cw, ch, qp_table, qp_stride, 0);
642 filter(fspp, out->data[2], in->data[2], out->linesize[2], in->linesize[2],
643 cw, ch, qp_table, qp_stride, 0);
/* Plane 3 is copied through unfiltered (presumably the alpha plane when one exists). */
650 av_image_copy_plane(out->data[3], out->linesize[3],
651 in ->data[3], in ->linesize[3],
652 inlink->w, inlink->h);
655 return ff_filter_frame(outlink, out);
/*
 * Filter teardown: releases the three buffers owned by the context
 * (temp, src and non_b_qp_table). av_freep() is given the address of each
 * pointer, so it can be called for buffers that were never allocated.
 */
658 static av_cold void uninit(AVFilterContext *ctx)
660 FSPPContext *fspp = ctx->priv;
661 av_freep(&fspp->temp);
662 av_freep(&fspp->src);
663 av_freep(&fspp->non_b_qp_table);
/* Input pad: a video pad configured by config_input() and fed frame by frame through filter_frame(). */
666 static const AVFilterPad fspp_inputs[] = {
669 .type = AVMEDIA_TYPE_VIDEO,
670 .config_props = config_input,
671 .filter_frame = filter_frame,
/* Output pad: a video pad; no callbacks are set in the lines shown here. */
676 static const AVFilterPad fspp_outputs[] = {
679 .type = AVMEDIA_TYPE_VIDEO,
/*
 * Filter definition: ties the private context size, format negotiation,
 * pads and option class together. The AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL
 * flag goes with the explicit ctx->is_disabled check in filter_frame().
 */
684 AVFilter ff_vf_fspp = {
686 .description = NULL_IF_CONFIG_SMALL("Apply Fast Simple Post-processing filter."),
687 .priv_size = sizeof(FSPPContext),
689 .query_formats = query_formats,
690 .inputs = fspp_inputs,
691 .outputs = fspp_outputs,
692 .priv_class = &fspp_class,
693 .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,