2 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
3 * Copyright (C) 2005 Nikolaj Poroshin <porosh3@psu.ru>
4 * Copyright (c) 2014 Arwa Arif <arwaarif1994@gmail.com>
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License along
19 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * Fast Simple Post-processing filter
26 * This implementation is based on an algorithm described in
27 * "Aria Nosratinia Embedded Post-Processing for
28 * Enhancement of Compressed Images (1999)"
29 * (http://www.utdallas.edu/~aria/papers/vlsisp99.pdf)
30 * Further, with splitting (I)DCT into horizontal/vertical passes, one of
31 * them can be performed once per block, not per pixel. This allows for much higher speed.
34 * Originally written by Michael Niedermayer and Nikolaj for the MPlayer
35 * project, and ported by Arwa Arif for FFmpeg.
38 #include "libavutil/avassert.h"
39 #include "libavutil/imgutils.h"
40 #include "libavutil/opt.h"
41 #include "libavutil/pixdesc.h"
/* User-visible options of the fspp filter.
 * OFFSET() yields the byte offset of a field inside FSPPContext; FLAGS is the
 * flag set shared by all entries (filtering parameter | video parameter).
 * Each entry lists: name, help text, target field, type, default value,
 * minimum, maximum, flags. */
45 #define OFFSET(x) offsetof(FSPPContext, x)
46 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
47 static const AVOption fspp_options[] = {
// "quality" is stored in log2_count: default 4, allowed range 4..MAX_LEVEL
48 { "quality", "set quality", OFFSET(log2_count), AV_OPT_TYPE_INT, {.i64 = 4}, 4, MAX_LEVEL, FLAGS },
// "qp": constant quantizer, 0..64; filter_frame() treats 0 as "not forced"
49 { "qp", "force a constant quantizer parameter", OFFSET(qp), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 64, FLAGS },
// "strength": -15..32, added to a base of 16 when the thresholds are scaled
50 { "strength", "set filter strength", OFFSET(strength), AV_OPT_TYPE_INT, {.i64 = 0}, -15, 32, FLAGS },
// "use_bframe_qp": 0/1 switch, default 0
51 { "use_bframe_qp", "use B-frames' QP", OFFSET(use_bframe_qp), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, FLAGS },
// presumably defines the fspp_class object used as .priv_class of the filter
55 AVFILTER_DEFINE_CLASS(fspp);
/* 8x8 table of dither offsets (values 0..63), declared with 32-byte alignment.
 * store_slice_c() and store_slice2_c() select a row with dither[y] and add
 * d[pos] >> log2_scale to each sample before the final right shift. */
57 DECLARE_ALIGNED(32, static const uint8_t, dither)[8][8] = {
58 { 0, 48, 12, 60, 3, 51, 15, 63, },
59 { 32, 16, 44, 28, 35, 19, 47, 31, },
60 { 8, 56, 4, 52, 11, 59, 7, 55, },
61 { 40, 24, 36, 20, 43, 27, 39, 23, },
62 { 2, 50, 14, 62, 1, 49, 13, 61, },
63 { 34, 18, 46, 30, 33, 17, 45, 29, },
64 { 10, 58, 6, 54, 9, 57, 5, 53, },
65 { 42, 26, 38, 22, 41, 25, 37, 21, },
/* 64 base threshold values. filter_frame() multiplies each entry by
 * (16 + strength) / 71.0, rounds it, and packs the results into
 * threshold_mtx_noq. */
68 static const short custom_threshold[64] = {
69 // the values (max. 296) must not be too high:
70 // - that causes too strong a dependence on the quantizer
71 // or possibly overflow (to be checked), which shows up as flashing
72 71, 296, 295, 237, 71, 40, 38, 19,
73 245, 193, 185, 121, 102, 73, 53, 27,
74 158, 129, 141, 107, 97, 73, 50, 26,
75 102, 116, 109, 98, 82, 66, 45, 23,
76 71, 94, 95, 81, 70, 56, 38, 20,
77 56, 77, 74, 66, 56, 44, 30, 15,
78 38, 53, 50, 45, 38, 30, 21, 11,
79 20, 27, 26, 23, 20, 15, 11, 5
/*
 * Turn accumulated int16_t samples into output pixels, 8 columns at a time,
 * for `height` lines of `width` samples.
 *
 * Per sample (see the macro body below): the dither value d[pos] >> log2_scale
 * is added, the sum is shifted right by (6 - log2_scale), and both src[x + pos]
 * and the sample 8 lines above it (src[x + pos - 8 * src_stride]) are reset
 * to 0. If bit 8 of the result is set, it is replaced by ~(temp >> 31):
 * 0 for a negative value and all-ones otherwise — assuming temp is a 32-bit
 * int, TODO confirm (its declaration is not visible here).
 *
 * dst, dst_stride   output pixels and their line stride
 * src, src_stride   accumulator samples and their line stride (in elements)
 * log2_scale        scale exponent used for both the dither and the shift
 *
 * NOTE(review): the macro head and the final store to dst are not visible
 * in this listing.
 */
82 //This func reads from 1 slice, 1 and clears 0 & 1
83 static void store_slice_c(uint8_t *dst, int16_t *src,
84 ptrdiff_t dst_stride, ptrdiff_t src_stride,
85 ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
89 temp = (src[x + pos] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
90 src[x + pos] = src[x + pos - 8 * src_stride] = 0; \
91 if (temp & 0x100) temp = ~(temp >> 31); \
94 for (y = 0; y < height; y++) {
95 const uint8_t *d = dither[y];
96 for (x = 0; x < width; x += 8) {
/*
 * Variant of store_slice_c() that combines two accumulator lines.
 *
 * Per sample (see STORE2 below): src[x + pos] and the sample 16 lines further
 * down (src[x + pos + 16 * src_stride]) are summed together with the dither
 * value d[pos] >> log2_scale, then shifted right by (6 - log2_scale). Only the
 * sample 16 lines down is reset to 0. If bit 8 of the result is set, it is
 * replaced by ~(temp >> 31): 0 for a negative value and all-ones otherwise —
 * assuming temp is a 32-bit int, TODO confirm.
 *
 * Parameters have the same meaning as in store_slice_c().
 *
 * NOTE(review): the final store to dst is not visible in this listing.
 */
112 //This func reads from 2 slices, 0 & 2 and clears 2-nd
113 static void store_slice2_c(uint8_t *dst, int16_t *src,
114 ptrdiff_t dst_stride, ptrdiff_t src_stride,
115 ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
118 #define STORE2(pos) \
119 temp = (src[x + pos] + src[x + pos + 16 * src_stride] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
120 src[x + pos + 16 * src_stride] = 0; \
121 if (temp & 0x100) temp = ~(temp >> 31); \
124 for (y = 0; y < height; y++) {
125 const uint8_t *d = dither[y];
126 for (x = 0; x < width; x += 8) {
/*
 * Scale the 64-entry threshold table by a quantizer value.
 *
 * thr_adr_noq  input table (64 int16_t values, not yet scaled)
 * thr_adr      output table; receives q * thr_adr_noq[a] for a = 0..63
 * q            scale factor
 *
 * The product is stored into int16_t, so it is truncated to 16 bits.
 */
142 static void mul_thrmat_c(int16_t *thr_adr_noq, int16_t *thr_adr, int q)
145 for (a = 0; a < 64; a++)
146 thr_adr[a] = q * thr_adr_noq[a];
/*
 * Filter one plane: copy it into the padded work buffer p->src, run the
 * row_fdct / column_fidct / row_idct callbacks over it in steps of `step`
 * lines while accumulating into p->temp, and emit finished slices to dst
 * through store_slice / store_slice2.
 *
 * p                       filter context (work buffers, callbacks, thresholds)
 * dst, src                destination and source plane; nothing is done if
 *                         either is NULL
 * dst_stride, src_stride  line strides of dst and src
 * width, height           plane dimensions in pixels
 * qp_store, qp_stride     quantizer table and its line stride
 * is_luma                 non-zero for the luma plane; selects the work stride
 *                         and disables the chroma adjustment of the QP shifts
 */
149 static void filter(FSPPContext *p, uint8_t *dst, uint8_t *src,
150 int dst_stride, int src_stride,
151 int width, int height,
152 uint8_t *qp_store, int qp_stride, int is_luma)
154 int x, x0, y, es, qy, t;
// work-buffer line stride: temp_stride for luma, width + 16 otherwise
156 const int stride = is_luma ? p->temp_stride : (width + 16);
// vertical distance between processed lines
157 const int step = 6 - p->log2_count;
// shifts that map pixel coordinates to QP-table coordinates; reduced by the
// chroma subsampling shifts for non-luma planes
158 const int qpsh = 4 - p->hsub * !is_luma;
159 const int qpsv = 4 - p->vsub * !is_luma;
// scratch coefficient storage: block is the first half, block3 the second
161 DECLARE_ALIGNED(32, int32_t, block_align)[4 * 8 * BLOCKSZ + 4 * 8 * BLOCKSZ];
162 int16_t *block = (int16_t *)block_align;
163 int16_t *block3 = (int16_t *)(block_align + 4 * 8 * BLOCKSZ);
// NOTE(review): the length is a byte count (4 * 8 * BLOCKSZ bytes) although
// block3 holds int16_t values — confirm this is the intended extent
165 memset(block3, 0, 4 * 8 * BLOCKSZ);
167 if (!src || !dst) return;
// copy the plane into p->src at an offset of 8 pixels / 8 lines and mirror
// 8 pixels at the left and right edge of every line
169 for (y = 0; y < height; y++) {
170 int index = 8 + 8 * stride + y * stride;
171 memcpy(p->src + index, src + y * src_stride, width);
172 for (x = 0; x < 8; x++) {
173 p->src[index - x - 1] = p->src[index + x ];
174 p->src[index + width + x ] = p->src[index + width - x - 1];
// mirror 8 full lines above and below the plane
178 for (y = 0; y < 8; y++) {
179 memcpy(p->src + ( 7 - y ) * stride, p->src + ( y + 8 ) * stride, stride);
180 memcpy(p->src + (height + 8 + y) * stride, p->src + (height - y + 7) * stride, stride);
182 //FIXME (try edge emu)
// clear lines 8..23 of the accumulator p->temp
184 for (y = 8; y < 24; y++)
185 memset(p->temp + 8 + y * stride, 0, width * sizeof(int16_t));
187 for (y = step; y < height + 8; y += step) { //step= 1,2
188 const int y1 = y - 8 + step; //l5-7 l4-6;
// clamp the QP line to the plane and convert it to a line offset in qp_store
191 if (qy > height - 1) qy = height - 1;
194 qy = (qy >> qpsv) * qp_stride;
195 p->row_fdct(block, p->src + y * stride + 2 - (y&1), stride, 2);
// walk the line in chunks of 8 * (BLOCKSZ - 1) columns
197 for (x0 = 0; x0 < width + 8 - 8 * (BLOCKSZ - 1); x0 += 8 * (BLOCKSZ - 1)) {
198 p->row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + 2 - (y&1), stride, 2 * (BLOCKSZ - 1));
201 p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + 0 * 8, block3 + 0 * 8, 8 * (BLOCKSZ - 1)); //yes, this is a HOTSPOT
// QP lookup per 8 columns; the threshold matrix is rescaled only when the
// normalized QP differs from the previously used one
203 for (x = 0; x < 8 * (BLOCKSZ - 1); x += 8) {
204 t = x + x0 - 2; //correct t=x+x0-2-(y&1), but it's the same
206 if (t < 0) t = 0; //t always < width-2
208 t = qp_store[qy + (t >> qpsh)];
209 t = ff_norm_qscale(t, p->qscale_type);
211 if (t != p->prev_q) p->prev_q = t, p->mul_thrmat((int16_t *)(&p->threshold_mtx_noq[0]), (int16_t *)(&p->threshold_mtx[0]), t);
212 p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + x * 8, block3 + x * 8, 8); //yes, this is a HOTSPOT
214 p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, 2 * (BLOCKSZ - 1));
// move the trailing coefficients to the front for the next chunk
215 memmove(block, block + (BLOCKSZ - 1) * 64, 8 * 8 * sizeof(int16_t)); //cycling
216 memmove(block3, block3 + (BLOCKSZ - 1) * 64, 6 * 8 * sizeof(int16_t));
// columns left over at the right edge
219 es = width + 8 - x0; // 8, ...
221 p->row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + 2 - (y & 1), stride, (es - 4) >> 2);
223 p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block, block3, es&(~1));
225 p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, es >> 2);
// whenever y1 reaches a non-zero multiple of 8, write out 8 finished lines
227 if (!(y1 & 7) && y1) {
229 p->store_slice(dst + (y1 - 8) * dst_stride, p->temp + 8 + 8 * stride,
230 dst_stride, stride, width, 8, 5 - p->log2_count);
232 p->store_slice2(dst + (y1 - 8) * dst_stride, p->temp + 8 + 0 * stride,
233 dst_stride, stride, width, 8, 5 - p->log2_count);
// write out the remaining (y & 7) lines
237 if (y & 7) { // height % 8 != 0
239 p->store_slice(dst + ((y - 8) & ~7) * dst_stride, p->temp + 8 + 8 * stride,
240 dst_stride, stride, width, y&7, 5 - p->log2_count);
242 p->store_slice2(dst + ((y - 8) & ~7) * dst_stride, p->temp + 8 + 0 * stride,
243 dst_stride, stride, width, y&7, 5 - p->log2_count);
/*
 * Column pass: for every column, form the butterfly sums and differences of
 * the 8 input rows, apply THRESHOLD() to the resulting values using the table
 * at thr_adr, run the inverse stage, and combine the result into the rows
 * addressed through wsptr.
 *
 * thr_adr  threshold matrix; read as int16_t in steps of 8 entries per row
 * data     input coefficients — presumably the start value of dataptr,
 *          TODO confirm (the initialisation is not visible here)
 * output   workspace — presumably the start value of wsptr, TODO confirm
 * cnt      number of start positions; the outer loop consumes two per pass
 */
247 static void column_fidct_c(int16_t *thr_adr, int16_t *data, int16_t *output, int cnt)
249 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
250 int_simd16_t tmp10, tmp11, tmp12, tmp13;
251 int_simd16_t z1,z2,z3,z4,z5, z10, z11, z12, z13;
252 int_simd16_t d0, d1, d2, d3, d4, d5, d6, d7;
262 for (; cnt > 0; cnt -= 2) { //start positions
263 threshold = (int16_t *)thr_adr;//threshold_mtx
264 for (ctr = DCTSIZE; ctr > 0; ctr--) {
265 // Process columns from input, add to output.
// butterflies: pair row k with row 7 - k
266 tmp0 = dataptr[DCTSIZE * 0] + dataptr[DCTSIZE * 7];
267 tmp7 = dataptr[DCTSIZE * 0] - dataptr[DCTSIZE * 7];
269 tmp1 = dataptr[DCTSIZE * 1] + dataptr[DCTSIZE * 6];
270 tmp6 = dataptr[DCTSIZE * 1] - dataptr[DCTSIZE * 6];
272 tmp2 = dataptr[DCTSIZE * 2] + dataptr[DCTSIZE * 5];
273 tmp5 = dataptr[DCTSIZE * 2] - dataptr[DCTSIZE * 5];
275 tmp3 = dataptr[DCTSIZE * 3] + dataptr[DCTSIZE * 4];
276 tmp4 = dataptr[DCTSIZE * 3] - dataptr[DCTSIZE * 4];
288 z1 = MULTIPLY16H((tmp12 + tmp13) << 2, FIX_0_707106781);
// THRESHOLD() with the table rows 0, 2, 4 and 6
294 THRESHOLD(tmp0, d0, threshold[0 * 8]);
295 THRESHOLD(tmp1, d2, threshold[2 * 8]);
296 THRESHOLD(tmp2, d4, threshold[4 * 8]);
297 THRESHOLD(tmp3, d6, threshold[6 * 8]);
299 tmp10 = (tmp0 + tmp2) >> 2;
300 tmp11 = (tmp0 - tmp2) >> 2;
302 tmp13 = (tmp1 + tmp3) >>2; //+2 ! (psnr decides)
303 tmp12 = MULTIPLY16H((tmp1 - tmp3), FIX_1_414213562_A) - tmp13; //<<2
305 tmp0 = tmp10 + tmp13; //->temps
306 tmp3 = tmp10 - tmp13; //->temps
307 tmp1 = tmp11 + tmp12; //->temps
308 tmp2 = tmp11 - tmp12; //->temps
316 z5 = MULTIPLY16H((tmp10 - tmp12) << 2, FIX_0_382683433);
317 z2 = MULTIPLY16H(tmp10 << 2, FIX_0_541196100) + z5;
318 z4 = MULTIPLY16H(tmp12 << 2, FIX_1_306562965) + z5;
319 z3 = MULTIPLY16H(tmp11 << 2, FIX_0_707106781);
// THRESHOLD() with the table rows 1, 3, 5 and 7
331 THRESHOLD(tmp4, d1, threshold[1 * 8]);
332 THRESHOLD(tmp5, d3, threshold[3 * 8]);
333 THRESHOLD(tmp6, d5, threshold[5 * 8]);
334 THRESHOLD(tmp7, d7, threshold[7 * 8]);
336 //Simd version uses here a shortcut for the tmp5,tmp6,tmp7 == 0
338 z10 = (tmp6 - tmp5) << 1;
340 z12 = (tmp4 - tmp7) << 1;
342 tmp7 = (z11 + z13) >> 2; //+2 !
343 tmp11 = MULTIPLY16H((z11 - z13) << 1, FIX_1_414213562);
344 z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
345 tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
346 tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - !!
// rows 0..5 are accumulated into the workspace
352 wsptr[DCTSIZE * 0] += (tmp0 + tmp7);
353 wsptr[DCTSIZE * 1] += (tmp1 + tmp6);
354 wsptr[DCTSIZE * 2] += (tmp2 + tmp5);
355 wsptr[DCTSIZE * 3] += (tmp3 - tmp4);
356 wsptr[DCTSIZE * 4] += (tmp3 + tmp4);
357 wsptr[DCTSIZE * 5] += (tmp2 - tmp5);
// NOTE(review): rows 6 and 7 are assigned, not accumulated, unlike rows 0..5
358 wsptr[DCTSIZE * 6] = (tmp1 - tmp6);
359 wsptr[DCTSIZE * 7] = (tmp0 - tmp7);
361 dataptr++; //next column
365 dataptr += 8; //skip each second start pos
/*
 * Row inverse transform: for each of cnt iterations, read 8 coefficients
 * from wsptr, run the inverse stage, and add the results — passed through
 * DESCALE(..., 3) — to 8 samples that lie output_stride apart, reached
 * through outptr. wsptr then advances by DCTSIZE.
 *
 * workspace      input coefficients — presumably the start value of wsptr,
 *                TODO confirm (the initialisation is not visible here)
 * output_adr     accumulator — presumably the start value of outptr,
 *                TODO confirm
 * output_stride  distance between the 8 output samples of one iteration
 * cnt            number of iterations
 */
370 static void row_idct_c(int16_t *workspace, int16_t *output_adr, ptrdiff_t output_stride, int cnt)
372 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
373 int_simd16_t tmp10, tmp11, tmp12, tmp13;
374 int_simd16_t z5, z10, z11, z12, z13;
381 for (; cnt > 0; cnt--) {
383 //Simd version reads 4x4 block and transposes it
// first half of the computation: inputs wsptr[0..3]
384 tmp10 = wsptr[2] + wsptr[3];
385 tmp11 = wsptr[2] - wsptr[3];
387 tmp13 = wsptr[0] + wsptr[1];
388 tmp12 = (MULTIPLY16H(wsptr[0] - wsptr[1], FIX_1_414213562_A) << 2) - tmp13;//this shift order to avoid overflow
390 tmp0 = tmp10 + tmp13; //->temps
391 tmp3 = tmp10 - tmp13; //->temps
392 tmp1 = tmp11 + tmp12;
393 tmp2 = tmp11 - tmp12;
396 //Also transpose, with previous:
398 // ---- ---- idct ||||
399 // ---- ---- ---> ||||
// second half of the computation: inputs wsptr[4..7]
401 z13 = wsptr[4] + wsptr[5];
402 z10 = wsptr[4] - wsptr[5];
403 z11 = wsptr[6] + wsptr[7];
404 z12 = wsptr[6] - wsptr[7];
407 tmp11 = MULTIPLY16H(z11 - z13, FIX_1_414213562);
409 z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
410 tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
411 tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - FIX_
413 tmp6 = (tmp12 << 3) - tmp7;
414 tmp5 = (tmp11 << 3) - tmp6;
415 tmp4 = (tmp10 << 3) + tmp5;
417 // Final output stage: descale and write column
418 outptr[0 * output_stride] += DESCALE(tmp0 + tmp7, 3);
419 outptr[1 * output_stride] += DESCALE(tmp1 + tmp6, 3);
420 outptr[2 * output_stride] += DESCALE(tmp2 + tmp5, 3);
421 outptr[3 * output_stride] += DESCALE(tmp3 - tmp4, 3);
422 outptr[4 * output_stride] += DESCALE(tmp3 + tmp4, 3);
423 outptr[5 * output_stride] += DESCALE(tmp2 - tmp5, 3);
424 outptr[6 * output_stride] += DESCALE(tmp1 - tmp6, 3); //no += ?
425 outptr[7 * output_stride] += DESCALE(tmp0 - tmp7, 3); //no += ?
428 wsptr += DCTSIZE; // advance pointer to next row
/*
 * Forward transform of pixel data: each of the cnt iterations reads 8 pixels
 * that lie line_size apart (pixels[line_size * 0..7]), computes 8 values and
 * stores them in dataptr[0..7]; `pixels` then advances by one.
 *
 * data       output coefficients — presumably the start value of dataptr,
 *            TODO confirm (the initialisation is not visible here)
 * pixels     8-bit input samples
 * line_size  distance between the 8 samples read in one iteration
 * cnt        number of iterations
 */
432 static void row_fdct_c(int16_t *data, const uint8_t *pixels, ptrdiff_t line_size, int cnt)
434 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
435 int_simd16_t tmp10, tmp11, tmp12, tmp13;
436 int_simd16_t z1, z2, z3, z4, z5, z11, z13;
440 // Pass 1: process rows.
443 for (; cnt > 0; cnt--) {
// butterflies: pair sample k with sample 7 - k
444 tmp0 = pixels[line_size * 0] + pixels[line_size * 7];
445 tmp7 = pixels[line_size * 0] - pixels[line_size * 7];
446 tmp1 = pixels[line_size * 1] + pixels[line_size * 6];
447 tmp6 = pixels[line_size * 1] - pixels[line_size * 6];
448 tmp2 = pixels[line_size * 2] + pixels[line_size * 5];
449 tmp5 = pixels[line_size * 2] - pixels[line_size * 5];
450 tmp3 = pixels[line_size * 3] + pixels[line_size * 4];
451 tmp4 = pixels[line_size * 3] - pixels[line_size * 4];
459 //The even columns are written first; this gives a different column order
460 //in column_fidct(), but the columns are processed independently, so it is fine.
461 //Later, row_idct() reads the columns in the same order.
462 dataptr[2] = tmp10 + tmp11;
463 dataptr[3] = tmp10 - tmp11;
465 z1 = MULTIPLY16H((tmp12 + tmp13) << 2, FIX_0_707106781);
466 dataptr[0] = tmp13 + z1;
467 dataptr[1] = tmp13 - z1;
// part computed from the differences tmp4..tmp7
471 tmp10 = (tmp4 + tmp5) << 2;
472 tmp11 = (tmp5 + tmp6) << 2;
473 tmp12 = (tmp6 + tmp7) << 2;
475 z5 = MULTIPLY16H(tmp10 - tmp12, FIX_0_382683433);
476 z2 = MULTIPLY16H(tmp10, FIX_0_541196100) + z5;
477 z4 = MULTIPLY16H(tmp12, FIX_1_306562965) + z5;
478 z3 = MULTIPLY16H(tmp11, FIX_0_707106781);
483 dataptr[4] = z13 + z2;
484 dataptr[5] = z13 - z2;
485 dataptr[6] = z11 + z4;
486 dataptr[7] = z11 - z4;
488 pixels++; // advance pointer to next column
/*
 * Format negotiation callback: builds a format list from the pixel formats
 * below and hands it to ff_set_common_formats() for this filter context.
 */
493 static int query_formats(AVFilterContext *ctx)
// pixel formats offered by the filter
495 static const enum AVPixelFormat pix_fmts[] = {
496 AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P,
497 AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV411P,
498 AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV440P,
499 AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ422P,
500 AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ440P,
501 AV_PIX_FMT_GBRP, AV_PIX_FMT_GRAY8,
504 ff_set_common_formats(ctx, ff_make_format_list(pix_fmts));
/*
 * Input-link configuration: derives the chroma subsampling shifts from the
 * pixel format, allocates the work buffers, optionally allocates the table
 * that keeps the QP values of the last non-B frame, and installs the C
 * implementations of the processing callbacks.
 *
 * Returns AVERROR(ENOMEM) if one of the allocations fails.
 */
508 static int config_input(AVFilterLink *inlink)
510 AVFilterContext *ctx = inlink->dst;
511 FSPPContext *fspp = ctx->priv;
// buffer height: frame height + 16, rounded up to a multiple of 16
512 const int h = FFALIGN(inlink->h + 16, 16);
// NOTE(review): desc is dereferenced below without a NULL check
513 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
515 fspp->hsub = desc->log2_chroma_w;
516 fspp->vsub = desc->log2_chroma_h;
// buffer line stride: frame width + 16, rounded up to a multiple of 16
518 fspp->temp_stride = FFALIGN(inlink->w + 16, 16);
519 fspp->temp = av_malloc_array(fspp->temp_stride, h * sizeof(*fspp->temp));
520 fspp->src = av_malloc_array(fspp->temp_stride, h * sizeof(*fspp->src));
522 if (!fspp->temp || !fspp->src)
523 return AVERROR(ENOMEM);
// the saved-QP table is only needed when neither B-frame QPs nor a constant
// QP are used; one entry per 16x16 block
525 if (!fspp->use_bframe_qp && !fspp->qp) {
526 fspp->non_b_qp_alloc_size = FF_CEIL_RSHIFT(inlink->w, 4) * FF_CEIL_RSHIFT(inlink->h, 4);
527 fspp->non_b_qp_table = av_calloc(fspp->non_b_qp_alloc_size, sizeof(*fspp->non_b_qp_table));
528 if (!fspp->non_b_qp_table)
529 return AVERROR(ENOMEM);
// default (C) implementations of the callbacks
532 fspp->store_slice = store_slice_c;
533 fspp->store_slice2 = store_slice2_c;
534 fspp->mul_thrmat = mul_thrmat_c;
535 fspp->column_fidct = column_fidct_c;
536 fspp->row_idct = row_idct_c;
537 fspp->row_fdct = row_fdct_c;
// presumably replaces callbacks with x86-specific versions — TODO confirm
540 ff_fspp_init_x86(fspp);
/*
 * Per-frame entry point: rebuilds the threshold matrix from custom_threshold
 * and the strength option, keeps a copy of the QP table of the last non-B
 * frame when required, filters the three planes (in place if possible,
 * otherwise into a newly allocated frame), and passes the result on to the
 * output link.
 *
 * Returns the result of ff_filter_frame(), or an error code if allocating
 * the output frame or the QP table fails.
 */
545 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
547 AVFilterContext *ctx = inlink->dst;
548 FSPPContext *fspp = ctx->priv;
549 AVFilterLink *outlink = ctx->outputs[0];
553 uint8_t *qp_table = NULL;
555 int custom_threshold_m[64];
// strength is applied as an offset to a base of 16
557 bias = (1 << 4) + fspp->strength;
// scale every base threshold by bias / 71.0 and round to nearest
559 for (i = 0; i < 64; i++) //FIXME: tune custom_threshold[] and remove this !
560 custom_threshold_m[i] = (int)(custom_threshold[i] * (bias / 71.0) + 0.5);
// pack each row of 8 thresholds into two 64-bit words, four 16-bit lanes
// each, in the column order 2, 6, 0, 4 and 5, 3, 1, 7
562 for (i = 0; i < 8; i++) {
563 fspp->threshold_mtx_noq[2 * i] = (uint64_t)custom_threshold_m[i * 8 + 2]
564 |(((uint64_t)custom_threshold_m[i * 8 + 6]) << 16)
565 |(((uint64_t)custom_threshold_m[i * 8 + 0]) << 32)
566 |(((uint64_t)custom_threshold_m[i * 8 + 4]) << 48);
568 fspp->threshold_mtx_noq[2 * i + 1] = (uint64_t)custom_threshold_m[i * 8 + 5]
569 |(((uint64_t)custom_threshold_m[i * 8 + 3]) << 16)
570 |(((uint64_t)custom_threshold_m[i * 8 + 1]) << 32)
571 |(((uint64_t)custom_threshold_m[i * 8 + 7]) << 48);
// remember the user QP and scale the threshold matrix with it
575 fspp->prev_q = fspp->qp, fspp->mul_thrmat((int16_t *)(&fspp->threshold_mtx_noq[0]), (int16_t *)(&fspp->threshold_mtx[0]), fspp->qp);
577 /* if we are not in a constant user quantizer mode and we don't want to use
578 * the quantizers from the B-frames (B-frames often have a higher QP), we
579 * need to save the qp table from the last non B-frame; this is what the
580 * following code block does */
582 qp_table = av_frame_get_qp_table(in, &qp_stride, &fspp->qscale_type);
584 if (qp_table && !fspp->use_bframe_qp && in->pict_type != AV_PICTURE_TYPE_B) {
587 /* if the qp stride is not set, it means the QP are only defined on
590 w = FF_CEIL_RSHIFT(inlink->w, 4);
594 h = FF_CEIL_RSHIFT(inlink->h, 4);
// grow the saved table if this frame's QP table is larger than the buffer
596 if (w * h > fspp->non_b_qp_alloc_size) {
597 int ret = av_reallocp_array(&fspp->non_b_qp_table, w, h);
599 fspp->non_b_qp_alloc_size = 0;
602 fspp->non_b_qp_alloc_size = w * h;
605 av_assert0(w * h <= fspp->non_b_qp_alloc_size);
606 memcpy(fspp->non_b_qp_table, qp_table, w * h);
// filter only when log2_count is non-zero and the filter is not disabled
610 if (fspp->log2_count && !ctx->is_disabled) {
// prefer the saved non-B QP table when B-frame QPs are not wanted
611 if (!fspp->use_bframe_qp && fspp->non_b_qp_table)
612 qp_table = fspp->non_b_qp_table;
614 if (qp_table || fspp->qp) {
// chroma plane dimensions
615 const int cw = FF_CEIL_RSHIFT(inlink->w, fspp->hsub);
616 const int ch = FF_CEIL_RSHIFT(inlink->h, fspp->vsub);
618 /* get a new frame if in-place is not possible or if the dimensions
619 * are not multiple of 8 */
620 if (!av_frame_is_writable(in) || (inlink->w & 7) || (inlink->h & 7)) {
621 const int aligned_w = FFALIGN(inlink->w, 8);
622 const int aligned_h = FFALIGN(inlink->h, 8);
624 out = ff_get_video_buffer(outlink, aligned_w, aligned_h);
627 return AVERROR(ENOMEM);
// the buffer is allocated with aligned dimensions, but the frame keeps the
// dimensions and properties of the input
629 av_frame_copy_props(out, in);
630 out->width = in->width;
631 out->height = in->height;
// plane 0 at full size (is_luma = 1), planes 1 and 2 at chroma size
634 filter(fspp, out->data[0], in->data[0], out->linesize[0], in->linesize[0],
635 inlink->w, inlink->h, qp_table, qp_stride, 1);
636 filter(fspp, out->data[1], in->data[1], out->linesize[1], in->linesize[1],
637 cw, ch, qp_table, qp_stride, 0);
638 filter(fspp, out->data[2], in->data[2], out->linesize[2], in->linesize[2],
639 cw, ch, qp_table, qp_stride, 0);
// plane 3 is copied unfiltered (presumably alpha — TODO confirm)
646 av_image_copy_plane(out->data[3], out->linesize[3],
647 in ->data[3], in ->linesize[3],
648 inlink->w, inlink->h);
651 return ff_filter_frame(outlink, out);
/*
 * Filter teardown: releases the two work buffers and the saved QP table
 * held in the private context.
 */
654 static av_cold void uninit(AVFilterContext *ctx)
656 FSPPContext *fspp = ctx->priv;
657 av_freep(&fspp->temp);
658 av_freep(&fspp->src);
659 av_freep(&fspp->non_b_qp_table);
/* Input pads: a video pad that is set up by config_input() and receives
 * frames through filter_frame(). */
662 static const AVFilterPad fspp_inputs[] = {
665 .type = AVMEDIA_TYPE_VIDEO,
666 .config_props = config_input,
667 .filter_frame = filter_frame,
/* Output pads: a video pad. */
672 static const AVFilterPad fspp_outputs[] = {
675 .type = AVMEDIA_TYPE_VIDEO,
/* Filter definition: ties together the private context size, the format
 * negotiation callback, the pad lists and the option class, and declares
 * internal timeline support (filter_frame() checks ctx->is_disabled itself). */
680 AVFilter ff_vf_fspp = {
682 .description = NULL_IF_CONFIG_SMALL("Apply Fast Simple Post-processing filter."),
683 .priv_size = sizeof(FSPPContext),
685 .query_formats = query_formats,
686 .inputs = fspp_inputs,
687 .outputs = fspp_outputs,
688 .priv_class = &fspp_class,
689 .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,