2 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
3 * Copyright (C) 2005 Nikolaj Poroshin <porosh3@psu.ru>
4 * Copyright (c) 2014 Arwa Arif <arwaarif1994@gmail.com>
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License along
19 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * Fast Simple Post-processing filter
26 * This implementation is based on an algorithm described in
27 * "Aria Nosratinia Embedded Post-Processing for
28 * Enhancement of Compressed Images (1999)"
29 * (http://www.utdallas.edu/~aria/papers/vlsisp99.pdf)
30 * Further, with splitting (I)DCT into horizontal/vertical passes, one of
31 * them can be performed once per block, not per pixel. This allows for much
31 * higher speed.
34 * Originally written by Michael Niedermayer and Nikolaj for the MPlayer
35 * project, and ported by Arwa Arif for FFmpeg.
38 #include "libavutil/avassert.h"
39 #include "libavutil/imgutils.h"
40 #include "libavutil/opt.h"
41 #include "libavutil/pixdesc.h"
43 #include "libavcodec/avcodec.h" //for reference to FF_QSCALE_TYPE
46 #define OFFSET(x) offsetof(FSPPContext, x)
47 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
48 static const AVOption fspp_options[] = {
49 { "quality", "set quality", OFFSET(log2_count), AV_OPT_TYPE_INT, {.i64 = 4}, 4, MAX_LEVEL, FLAGS },
50 { "qp", "force a constant quantizer parameter", OFFSET(qp), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 64, FLAGS },
51 { "strength", "set filter strength", OFFSET(strength), AV_OPT_TYPE_INT, {.i64 = 0}, -15, 32, FLAGS },
52 { "use_bframe_qp", "use B-frames' QP", OFFSET(use_bframe_qp), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, FLAGS },
56 AVFILTER_DEFINE_CLASS(fspp);
58 DECLARE_ALIGNED(32, static const uint8_t, dither)[8][8] = {
59 { 0, 48, 12, 60, 3, 51, 15, 63, },
60 { 32, 16, 44, 28, 35, 19, 47, 31, },
61 { 8, 56, 4, 52, 11, 59, 7, 55, },
62 { 40, 24, 36, 20, 43, 27, 39, 23, },
63 { 2, 50, 14, 62, 1, 49, 13, 61, },
64 { 34, 18, 46, 30, 33, 17, 45, 29, },
65 { 10, 58, 6, 54, 9, 57, 5, 53, },
66 { 42, 26, 38, 22, 41, 25, 37, 21, },
/* Per-coefficient base thresholds (zig-free 8x8 order) that are scaled by
 * the quantizer in mul_thrmat(); larger values filter more aggressively. */
static const short custom_threshold[64] = {
// values (296) can't be too high
// -it causes too big quant dependence
// or maybe overflow(check), which results in some flashing
     71, 296, 295, 237,  71,  40,  38,  19,
    245, 193, 185, 121, 102,  73,  53,  27,
    158, 129, 141, 107,  97,  73,  50,  26,
    102, 116, 109,  98,  82,  66,  45,  23,
     71,  94,  95,  81,  70,  56,  38,  20,
     56,  77,  74,  66,  56,  44,  30,  15,
     38,  53,  50,  45,  38,  30,  21,  11,
     20,  27,  26,  23,  20,  15,  11,   5
};
83 static inline int norm_qscale(int qscale, int type)
86 case FF_QSCALE_TYPE_MPEG1: return qscale;
87 case FF_QSCALE_TYPE_MPEG2: return qscale >> 1;
88 case FF_QSCALE_TYPE_H264: return qscale >> 2;
89 case FF_QSCALE_TYPE_VP56: return (63 - qscale + 2) >> 2;
94 //This func reads from 1 slice, 1 and clears 0 & 1
95 static void store_slice_c(uint8_t *dst, int16_t *src,
96 ptrdiff_t dst_stride, ptrdiff_t src_stride,
97 ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
101 temp = (src[x + pos] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
102 src[x + pos] = src[x + pos - 8 * src_stride] = 0; \
103 if (temp & 0x100) temp = ~(temp >> 31); \
106 for (y = 0; y < height; y++) {
107 const uint8_t *d = dither[y];
108 for (x = 0; x < width; x += 8) {
124 //This func reads from 2 slices, 0 & 2 and clears 2-nd
125 static void store_slice2_c(uint8_t *dst, int16_t *src,
126 ptrdiff_t dst_stride, ptrdiff_t src_stride,
127 ptrdiff_t width, ptrdiff_t height, ptrdiff_t log2_scale)
130 #define STORE2(pos) \
131 temp = (src[x + pos] + src[x + pos + 16 * src_stride] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
132 src[x + pos + 16 * src_stride] = 0; \
133 if (temp & 0x100) temp = ~(temp >> 31); \
136 for (y = 0; y < height; y++) {
137 const uint8_t *d = dither[y];
138 for (x = 0; x < width; x += 8) {
/**
 * Scale the unquantized threshold matrix by quantizer q:
 * thr_adr[i] = q * thr_adr_noq[i] for all 64 coefficients.
 */
static void mul_thrmat_c(int16_t *thr_adr_noq, int16_t *thr_adr, int q)
{
    int a;
    for (a = 0; a < 64; a++)
        thr_adr[a] = q * thr_adr_noq[a];
}
// Filters one plane (luma if is_luma, else chroma), width x height:
// the plane is copied into p->src with an 8-pixel mirrored border, then a
// sliding row_fdct -> thresholded column_fidct -> row_idct pipeline runs
// over overlapping 8x8 blocks; finished 8-row slices are dithered back to
// 8-bit dst via store_slice/store_slice2. qp_store/qp_stride describe the
// per-macroblock QP grid used to rescale the threshold matrix.
// NOTE(review): this extract is missing several interior lines and braces
// of the original function; the comments below describe only visible code.
161 static void filter(FSPPContext *p, uint8_t *dst, uint8_t *src,
162 int dst_stride, int src_stride,
163 int width, int height,
164 uint8_t *qp_store, int qp_stride, int is_luma)
166 int x, x0, y, es, qy, t;
// chroma planes use a local stride of width+16 (8-pixel border each side)
168 const int stride = is_luma ? p->temp_stride : (width + 16);
169 const int step = 6 - p->log2_count;
// shifts mapping pixel coordinates onto the QP grid (adjusted for subsampling)
170 const int qpsh = 4 - p->hsub * !is_luma;
171 const int qpsv = 4 - p->vsub * !is_luma;
// scratch for BLOCKSZ 8x8 blocks: row-FDCT output (block) and the
// accumulated column-filter results (block3)
173 DECLARE_ALIGNED(32, int32_t, block_align)[4 * 8 * BLOCKSZ + 4 * 8 * BLOCKSZ];
174 int16_t *block = (int16_t *)block_align;
175 int16_t *block3 = (int16_t *)(block_align + 4 * 8 * BLOCKSZ);
177 memset(block3, 0, 4 * 8 * BLOCKSZ);
179 if (!src || !dst) return;
// copy the plane and mirror 8 columns on the left/right edges
181 for (y = 0; y < height; y++) {
182 int index = 8 + 8 * stride + y * stride;
183 memcpy(p->src + index, src + y * src_stride, width);
184 for (x = 0; x < 8; x++) {
185 p->src[index - x - 1] = p->src[index + x ];
186 p->src[index + width + x ] = p->src[index + width - x - 1];
// mirror 8 rows at the top and bottom edges
190 for (y = 0; y < 8; y++) {
191 memcpy(p->src + ( 7 - y ) * stride, p->src + ( y + 8 ) * stride, stride);
192 memcpy(p->src + (height + 8 + y) * stride, p->src + (height - y + 7) * stride, stride);
194 //FIXME (try edge emu)
196 for (y = 8; y < 24; y++)
197 memset(p->temp + 8 + y * stride, 0, width * sizeof(int16_t));
199 for (y = step; y < height + 8; y += step) { //step= 1,2
200 const int y1 = y - 8 + step; //l5-7 l4-6;
// clamp the QP-row index to the last row of the plane
203 if (qy > height - 1) qy = height - 1;
206 qy = (qy >> qpsv) * qp_stride;
207 p->row_fdct(block, p->src + y * stride + 2 - (y&1), stride, 2);
209 for (x0 = 0; x0 < width + 8 - 8 * (BLOCKSZ - 1); x0 += 8 * (BLOCKSZ - 1)) {
210 p->row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + 2 - (y&1), stride, 2 * (BLOCKSZ - 1));
213 p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + 0 * 8, block3 + 0 * 8, 8 * (BLOCKSZ - 1)); //yes, this is a HOTSPOT
215 for (x = 0; x < 8 * (BLOCKSZ - 1); x += 8) {
216 t = x + x0 - 2; //correct t=x+x0-2-(y&1), but its the same
218 if (t < 0) t = 0; //t always < width-2
// look up this block's QP and rescale the threshold matrix only when
// the quantizer actually changed (cached in p->prev_q)
220 t = qp_store[qy + (t >> qpsh)];
221 t = norm_qscale(t, p->qscale_type);
223 if (t != p->prev_q) p->prev_q = t, p->mul_thrmat((int16_t *)(&p->threshold_mtx_noq[0]), (int16_t *)(&p->threshold_mtx[0]), t);
224 p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block + x * 8, block3 + x * 8, 8); //yes, this is a HOTSPOT
226 p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, 2 * (BLOCKSZ - 1));
227 memmove(block, block + (BLOCKSZ - 1) * 64, 8 * 8 * sizeof(int16_t)); //cycling
228 memmove(block3, block3 + (BLOCKSZ - 1) * 64, 6 * 8 * sizeof(int16_t));
// tail: process the columns left over from the last full BLOCKSZ batch
231 es = width + 8 - x0; // 8, ...
233 p->row_fdct(block + 8 * 8, p->src + y * stride + 8 + x0 + 2 - (y & 1), stride, (es - 4) >> 2);
235 p->column_fidct((int16_t *)(&p->threshold_mtx[0]), block, block3, es&(~1));
236 p->row_idct(block3 + 0 * 8, p->temp + (y & 15) * stride + x0 + 2 - (y & 1), stride, es >> 2);
// every completed 8 rows, emit a finished slice to dst
238 if (!(y1 & 7) && y1) {
240 p->store_slice(dst + (y1 - 8) * dst_stride, p->temp + 8 + 8 * stride,
241 dst_stride, stride, width, 8, 5 - p->log2_count);
243 p->store_slice2(dst + (y1 - 8) * dst_stride, p->temp + 8 + 0 * stride,
244 dst_stride, stride, width, 8, 5 - p->log2_count);
// flush the partial bottom slice when height is not a multiple of 8
248 if (y & 7) { // height % 8 != 0
250 p->store_slice(dst + ((y - 8) & ~7) * dst_stride, p->temp + 8 + 8 * stride,
251 dst_stride, stride, width, y&7, 5 - p->log2_count);
253 p->store_slice2(dst + ((y - 8) & ~7) * dst_stride, p->temp + 8 + 0 * stride,
254 dst_stride, stride, width, y&7, 5 - p->log2_count);
// Column pass: for each pair of 8-column blocks, forward-DCT the columns,
// hard-threshold the coefficients against thr_adr (THRESHOLD macro), then
// inverse-DCT and accumulate into the output workspace. Fixed-point AAN
// butterfly structure throughout (FIX_* constants, MULTIPLY16H).
// NOTE(review): this extract is missing interior lines of the original
// (dataptr/wsptr/threshold/ctr declarations and several butterfly steps);
// comments describe only the visible code.
258 static void column_fidct_c(int16_t *thr_adr, int16_t *data, int16_t *output, int cnt)
260 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
261 int_simd16_t tmp10, tmp11, tmp12, tmp13;
262 int_simd16_t z1,z2,z3,z4,z5, z10, z11, z12, z13;
263 int_simd16_t d0, d1, d2, d3, d4, d5, d6, d7;
273 for (; cnt > 0; cnt -= 2) { //start positions
274 threshold = (int16_t *)thr_adr;//threshold_mtx
275 for (ctr = DCTSIZE; ctr > 0; ctr--) {
276 // Process columns from input, add to output.
// FDCT even part: sums/differences of mirrored sample pairs
277 tmp0 = dataptr[DCTSIZE * 0] + dataptr[DCTSIZE * 7];
278 tmp7 = dataptr[DCTSIZE * 0] - dataptr[DCTSIZE * 7];
280 tmp1 = dataptr[DCTSIZE * 1] + dataptr[DCTSIZE * 6];
281 tmp6 = dataptr[DCTSIZE * 1] - dataptr[DCTSIZE * 6];
283 tmp2 = dataptr[DCTSIZE * 2] + dataptr[DCTSIZE * 5];
284 tmp5 = dataptr[DCTSIZE * 2] - dataptr[DCTSIZE * 5];
286 tmp3 = dataptr[DCTSIZE * 3] + dataptr[DCTSIZE * 4];
287 tmp4 = dataptr[DCTSIZE * 3] - dataptr[DCTSIZE * 4];
299 z1 = MULTIPLY16H((tmp12 + tmp13) << 2, FIX_0_707106781);
// threshold the even coefficients (results also saved as d0/d2/d4/d6)
305 THRESHOLD(tmp0, d0, threshold[0 * 8]);
306 THRESHOLD(tmp1, d2, threshold[2 * 8]);
307 THRESHOLD(tmp2, d4, threshold[4 * 8]);
308 THRESHOLD(tmp3, d6, threshold[6 * 8]);
// IDCT even part
310 tmp10 = (tmp0 + tmp2) >> 2;
311 tmp11 = (tmp0 - tmp2) >> 2;
313 tmp13 = (tmp1 + tmp3) >>2; //+2 ! (psnr decides)
314 tmp12 = MULTIPLY16H((tmp1 - tmp3), FIX_1_414213562_A) - tmp13; //<<2
316 tmp0 = tmp10 + tmp13; //->temps
317 tmp3 = tmp10 - tmp13; //->temps
318 tmp1 = tmp11 + tmp12; //->temps
319 tmp2 = tmp11 - tmp12; //->temps
// FDCT odd part (rotator)
327 z5 = MULTIPLY16H((tmp10 - tmp12) << 2, FIX_0_382683433);
328 z2 = MULTIPLY16H(tmp10 << 2, FIX_0_541196100) + z5;
329 z4 = MULTIPLY16H(tmp12 << 2, FIX_1_306562965) + z5;
330 z3 = MULTIPLY16H(tmp11 << 2, FIX_0_707106781);
// threshold the odd coefficients (results also saved as d1/d3/d5/d7)
342 THRESHOLD(tmp4, d1, threshold[1 * 8]);
343 THRESHOLD(tmp5, d3, threshold[3 * 8]);
344 THRESHOLD(tmp6, d5, threshold[5 * 8]);
345 THRESHOLD(tmp7, d7, threshold[7 * 8]);
347 //Simd version uses here a shortcut for the tmp5,tmp6,tmp7 == 0
// IDCT odd part
349 z10 = (tmp6 - tmp5) << 1;
351 z12 = (tmp4 - tmp7) << 1;
353 tmp7 = (z11 + z13) >> 2; //+2 !
354 tmp11 = MULTIPLY16H((z11 - z13) << 1, FIX_1_414213562);
355 z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
356 tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
357 tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - !!
// accumulate filtered columns into the sliding workspace;
// note rows 6 and 7 are assigned (=) rather than accumulated (+=)
363 wsptr[DCTSIZE * 0] += (tmp0 + tmp7);
364 wsptr[DCTSIZE * 1] += (tmp1 + tmp6);
365 wsptr[DCTSIZE * 2] += (tmp2 + tmp5);
366 wsptr[DCTSIZE * 3] += (tmp3 - tmp4);
367 wsptr[DCTSIZE * 4] += (tmp3 + tmp4);
368 wsptr[DCTSIZE * 5] += (tmp2 - tmp5);
369 wsptr[DCTSIZE * 6] = (tmp1 - tmp6);
370 wsptr[DCTSIZE * 7] = (tmp0 - tmp7);
372 dataptr++; //next column
376 dataptr += 8; //skip each second start pos
// Row IDCT: one pass of the fixed-point AAN inverse DCT over rows of the
// workspace, descaling and adding (+=) the result into output_adr.
// NOTE(review): this extract is missing interior lines of the original
// (wsptr/outptr setup and some butterfly steps); comments describe only
// the visible code.
381 static void row_idct_c(int16_t *workspace, int16_t *output_adr, ptrdiff_t output_stride, int cnt)
383 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
384 int_simd16_t tmp10, tmp11, tmp12, tmp13;
385 int_simd16_t z5, z10, z11, z12, z13;
392 for (; cnt > 0; cnt--) {
394 //Simd version reads 4x4 block and transposes it
// even part
395 tmp10 = wsptr[2] + wsptr[3];
396 tmp11 = wsptr[2] - wsptr[3];
398 tmp13 = wsptr[0] + wsptr[1];
399 tmp12 = (MULTIPLY16H(wsptr[0] - wsptr[1], FIX_1_414213562_A) << 2) - tmp13;//this shift order to avoid overflow
401 tmp0 = tmp10 + tmp13; //->temps
402 tmp3 = tmp10 - tmp13; //->temps
403 tmp1 = tmp11 + tmp12;
404 tmp2 = tmp11 - tmp12;
407 //Also transpose, with previous:
409 // ---- ---- idct ||||
410 // ---- ---- ---> ||||
// odd part
412 z13 = wsptr[4] + wsptr[5];
413 z10 = wsptr[4] - wsptr[5];
414 z11 = wsptr[6] + wsptr[7];
415 z12 = wsptr[6] - wsptr[7];
418 tmp11 = MULTIPLY16H(z11 - z13, FIX_1_414213562);
420 z5 = MULTIPLY16H(z10 + z12, FIX_1_847759065);
421 tmp10 = MULTIPLY16H(z12, FIX_1_082392200) - z5;
422 tmp12 = MULTIPLY16H(z10, FIX_2_613125930) + z5; // - FIX_
// chained subtraction recovers tmp4..tmp6 from the rotator outputs
424 tmp6 = (tmp12 << 3) - tmp7;
425 tmp5 = (tmp11 << 3) - tmp6;
426 tmp4 = (tmp10 << 3) + tmp5;
428 // Final output stage: descale and write column
429 outptr[0 * output_stride] += DESCALE(tmp0 + tmp7, 3);
430 outptr[1 * output_stride] += DESCALE(tmp1 + tmp6, 3);
431 outptr[2 * output_stride] += DESCALE(tmp2 + tmp5, 3);
432 outptr[3 * output_stride] += DESCALE(tmp3 - tmp4, 3);
433 outptr[4 * output_stride] += DESCALE(tmp3 + tmp4, 3);
434 outptr[5 * output_stride] += DESCALE(tmp2 - tmp5, 3);
435 outptr[6 * output_stride] += DESCALE(tmp1 - tmp6, 3); //no += ?
436 outptr[7 * output_stride] += DESCALE(tmp0 - tmp7, 3); //no += ?
439 wsptr += DCTSIZE; // advance pointer to next row
// Row FDCT: fixed-point AAN forward DCT over 8-sample rows read from
// 'pixels' (stride line_size), writing coefficients into 'data'.
// NOTE(review): this extract is missing interior lines of the original
// (dataptr setup, the tmp10..tmp13 even-part sums, z11/z13 for the odd
// part); comments describe only the visible code.
443 static void row_fdct_c(int16_t *data, const uint8_t *pixels, ptrdiff_t line_size, int cnt)
445 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
446 int_simd16_t tmp10, tmp11, tmp12, tmp13;
447 int_simd16_t z1, z2, z3, z4, z5, z11, z13;
451 // Pass 1: process rows.
454 for (; cnt > 0; cnt--) {
// butterflies on mirrored pixel pairs
455 tmp0 = pixels[line_size * 0] + pixels[line_size * 7];
456 tmp7 = pixels[line_size * 0] - pixels[line_size * 7];
457 tmp1 = pixels[line_size * 1] + pixels[line_size * 6];
458 tmp6 = pixels[line_size * 1] - pixels[line_size * 6];
459 tmp2 = pixels[line_size * 2] + pixels[line_size * 5];
460 tmp5 = pixels[line_size * 2] - pixels[line_size * 5];
461 tmp3 = pixels[line_size * 3] + pixels[line_size * 4];
462 tmp4 = pixels[line_size * 3] - pixels[line_size * 4];
470 //Even columns are written first, this leads to different order of columns
471 //in column_fidct(), but they are processed independently, so all ok.
472 //Later in the row_idct() columns readed at the same order.
473 dataptr[2] = tmp10 + tmp11;
474 dataptr[3] = tmp10 - tmp11;
476 z1 = MULTIPLY16H((tmp12 + tmp13) << 2, FIX_0_707106781);
477 dataptr[0] = tmp13 + z1;
478 dataptr[1] = tmp13 - z1;
// odd part (rotator)
482 tmp10 = (tmp4 + tmp5) << 2;
483 tmp11 = (tmp5 + tmp6) << 2;
484 tmp12 = (tmp6 + tmp7) << 2;
486 z5 = MULTIPLY16H(tmp10 - tmp12, FIX_0_382683433);
487 z2 = MULTIPLY16H(tmp10, FIX_0_541196100) + z5;
488 z4 = MULTIPLY16H(tmp12, FIX_1_306562965) + z5;
489 z3 = MULTIPLY16H(tmp11, FIX_0_707106781);
494 dataptr[4] = z13 + z2;
495 dataptr[5] = z13 - z2;
496 dataptr[6] = z11 + z4;
497 dataptr[7] = z11 - z4;
499 pixels++; // advance pointer to next column
504 static int query_formats(AVFilterContext *ctx)
506 static const enum PixelFormat pix_fmts[] = {
507 AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P,
508 AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV411P,
509 AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV440P,
510 AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ422P,
511 AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ440P,
512 AV_PIX_FMT_GBRP, AV_PIX_FMT_GRAY8,
515 ff_set_common_formats(ctx, ff_make_format_list(pix_fmts));
519 static int config_input(AVFilterLink *inlink)
521 AVFilterContext *ctx = inlink->dst;
522 FSPPContext *fspp = ctx->priv;
523 const int h = FFALIGN(inlink->h + 16, 16);
524 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
526 fspp->hsub = desc->log2_chroma_w;
527 fspp->vsub = desc->log2_chroma_h;
529 fspp->temp_stride = FFALIGN(inlink->w + 16, 16);
530 fspp->temp = av_malloc_array(fspp->temp_stride, h * sizeof(*fspp->temp));
531 fspp->src = av_malloc_array(fspp->temp_stride, h * sizeof(*fspp->src));
533 if (!fspp->temp || !fspp->src)
534 return AVERROR(ENOMEM);
536 if (!fspp->use_bframe_qp && !fspp->qp) {
537 fspp->non_b_qp_alloc_size = FF_CEIL_RSHIFT(inlink->w, 4) * FF_CEIL_RSHIFT(inlink->h, 4);
538 fspp->non_b_qp_table = av_calloc(fspp->non_b_qp_alloc_size, sizeof(*fspp->non_b_qp_table));
539 if (!fspp->non_b_qp_table)
540 return AVERROR(ENOMEM);
543 fspp->store_slice = store_slice_c;
544 fspp->store_slice2 = store_slice2_c;
545 fspp->mul_thrmat = mul_thrmat_c;
546 fspp->column_fidct = column_fidct_c;
547 fspp->row_idct = row_idct_c;
548 fspp->row_fdct = row_fdct_c;
551 ff_fspp_init_x86(fspp);
// Per-frame entry point: rebuilds the threshold matrices from the
// 'strength' bias, fetches (or reuses the saved non-B-frame) QP table,
// and runs filter() on each plane, allocating a separate output frame
// when in-place filtering is not possible.
// NOTE(review): this extract is missing several interior lines of the
// original (variable declarations, error paths, in/out frame handling);
// comments describe only the visible code.
556 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
558 AVFilterContext *ctx = inlink->dst;
559 FSPPContext *fspp = ctx->priv;
560 AVFilterLink *outlink = ctx->outputs[0];
564 uint8_t *qp_table = NULL;
566 int custom_threshold_m[64];
// map strength [-15..32] onto a multiplier around 1.0 (bias/71.0 below)
568 bias = (1 << 4) + fspp->strength;
570 for (i = 0; i < 64; i++) //FIXME: tune custom_threshold[] and remove this !
571 custom_threshold_m[i] = (int)(custom_threshold[i] * (bias / 71.0) + 0.5);
// pack four 16-bit thresholds per 64-bit word in the interleaved
// (2,6,0,4 / 5,3,1,7) column order expected by column_fidct
573 for (i = 0; i < 8; i++) {
574 fspp->threshold_mtx_noq[2 * i] = (uint64_t)custom_threshold_m[i * 8 + 2]
575 |(((uint64_t)custom_threshold_m[i * 8 + 6]) << 16)
576 |(((uint64_t)custom_threshold_m[i * 8 + 0]) << 32)
577 |(((uint64_t)custom_threshold_m[i * 8 + 4]) << 48);
579 fspp->threshold_mtx_noq[2 * i + 1] = (uint64_t)custom_threshold_m[i * 8 + 5]
580 |(((uint64_t)custom_threshold_m[i * 8 + 3]) << 16)
581 |(((uint64_t)custom_threshold_m[i * 8 + 1]) << 32)
582 |(((uint64_t)custom_threshold_m[i * 8 + 7]) << 48);
586 fspp->prev_q = fspp->qp, fspp->mul_thrmat((int16_t *)(&fspp->threshold_mtx_noq[0]), (int16_t *)(&fspp->threshold_mtx[0]), fspp->qp);
588 /* if we are not in a constant user quantizer mode and we don't want to use
589 * the quantizers from the B-frames (B-frames often have a higher QP), we
590 * need to save the qp table from the last non B-frame; this is what the
591 * following code block does */
593 qp_table = av_frame_get_qp_table(in, &qp_stride, &fspp->qscale_type);
595 if (qp_table && !fspp->use_bframe_qp && in->pict_type != AV_PICTURE_TYPE_B) {
598 /* if the qp stride is not set, it means the QP are only defined on
601 w = FF_CEIL_RSHIFT(inlink->w, 4);
605 h = FF_CEIL_RSHIFT(inlink->h, 4);
// grow the saved table if this frame's QP grid is larger than before
607 if (w * h > fspp->non_b_qp_alloc_size) {
608 int ret = av_reallocp_array(&fspp->non_b_qp_table, w, h);
610 fspp->non_b_qp_alloc_size = 0;
613 fspp->non_b_qp_alloc_size = w * h;
616 av_assert0(w * h <= fspp->non_b_qp_alloc_size);
617 memcpy(fspp->non_b_qp_table, qp_table, w * h);
621 if (fspp->log2_count && !ctx->is_disabled) {
622 if (!fspp->use_bframe_qp && fspp->non_b_qp_table)
623 qp_table = fspp->non_b_qp_table;
625 if (qp_table || fspp->qp) {
626 const int cw = FF_CEIL_RSHIFT(inlink->w, fspp->hsub);
627 const int ch = FF_CEIL_RSHIFT(inlink->h, fspp->vsub);
629 /* get a new frame if in-place is not possible or if the dimensions
630 * are not multiple of 8 */
631 if (!av_frame_is_writable(in) || (inlink->w & 7) || (inlink->h & 7)) {
632 const int aligned_w = FFALIGN(inlink->w, 8);
633 const int aligned_h = FFALIGN(inlink->h, 8);
635 out = ff_get_video_buffer(outlink, aligned_w, aligned_h);
638 return AVERROR(ENOMEM);
640 av_frame_copy_props(out, in);
// filter luma at full size, chroma at subsampled size
643 filter(fspp, out->data[0], in->data[0], out->linesize[0], in->linesize[0],
644 inlink->w, inlink->h, qp_table, qp_stride, 1);
645 filter(fspp, out->data[1], in->data[1], out->linesize[1], in->linesize[1],
646 cw, ch, qp_table, qp_stride, 0);
647 filter(fspp, out->data[2], in->data[2], out->linesize[2], in->linesize[2],
648 cw, ch, qp_table, qp_stride, 0);
// plane 3 (alpha, when present) is passed through unfiltered
655 av_image_copy_plane(out->data[3], out->linesize[3],
656 in ->data[3], in ->linesize[3],
657 inlink->w, inlink->h);
660 return ff_filter_frame(outlink, out);
663 static av_cold void uninit(AVFilterContext *ctx)
665 FSPPContext *fspp = ctx->priv;
666 av_freep(&fspp->temp);
667 av_freep(&fspp->src);
668 av_freep(&fspp->non_b_qp_table);
671 static const AVFilterPad fspp_inputs[] = {
674 .type = AVMEDIA_TYPE_VIDEO,
675 .config_props = config_input,
676 .filter_frame = filter_frame,
681 static const AVFilterPad fspp_outputs[] = {
684 .type = AVMEDIA_TYPE_VIDEO,
689 AVFilter ff_vf_fspp = {
691 .description = NULL_IF_CONFIG_SMALL("Apply Fast Simple Post-processing filter."),
692 .priv_size = sizeof(FSPPContext),
694 .query_formats = query_formats,
695 .inputs = fspp_inputs,
696 .outputs = fspp_outputs,
697 .priv_class = &fspp_class,
698 .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,