FILE *stats_file;
char *stats_file_str;
int nb_components;
+ int nb_threads;
int max;
uint64_t nb_frames;
double ssim[4], ssim_total;
char comps[4];
- float coefs[4];
+ double coefs[4];
uint8_t rgba_map[4];
int planewidth[4];
int planeheight[4];
- int *temp;
+ int **temp;
int is_rgb;
- float (*ssim_plane)(SSIMDSPContext *dsp,
- uint8_t *main, int main_stride,
- uint8_t *ref, int ref_stride,
- int width, int height, void *temp,
- int max);
+ double **score;
+ int (*ssim_plane)(AVFilterContext *ctx, void *arg,
+ int jobnr, int nb_jobs);
SSIMDSPContext dsp;
} SSIMContext;
static void set_meta(AVDictionary **metadata, const char *key, char comp, float d)
{
char value[128];
- snprintf(value, sizeof(value), "%0.2f", d);
+ snprintf(value, sizeof(value), "%f", d);
if (comp) {
char key2[128];
snprintf(key2, sizeof(key2), "%s%c", key, comp);
return ssim;
}
-static float ssim_endn_8bit(const int (*sum0)[4], const int (*sum1)[4], int width)
+static double ssim_endn_8bit(const int (*sum0)[4], const int (*sum1)[4], int width)
{
- float ssim = 0.0;
+ double ssim = 0.0;
int i;
for (i = 0; i < width; i++)
/* Number of 4-element sum entries reserved for one line buffer of a plane
 * that is w pixels wide. Each job's temp buffer holds two such line buffers
 * (sum0 followed by sum1), hence the 2 * SUM_LEN(...) allocation size. */
#define SUM_LEN(w) (((w) >> 2) + 3)
-static float ssim_plane_16bit(SSIMDSPContext *dsp,
- uint8_t *main, int main_stride,
- uint8_t *ref, int ref_stride,
- int width, int height, void *temp,
- int max)
/* Per-frame arguments passed to the ssim_plane job callbacks through the
 * execute() arg pointer; filled in by do_ssim before each execute() call. */
+typedef struct ThreadData {
+ const uint8_t *main_data[4]; /* plane data pointers of the main frame */
+ const uint8_t *ref_data[4]; /* plane data pointers of the reference frame */
+ int main_linesize[4]; /* per-plane line sizes of the main frame */
+ int ref_linesize[4]; /* per-plane line sizes of the reference frame */
+ int planewidth[4]; /* per-plane width, copied from SSIMContext */
+ int planeheight[4]; /* per-plane height, copied from SSIMContext */
+ double **score; /* score[jobnr][component]: per-job unnormalized sums */
+ int **temp; /* temp[jobnr]: per-job scratch buffer for the sum lines */
+ int nb_components; /* number of planes each job iterates over */
+ int max; /* maximum sample value, forwarded to ssim_endn_16bit */
+ SSIMDSPContext *dsp; /* DSP function table used by the 8-bit path */
+} ThreadData;
+
/*
 * Slice job used when the component depth is above 8 bits.
 *
 * For every component, the (height >> 2) rows are split evenly between the
 * nb_jobs jobs; this job handles rows [max(1, slice_start), slice_end).
 * The sum of ssim_endn_16bit() over those rows is stored, unnormalized,
 * in td->score[jobnr][c]; the caller adds the per-job sums together and
 * divides by the number of evaluated positions.
 *
 * arg is a ThreadData; ctx is not used here. Always returns 0.
 */
+static int ssim_plane_16bit(AVFilterContext *ctx, void *arg,
+ int jobnr, int nb_jobs)
{
- int z = 0, y;
- float ssim = 0.0;
- int64_t (*sum0)[4] = temp;
- int64_t (*sum1)[4] = sum0 + SUM_LEN(width);
-
- width >>= 2;
- height >>= 2;
-
- for (y = 1; y < height; y++) {
- for (; z <= y; z++) {
- FFSWAP(void*, sum0, sum1);
- ssim_4x4xn_16bit(&main[4 * z * main_stride], main_stride,
- &ref[4 * z * ref_stride], ref_stride,
- sum0, width);
+ ThreadData *td = arg;
+ double *score = td->score[jobnr]; /* this job's private result row */
+ void *temp = td->temp[jobnr]; /* this job's private scratch buffer */
+ const int max = td->max;
+
+ for (int c = 0; c < td->nb_components; c++) {
+ const uint8_t *main_data = td->main_data[c];
+ const uint8_t *ref_data = td->ref_data[c];
+ const int main_stride = td->main_linesize[c];
+ const int ref_stride = td->ref_linesize[c];
+ int width = td->planewidth[c];
+ int height = td->planeheight[c];
+ const int slice_start = ((height >> 2) * jobnr) / nb_jobs; /* in units of 4 lines */
+ const int slice_end = ((height >> 2) * (jobnr+1)) / nb_jobs;
+ const int ystart = FFMAX(1, slice_start); /* row 0 has no preceding row to pair with */
+ int z = ystart - 1; /* start one row early so both sum lines are filled for the first y */
+ double ssim = 0.0;
+ int64_t (*sum0)[4] = temp;
+ int64_t (*sum1)[4] = sum0 + SUM_LEN(width); /* second line buffer follows the first */
+
+ width >>= 2;
+ height >>= 2;
+
+ for (int y = ystart; y < slice_end; y++) {
+ for (; z <= y; z++) {
+ FFSWAP(void*, sum0, sum1);
+ ssim_4x4xn_16bit(&main_data[4 * z * main_stride], main_stride,
+ &ref_data[4 * z * ref_stride], ref_stride,
+ sum0, width);
+ }
+
+ ssim += ssim_endn_16bit((const int64_t (*)[4])sum0, (const int64_t (*)[4])sum1, width - 1, max);
}
- ssim += ssim_endn_16bit((const int64_t (*)[4])sum0, (const int64_t (*)[4])sum1, width - 1, max);
+ score[c] = ssim; /* unnormalized partial sum for component c */
}
- return ssim / ((height - 1) * (width - 1));
+ return 0;
}
-static float ssim_plane(SSIMDSPContext *dsp,
- uint8_t *main, int main_stride,
- uint8_t *ref, int ref_stride,
- int width, int height, void *temp,
- int max)
/*
 * Slice job used when the component depth is 8 bits or less.
 *
 * For every component, the (height >> 2) rows are split evenly between the
 * nb_jobs jobs; this job handles rows [max(1, slice_start), slice_end).
 * Line sums come from dsp->ssim_4x4_line() and are folded with
 * dsp->ssim_end_line(); the resulting unnormalized sum is stored in
 * td->score[jobnr][c], and the caller adds the per-job sums together and
 * divides by the number of evaluated positions.
 *
 * arg is a ThreadData; ctx is not used here. Always returns 0.
 */
+static int ssim_plane(AVFilterContext *ctx, void *arg,
+ int jobnr, int nb_jobs)
{
- int z = 0, y;
- float ssim = 0.0;
- int (*sum0)[4] = temp;
- int (*sum1)[4] = sum0 + SUM_LEN(width);
-
- width >>= 2;
- height >>= 2;
-
- for (y = 1; y < height; y++) {
- for (; z <= y; z++) {
- FFSWAP(void*, sum0, sum1);
- dsp->ssim_4x4_line(&main[4 * z * main_stride], main_stride,
- &ref[4 * z * ref_stride], ref_stride,
- sum0, width);
+ ThreadData *td = arg;
+ double *score = td->score[jobnr]; /* this job's private result row */
+ void *temp = td->temp[jobnr]; /* this job's private scratch buffer */
+ SSIMDSPContext *dsp = td->dsp;
+
+ for (int c = 0; c < td->nb_components; c++) {
+ const uint8_t *main_data = td->main_data[c];
+ const uint8_t *ref_data = td->ref_data[c];
+ const int main_stride = td->main_linesize[c];
+ const int ref_stride = td->ref_linesize[c];
+ int width = td->planewidth[c];
+ int height = td->planeheight[c];
+ const int slice_start = ((height >> 2) * jobnr) / nb_jobs; /* in units of 4 lines */
+ const int slice_end = ((height >> 2) * (jobnr+1)) / nb_jobs;
+ const int ystart = FFMAX(1, slice_start); /* row 0 has no preceding row to pair with */
+ int z = ystart - 1; /* start one row early so both sum lines are filled for the first y */
+ double ssim = 0.0;
+ int (*sum0)[4] = temp;
+ int (*sum1)[4] = sum0 + SUM_LEN(width); /* second line buffer follows the first */
+
+ width >>= 2;
+ height >>= 2;
+
+ for (int y = ystart; y < slice_end; y++) {
+ for (; z <= y; z++) {
+ FFSWAP(void*, sum0, sum1);
+ dsp->ssim_4x4_line(&main_data[4 * z * main_stride], main_stride,
+ &ref_data[4 * z * ref_stride], ref_stride,
+ sum0, width);
+ }
+
+ ssim += dsp->ssim_end_line((const int (*)[4])sum0, (const int (*)[4])sum1, width - 1);
}
- ssim += dsp->ssim_end_line((const int (*)[4])sum0, (const int (*)[4])sum1, width - 1);
+ score[c] = ssim; /* unnormalized partial sum for component c */
}
- return ssim / ((height - 1) * (width - 1));
+ return 0;
}
/*
 * Convert an SSIM value to decibels: 10 * log10(weight / (weight - ssim)).
 * When ssim is within 1e-9 of weight the denominator is treated as zero
 * and INFINITY is returned instead of evaluating the logarithm.
 */
static double ssim_db(double ssim, double weight)
{
- return 10 * log10(weight / (weight - ssim));
+ return (fabs(weight - ssim) > 1e-9) ? 10.0 * log10(weight / (weight - ssim)) : INFINITY;
}
static int do_ssim(FFFrameSync *fs)
SSIMContext *s = ctx->priv;
AVFrame *master, *ref;
AVDictionary **metadata;
- float c[4], ssimv = 0.0;
+ double c[4] = {0}, ssimv = 0.0;
+ ThreadData td;
int ret, i;
ret = ff_framesync_dualinput_get(fs, &master, &ref);
if (ret < 0)
return ret;
- if (!ref)
+ if (ctx->is_disabled || !ref)
return ff_filter_frame(ctx->outputs[0], master);
metadata = &master->metadata;
s->nb_frames++;
+ td.nb_components = s->nb_components;
+ td.dsp = &s->dsp;
+ td.score = s->score;
+ td.temp = s->temp;
+ td.max = s->max;
+
+ for (int n = 0; n < s->nb_components; n++) {
+ td.main_data[n] = master->data[n];
+ td.ref_data[n] = ref->data[n];
+ td.main_linesize[n] = master->linesize[n];
+ td.ref_linesize[n] = ref->linesize[n];
+ td.planewidth[n] = s->planewidth[n];
+ td.planeheight[n] = s->planeheight[n];
+ }
+
+ ctx->internal->execute(ctx, s->ssim_plane, &td, NULL, FFMIN((s->planeheight[1] + 3) >> 2, s->nb_threads));
+
+ for (i = 0; i < s->nb_components; i++) {
+ for (int j = 0; j < s->nb_threads; j++)
+ c[i] += s->score[j][i];
+ c[i] = c[i] / (((s->planewidth[i] >> 2) - 1) * ((s->planeheight[i] >> 2) - 1));
+ }
+
for (i = 0; i < s->nb_components; i++) {
- c[i] = s->ssim_plane(&s->dsp, master->data[i], master->linesize[i],
- ref->data[i], ref->linesize[i],
- s->planewidth[i], s->planeheight[i], s->temp,
- s->max);
ssimv += s->coefs[i] * c[i];
s->ssim[i] += c[i];
}
+
for (i = 0; i < s->nb_components; i++) {
int cidx = s->is_rgb ? s->rgba_map[i] : i;
set_meta(metadata, "lavfi.ssim.", s->comps[i], c[cidx]);
SSIMContext *s = ctx->priv;
int sum = 0, i;
+ s->nb_threads = ff_filter_get_nb_threads(ctx);
s->nb_components = desc->nb_components;
if (ctx->inputs[0]->w != ctx->inputs[1]->w ||
for (i = 0; i < s->nb_components; i++)
s->coefs[i] = (double) s->planeheight[i] * s->planewidth[i] / sum;
- s->temp = av_mallocz_array(2 * SUM_LEN(inlink->w), (desc->comp[0].depth > 8) ? sizeof(int64_t[4]) : sizeof(int[4]));
+ s->temp = av_calloc(s->nb_threads, sizeof(*s->temp));
if (!s->temp)
return AVERROR(ENOMEM);
+
+ for (int t = 0; t < s->nb_threads; t++) {
+ s->temp[t] = av_mallocz_array(2 * SUM_LEN(inlink->w), (desc->comp[0].depth > 8) ? sizeof(int64_t[4]) : sizeof(int[4]));
+ if (!s->temp[t])
+ return AVERROR(ENOMEM);
+ }
s->max = (1 << desc->comp[0].depth) - 1;
s->ssim_plane = desc->comp[0].depth > 8 ? ssim_plane_16bit : ssim_plane;
if (ARCH_X86)
ff_ssim_init_x86(&s->dsp);
+ s->score = av_calloc(s->nb_threads, sizeof(*s->score));
+ if (!s->score)
+ return AVERROR(ENOMEM);
+
+ for (int t = 0; t < s->nb_threads && s->score; t++) {
+ s->score[t] = av_calloc(s->nb_components, sizeof(*s->score[0]));
+ if (!s->score[t])
+ return AVERROR(ENOMEM);
+ }
+
return 0;
}
if ((ret = ff_framesync_configure(&s->fs)) < 0)
return ret;
+ outlink->time_base = s->fs.time_base;
+
+ if (av_cmp_q(mainlink->time_base, outlink->time_base) ||
+ av_cmp_q(ctx->inputs[1]->time_base, outlink->time_base))
+ av_log(ctx, AV_LOG_WARNING, "not matching timebases found between first input: %d/%d and second input %d/%d, results may be incorrect!\n",
+ mainlink->time_base.num, mainlink->time_base.den,
+ ctx->inputs[1]->time_base.num, ctx->inputs[1]->time_base.den);
+
return 0;
}
if (s->stats_file && s->stats_file != stdout)
fclose(s->stats_file);
+ for (int t = 0; t < s->nb_threads && s->score; t++)
+ av_freep(&s->score[t]);
+ av_freep(&s->score);
+
+ for (int t = 0; t < s->nb_threads && s->temp; t++)
+ av_freep(&s->temp[t]);
av_freep(&s->temp);
}
.priv_class = &ssim_class,
.inputs = ssim_inputs,
.outputs = ssim_outputs,
+ .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | AVFILTER_FLAG_SLICE_THREADS,
};