- int z = 0, y;
- float ssim = 0.0;
- int64_t (*sum0)[4] = temp;
- int64_t (*sum1)[4] = sum0 + SUM_LEN(width);
-
- width >>= 2;
- height >>= 2;
-
- for (y = 1; y < height; y++) {
- for (; z <= y; z++) {
- FFSWAP(void*, sum0, sum1);
- ssim_4x4xn_16bit(&main[4 * z * main_stride], main_stride,
- &ref[4 * z * ref_stride], ref_stride,
- sum0, width);
+ ThreadData *td = arg;
+ double *score = td->score[jobnr];
+ void *temp = td->temp[jobnr];
+ const int max = td->max;
+
+ for (int c = 0; c < td->nb_components; c++) {
+ const uint8_t *main_data = td->main_data[c];
+ const uint8_t *ref_data = td->ref_data[c];
+ const int main_stride = td->main_linesize[c];
+ const int ref_stride = td->ref_linesize[c];
+ int width = td->planewidth[c];
+ int height = td->planeheight[c];
+ const int slice_start = ((height >> 2) * jobnr) / nb_jobs;
+ const int slice_end = ((height >> 2) * (jobnr+1)) / nb_jobs;
+ const int ystart = FFMAX(1, slice_start);
+ int z = ystart - 1;
+ double ssim = 0.0;
+ int64_t (*sum0)[4] = temp;
+ int64_t (*sum1)[4] = sum0 + SUM_LEN(width);
+
+ width >>= 2;
+ height >>= 2;
+
+ for (int y = ystart; y < slice_end; y++) {
+ for (; z <= y; z++) {
+ FFSWAP(void*, sum0, sum1);
+ ssim_4x4xn_16bit(&main_data[4 * z * main_stride], main_stride,
+ &ref_data[4 * z * ref_stride], ref_stride,
+ sum0, width);
+ }
+
+ ssim += ssim_endn_16bit((const int64_t (*)[4])sum0, (const int64_t (*)[4])sum1, width - 1, max);