git.sesse.net Git - ffmpeg/blob - libavfilter/vf_readeia608.c

   1 /*
   2  * Copyright (c) 2017 Paul B Mahol
   3  *
   4  * This file is part of FFmpeg.
   5  *
   6  * FFmpeg is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * FFmpeg is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with FFmpeg; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19  */
  20
  21 /**
  22  * @file
  23  * Filter for reading closed captioning data (EIA-608).
  24  * See also https://en.wikipedia.org/wiki/EIA-608
  25  */
  26
  27 #include <string.h>
  28
  29 #include "libavutil/internal.h"
  30 #include "libavutil/opt.h"
  31 #include "libavutil/pixdesc.h"
  32 #include "libavutil/timestamp.h"
  33
  34 #include "avfilter.h"
  35 #include "formats.h"
  36 #include "internal.h"
  37 #include "video.h"
  38
  39 #define LAG 25 /* entries reserved ahead of the pixel samples in the line buffer; also the window length handed to thresholding() */
  40 #define SYNC_MIN 12.f /* smallest accepted value of width / size of run 14 in extract_line() */
  41 #define SYNC_MAX 15.f /* largest accepted value of width / size of run 14 in extract_line() */
  42
  43 typedef struct LineItem { /* one sample of a scan line together with its thresholding state */
  44     int   input;      /* sample value stored by extract_line() */
  45     int   output;     /* binarized value, 0 or 255, assigned by thresholding() */
  46
  47     float unfiltered; /* input / 255.f (entries below the lag are seeded with a mean instead) */
  48     float filtered;   /* value consumed by meanf()/stddevf(); starts out equal to unfiltered */
  49     float average;    /* meanf() over a lag-sized window, computed in thresholding() */
  50     float deviation;  /* stddevf() over the same window, computed in thresholding() */
  51 } LineItem;
  52
  53 typedef struct CodeItem { /* one run of identical output values, produced by periods() */
  54     uint8_t bit; /* output value shared by the samples of the run */
  55     int size;    /* number of samples in the run */
  56 } CodeItem;
  57
  58 typedef struct ReadEIA608Context { /* private state of the readeia608 filter */
  59     const AVClass *class;
  60     int start, end; /* first and last line to scan (options scan_min / scan_max); end is clipped in config_input() */
  61     int nb_found;   /* codes stored for the current frame so far; also the index used in the metadata keys */
  62     int white;      /* white level picked by find_black_and_white() */
  63     int black;      /* black level picked by find_black_and_white() */
  64     float spw;      /* option "spw"; scales the length passed to meanf() when seeding the lag entries */
  65     int chp;        /* option "chp": bytes with an even number of set bits become 0x7F */
  66     int lp;         /* option "lp": average 7 neighbouring samples before processing */
  67
  68     uint64_t histogram[256]; /* occurrence count of each sample value of the current line */
  69
  70     CodeItem *code; /* run buffer, width + LAG entries, allocated in config_input() */
  71     LineItem *line; /* sample buffer, width + LAG entries, allocated in config_input() */
  72 } ReadEIA608Context;
  73
  74 #define OFFSET(x) offsetof(ReadEIA608Context, x) /* byte offset of an option field inside the private context */
  75 #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM /* flags shared by every option below; NOTE(review): expansion is not parenthesized */
  76
  77 static const AVOption readeia608_options[] = { /* columns: name, help, field offset, type, default, min, max, flags */
  78     { "scan_min", "set from which line to scan for codes",               OFFSET(start), AV_OPT_TYPE_INT,   {.i64=0},     0, INT_MAX, FLAGS },
  79     { "scan_max", "set to which line to scan for codes",                 OFFSET(end),   AV_OPT_TYPE_INT,   {.i64=29},    0, INT_MAX, FLAGS },
  80     { "spw",      "set ratio of width reserved for sync code detection", OFFSET(spw),   AV_OPT_TYPE_FLOAT, {.dbl=.27}, 0.1,     0.7, FLAGS },
  81     { "chp",      "check and apply parity bit",                          OFFSET(chp),   AV_OPT_TYPE_BOOL,  {.i64= 0},    0,       1, FLAGS },
  82     { "lp",       "lowpass line prior to processing",                    OFFSET(lp),    AV_OPT_TYPE_BOOL,  {.i64= 1},    0,       1, FLAGS },
  83     { NULL } /* terminator */
  84 };
  85
  86 AVFILTER_DEFINE_CLASS(readeia608); /* provides readeia608_class, referenced by the filter definition */
  87
  88 static int query_formats(AVFilterContext *ctx)
  89 {
  90     static const enum AVPixelFormat pixel_fmts[] = {
  91         AV_PIX_FMT_GRAY8,
  92         AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
  93         AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
  94         AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P,
  95         AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
  96         AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_YUVJ444P,
  97         AV_PIX_FMT_YUVJ411P,
  98         AV_PIX_FMT_NONE
  99     };
 100     AVFilterFormats *formats = ff_make_format_list(pixel_fmts);
 101     if (!formats)
 102         return AVERROR(ENOMEM);
 103     return ff_set_common_formats(ctx, formats);
 104 }
 105
 106 static int config_input(AVFilterLink *inlink)
 107 {
 108     AVFilterContext *ctx = inlink->dst;
 109     ReadEIA608Context *s = ctx->priv;
 110     int size = inlink->w + LAG;
 111
 112     if (s->end >= inlink->h) {
 113         av_log(ctx, AV_LOG_WARNING, "Last line to scan too large, clipping.\n");
 114         s->end = inlink->h - 1;
 115     }
 116
 117     if (s->start > s->end) {
 118         av_log(ctx, AV_LOG_ERROR, "Invalid range.\n");
 119         return AVERROR(EINVAL);
 120     }
 121
 122     s->line = av_calloc(size, sizeof(*s->line));
 123     s->code = av_calloc(size, sizeof(*s->code));
 124     if (!s->line || !s->code)
 125         return AVERROR(ENOMEM);
 126
 127     return 0;
 128 }
 129
 130 static void build_histogram(ReadEIA608Context *s, const LineItem *line, int len)
 131 {
 132     memset(s->histogram, 0, sizeof(s->histogram));
 133
 134     for (int i = LAG; i < len + LAG; i++)
 135         s->histogram[line[i].input]++;
 136 }
 137
 138 static void find_black_and_white(ReadEIA608Context *s)
 139 {
 140     int start = 0, end = 0, middle;
 141     int black = 0, white = 0;
 142     int cnt;
 143
 144     for (int i = 0; i < 256; i++) {
 145         if (s->histogram[i]) {
 146             start = i;
 147             break;
 148         }
 149     }
 150
 151     for (int i = 255; i >= 0; i--) {
 152         if (s->histogram[i]) {
 153             end = i;
 154             break;
 155         }
 156     }
 157
 158     middle = start + (end - start) / 2;
 159
 160     cnt = 0;
 161     for (int i = start; i <= middle; i++) {
 162         if (s->histogram[i] > cnt) {
 163             cnt = s->histogram[i];
 164             black = i;
 165         }
 166     }
 167
 168     cnt = 0;
 169     for (int i = end; i >= middle; i--) {
 170         if (s->histogram[i] > cnt) {
 171             cnt = s->histogram[i];
 172             white = i;
 173         }
 174     }
 175
 176     s->black = black;
 177     s->white = white;
 178 }
 179
 180 static float meanf(const LineItem *line, int len)
 181 {
 182     float sum = 0.0, mean = 0.0;
 183
 184     for (int i = 0; i < len; i++)
 185         sum += line[i].filtered;
 186
 187     mean = sum / len;
 188
 189     return mean;
 190 }
 191
 192 static float stddevf(const LineItem *line, int len)
 193 {
 194     float m = meanf(line, len);
 195     float standard_deviation = 0.f;
 196
 197     for (int i = 0; i < len; i++)
 198         standard_deviation += (line[i].filtered - m) * (line[i].filtered - m);
 199
 200     return sqrtf(standard_deviation / (len - 1));
 201 }
 202
 203 static void thresholding(ReadEIA608Context *s, LineItem *line,
 204                          int lag, float threshold, float influence, int len)
 205 {
 206     for (int i = lag; i < len + lag; i++) {
 207         line[i].unfiltered = line[i].input / 255.f;
 208         line[i].filtered = line[i].unfiltered;
 209     }
 210
 211     for (int i = 0; i < lag; i++) {
 212         line[i].unfiltered = meanf(line, len * s->spw);
 213         line[i].filtered = line[i].unfiltered;
 214     }
 215
 216     line[lag - 1].average   = meanf(line, lag);
 217     line[lag - 1].deviation = stddevf(line, lag);
 218
 219     for (int i = lag; i < len + lag; i++) {
 220         if (fabsf(line[i].unfiltered - line[i-1].average) > threshold * line[i-1].deviation) {
 221             if (line[i].unfiltered > line[i-1].average) {
 222                 line[i].output = 255;
 223             } else {
 224                 line[i].output = 0;
 225             }
 226
 227             line[i].filtered = influence * line[i].unfiltered + (1.f - influence) * line[i-1].filtered;
 228         } else {
 229             int distance_from_black, distance_from_white;
 230
 231             distance_from_black = FFABS(line[i].input - s->black);
 232             distance_from_white = FFABS(line[i].input - s->white);
 233
 234             line[i].output = distance_from_black <= distance_from_white ? 0 : 255;
 235         }
 236
 237         line[i].average   = meanf(line + i - lag, lag);
 238         line[i].deviation = stddevf(line + i - lag, lag);
 239     }
 240 }
 241
 242 static int periods(const LineItem *line, CodeItem *code, int len)
 243 {
 244     int hold = line[LAG].output, cnt = 0;
 245     int last = LAG;
 246
 247     memset(code, 0, len * sizeof(*code));
 248
 249     for (int i = LAG + 1; i < len + LAG; i++) {
 250         if (line[i].output != hold) {
 251             code[cnt].size = i - last;
 252             code[cnt].bit = hold;
 253             hold = line[i].output;
 254             last = i;
 255             cnt++;
 256         }
 257     }
 258
 259     code[cnt].size = LAG + len - last;
 260     code[cnt].bit = hold;
 261
 262     return cnt + 1;
 263 }
 264
 265 static void dump_code(AVFilterContext *ctx, int len, int item)
 266 {
 267     ReadEIA608Context *s = ctx->priv;
 268
 269     av_log(ctx, AV_LOG_DEBUG, "%d:", item);
 270     for (int i = 0; i < len; i++) {
 271         av_log(ctx, AV_LOG_DEBUG, " %03d", s->code[i].size);
 272     }
 273     av_log(ctx, AV_LOG_DEBUG, "\n");
 274 }
 275
 276 static void extract_line(AVFilterContext *ctx, AVFrame *in, int w, int nb_line)
 277 {
 278     ReadEIA608Context *s = ctx->priv;
 279     LineItem *line = s->line;
 280     int i, j, ch, len;
 281     const uint8_t *src;
 282     uint8_t byte[2] = { 0 };
 283     uint8_t codes[19] = { 0 };
 284     float bit_size = 0.f;
 285     int parity;
 286
 287     memset(line, 0, (w + LAG) * sizeof(*line));
 288
 289     src = &in->data[0][nb_line * in->linesize[0]];
 290     if (s->lp) {
 291         for (i = 0; i < w; i++) {
 292             int a = FFMAX(i - 3, 0);
 293             int b = FFMAX(i - 2, 0);
 294             int c = FFMAX(i - 1, 0);
 295             int d = FFMIN(i + 3, w-1);
 296             int e = FFMIN(i + 2, w-1);
 297             int f = FFMIN(i + 1, w-1);
 298
 299             line[LAG + i].input = (src[a] + src[b] + src[c] + src[i] + src[d] + src[e] + src[f] + 6) / 7;
 300         }
 301     } else {
 302         for (i = 0; i < w; i++) {
 303             line[LAG + i].input = src[i];
 304         }
 305     }
 306
 307     build_histogram(s, line, w);
 308     find_black_and_white(s);
 309     if (s->white - s->black < 5)
 310         return;
 311
 312     thresholding(s, line, LAG, 1, 0, w);
 313     len = periods(line, s->code, w);
 314     dump_code(ctx, len, nb_line);
 315     if (len < 15 ||
 316         s->code[14].bit != 0 ||
 317         w / (float)s->code[14].size < SYNC_MIN ||
 318         w / (float)s->code[14].size > SYNC_MAX) {
 319         return;
 320     }
 321
 322     for (i = 14; i < len; i++) {
 323         bit_size += s->code[i].size;
 324     }
 325
 326     bit_size /= 19.f;
 327     for (i = 1; i < 14; i++) {
 328         if (s->code[i].size > bit_size * 1.5f) {
 329             return;
 330         }
 331     }
 332
 333     if (s->code[15].size / bit_size < 0.45f) {
 334         return;
 335     }
 336
 337     for (j = 0, i = 14; i < len; i++) {
 338         int run, bit;
 339
 340         run = lrintf(s->code[i].size / bit_size);
 341         bit = s->code[i].bit;
 342
 343         for (int k = 0; j < 19 && k < run; k++) {
 344             codes[j++] = bit;
 345         }
 346
 347         if (j >= 19)
 348             break;
 349     }
 350
 351     for (ch = 0; ch < 2; ch++) {
 352         for (parity = 0, i = 0; i < 8; i++) {
 353             int b = codes[3 + ch * 8 + i];
 354
 355             if (b == 255) {
 356                 parity++;
 357                 b = 1;
 358             } else {
 359                 b = 0;
 360             }
 361             byte[ch] |= b << i;
 362         }
 363
 364         if (s->chp) {
 365             if (!(parity & 1)) {
 366                 byte[ch] = 0x7F;
 367             }
 368         }
 369     }
 370
 371     {
 372         uint8_t key[128], value[128];
 373
 374         //snprintf(key, sizeof(key), "lavfi.readeia608.%d.bits", s->nb_found);
 375         //snprintf(value, sizeof(value), "0b%d%d%d%d%d%d%d%d 0b%d%d%d%d%d%d%d%d", codes[3]==255,codes[4]==255,codes[5]==255,codes[6]==255,codes[7]==255,codes[8]==255,codes[9]==255,codes[10]==255,codes[11]==255,codes[12]==255,codes[13]==255,codes[14]==255,codes[15]==255,codes[16]==255,codes[17]==255,codes[18]==255);
 376         //av_dict_set(&in->metadata, key, value, 0);
 377
 378         snprintf(key, sizeof(key), "lavfi.readeia608.%d.cc", s->nb_found);
 379         snprintf(value, sizeof(value), "0x%02X%02X", byte[0], byte[1]);
 380         av_dict_set(&in->metadata, key, value, 0);
 381
 382         snprintf(key, sizeof(key), "lavfi.readeia608.%d.line", s->nb_found);
 383         snprintf(value, sizeof(value), "%d", nb_line);
 384         av_dict_set(&in->metadata, key, value, 0);
 385     }
 386
 387     s->nb_found++;
 388 }
 389
 390 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 391 {
 392     AVFilterContext *ctx  = inlink->dst;
 393     AVFilterLink *outlink = ctx->outputs[0];
 394     ReadEIA608Context *s = ctx->priv;
 395     int i;
 396
 397     s->nb_found = 0;
 398     for (i = s->start; i <= s->end; i++)
 399         extract_line(ctx, in, inlink->w, i);
 400
 401     return ff_filter_frame(outlink, in);
 402 }
 403
 404 static av_cold void uninit(AVFilterContext *ctx)
 405 {
 406     ReadEIA608Context *s = ctx->priv;
 407
 408     av_freep(&s->code);
 409     av_freep(&s->line);
 410 }
 411
 412 static const AVFilterPad readeia608_inputs[] = { /* the single video input pad */
 413     {
 414         .name         = "default",
 415         .type         = AVMEDIA_TYPE_VIDEO,
 416         .filter_frame = filter_frame, /* scans each incoming frame */
 417         .config_props = config_input, /* checks the scan range and allocates the work buffers */
 418     },
 419     { NULL } /* terminator */
 420 };
 421
 422 static const AVFilterPad readeia608_outputs[] = { /* the single video output pad; no callbacks are set */
 423     {
 424         .name = "default",
 425         .type = AVMEDIA_TYPE_VIDEO,
 426     },
 427     { NULL } /* terminator */
 428 };
 429
 430 AVFilter ff_vf_readeia608 = { /* filter definition tying together the class, pads and callbacks defined in this file */
 431     .name          = "readeia608",
 432     .description   = NULL_IF_CONFIG_SMALL("Read EIA-608 Closed Caption codes from input video and write them to frame metadata."),
 433     .priv_size     = sizeof(ReadEIA608Context),
 434     .priv_class    = &readeia608_class, /* generated by AVFILTER_DEFINE_CLASS(readeia608) */
 435     .query_formats = query_formats,
 436     .inputs        = readeia608_inputs,
 437     .outputs       = readeia608_outputs,
 438     .uninit        = uninit, /* frees the line and code buffers */
 439     .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
 440 };