You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

571 lines
17KB

  1. /*
  2. * Copyright (c) 2017 Paul B Mahol
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. /**
  21. * @file
  22. * Filter for reading closed captioning data (EIA-608).
  23. * See also https://en.wikipedia.org/wiki/EIA-608
  24. */
  25. #include <string.h>
  26. #include "libavutil/internal.h"
  27. #include "libavutil/opt.h"
  28. #include "libavutil/pixdesc.h"
  29. #include "libavutil/timestamp.h"
  30. #include "avfilter.h"
  31. #include "formats.h"
  32. #include "internal.h"
  33. #include "video.h"
  34. #define LAG 25
  35. #define CLOCK_BITSIZE_MIN 0.2f
  36. #define CLOCK_BITSIZE_MAX 1.5f
  37. #define SYNC_BITSIZE_MIN 12.f
  38. #define SYNC_BITSIZE_MAX 15.f
  39. typedef struct LineItem {
  40. int input;
  41. int output;
  42. float unfiltered;
  43. float filtered;
  44. float average;
  45. float deviation;
  46. } LineItem;
  47. typedef struct CodeItem {
  48. uint8_t bit;
  49. int size;
  50. } CodeItem;
  51. typedef struct ScanItem {
  52. int nb_line;
  53. int found;
  54. int white;
  55. int black;
  56. uint64_t *histogram;
  57. uint8_t byte[2];
  58. CodeItem *code;
  59. LineItem *line;
  60. } ScanItem;
  61. typedef struct ReadEIA608Context {
  62. const AVClass *class;
  63. int start, end;
  64. float spw;
  65. int chp;
  66. int lp;
  67. int depth;
  68. int max;
  69. int nb_allocated;
  70. ScanItem *scan;
  71. void (*read_line[2])(AVFrame *in, int nb_line,
  72. LineItem *line, int lp, int w);
  73. } ReadEIA608Context;
  74. #define OFFSET(x) offsetof(ReadEIA608Context, x)
  75. #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
  76. static const AVOption readeia608_options[] = {
  77. { "scan_min", "set from which line to scan for codes", OFFSET(start), AV_OPT_TYPE_INT, {.i64=0}, 0, INT_MAX, FLAGS },
  78. { "scan_max", "set to which line to scan for codes", OFFSET(end), AV_OPT_TYPE_INT, {.i64=29}, 0, INT_MAX, FLAGS },
  79. { "spw", "set ratio of width reserved for sync code detection", OFFSET(spw), AV_OPT_TYPE_FLOAT, {.dbl=.27}, 0.1, 0.7, FLAGS },
  80. { "chp", "check and apply parity bit", OFFSET(chp), AV_OPT_TYPE_BOOL, {.i64= 0}, 0, 1, FLAGS },
  81. { "lp", "lowpass line prior to processing", OFFSET(lp), AV_OPT_TYPE_BOOL, {.i64= 1}, 0, 1, FLAGS },
  82. { NULL }
  83. };
  84. AVFILTER_DEFINE_CLASS(readeia608);
  85. static int query_formats(AVFilterContext *ctx)
  86. {
  87. static const enum AVPixelFormat pixel_fmts[] = {
  88. AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9,
  89. AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14,
  90. AV_PIX_FMT_GRAY16,
  91. AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,
  92. AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,
  93. AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV444P,
  94. AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
  95. AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_YUVJ444P,
  96. AV_PIX_FMT_YUVJ411P,
  97. AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9,
  98. AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
  99. AV_PIX_FMT_YUV440P10,
  100. AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV420P12,
  101. AV_PIX_FMT_YUV440P12,
  102. AV_PIX_FMT_YUV444P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV420P14,
  103. AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16,
  104. AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA444P,
  105. AV_PIX_FMT_YUVA444P9, AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_YUVA444P12, AV_PIX_FMT_YUVA444P16,
  106. AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA422P12, AV_PIX_FMT_YUVA422P16,
  107. AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA420P16,
  108. AV_PIX_FMT_NONE
  109. };
  110. AVFilterFormats *formats = ff_make_format_list(pixel_fmts);
  111. if (!formats)
  112. return AVERROR(ENOMEM);
  113. return ff_set_common_formats(ctx, formats);
  114. }
  115. static int config_filter(AVFilterContext *ctx, int start, int end)
  116. {
  117. ReadEIA608Context *s = ctx->priv;
  118. AVFilterLink *inlink = ctx->inputs[0];
  119. int size = inlink->w + LAG;
  120. if (end >= inlink->h) {
  121. av_log(ctx, AV_LOG_WARNING, "Last line to scan too large, clipping.\n");
  122. end = inlink->h - 1;
  123. }
  124. if (start > end) {
  125. av_log(ctx, AV_LOG_ERROR, "Invalid range.\n");
  126. return AVERROR(EINVAL);
  127. }
  128. if (s->nb_allocated < end - start + 1) {
  129. const int diff = end - start + 1 - s->nb_allocated;
  130. s->scan = av_realloc_f(s->scan, end - start + 1, sizeof(*s->scan));
  131. if (!s->scan)
  132. return AVERROR(ENOMEM);
  133. memset(&s->scan[s->nb_allocated], 0, diff * sizeof(*s->scan));
  134. s->nb_allocated = end - start + 1;
  135. }
  136. for (int i = 0; i < s->nb_allocated; i++) {
  137. ScanItem *scan = &s->scan[i];
  138. if (!scan->histogram)
  139. scan->histogram = av_calloc(s->max + 1, sizeof(*scan->histogram));
  140. if (!scan->line)
  141. scan->line = av_calloc(size, sizeof(*scan->line));
  142. if (!scan->code)
  143. scan->code = av_calloc(size, sizeof(*scan->code));
  144. if (!scan->line || !scan->code || !scan->histogram)
  145. return AVERROR(ENOMEM);
  146. }
  147. s->start = start;
  148. s->end = end;
  149. return 0;
  150. }
  151. static void build_histogram(ReadEIA608Context *s, ScanItem *scan, const LineItem *line, int len)
  152. {
  153. memset(scan->histogram, 0, (s->max + 1) * sizeof(*scan->histogram));
  154. for (int i = LAG; i < len + LAG; i++)
  155. scan->histogram[line[i].input]++;
  156. }
  157. static void find_black_and_white(ReadEIA608Context *s, ScanItem *scan)
  158. {
  159. const int max = s->max;
  160. int start = 0, end = 0, middle;
  161. int black = 0, white = 0;
  162. int cnt;
  163. for (int i = 0; i <= max; i++) {
  164. if (scan->histogram[i]) {
  165. start = i;
  166. break;
  167. }
  168. }
  169. for (int i = max; i >= 0; i--) {
  170. if (scan->histogram[i]) {
  171. end = i;
  172. break;
  173. }
  174. }
  175. middle = start + (end - start) / 2;
  176. cnt = 0;
  177. for (int i = start; i <= middle; i++) {
  178. if (scan->histogram[i] > cnt) {
  179. cnt = scan->histogram[i];
  180. black = i;
  181. }
  182. }
  183. cnt = 0;
  184. for (int i = end; i >= middle; i--) {
  185. if (scan->histogram[i] > cnt) {
  186. cnt = scan->histogram[i];
  187. white = i;
  188. }
  189. }
  190. scan->black = black;
  191. scan->white = white;
  192. }
  193. static float meanf(const LineItem *line, int len)
  194. {
  195. float sum = 0.0, mean = 0.0;
  196. for (int i = 0; i < len; i++)
  197. sum += line[i].filtered;
  198. mean = sum / len;
  199. return mean;
  200. }
  201. static float stddevf(const LineItem *line, int len)
  202. {
  203. float m = meanf(line, len);
  204. float standard_deviation = 0.f;
  205. for (int i = 0; i < len; i++)
  206. standard_deviation += (line[i].filtered - m) * (line[i].filtered - m);
  207. return sqrtf(standard_deviation / (len - 1));
  208. }
  209. static void thresholding(ReadEIA608Context *s, ScanItem *scan, LineItem *line,
  210. int lag, float threshold, float influence, int len)
  211. {
  212. for (int i = lag; i < len + lag; i++) {
  213. line[i].unfiltered = line[i].input / 255.f;
  214. line[i].filtered = line[i].unfiltered;
  215. }
  216. for (int i = 0; i < lag; i++) {
  217. line[i].unfiltered = meanf(line, len * s->spw);
  218. line[i].filtered = line[i].unfiltered;
  219. }
  220. line[lag - 1].average = meanf(line, lag);
  221. line[lag - 1].deviation = stddevf(line, lag);
  222. for (int i = lag; i < len + lag; i++) {
  223. if (fabsf(line[i].unfiltered - line[i-1].average) > threshold * line[i-1].deviation) {
  224. if (line[i].unfiltered > line[i-1].average) {
  225. line[i].output = 255;
  226. } else {
  227. line[i].output = 0;
  228. }
  229. line[i].filtered = influence * line[i].unfiltered + (1.f - influence) * line[i-1].filtered;
  230. } else {
  231. int distance_from_black, distance_from_white;
  232. distance_from_black = FFABS(line[i].input - scan->black);
  233. distance_from_white = FFABS(line[i].input - scan->white);
  234. line[i].output = distance_from_black <= distance_from_white ? 0 : 255;
  235. }
  236. line[i].average = meanf(line + i - lag, lag);
  237. line[i].deviation = stddevf(line + i - lag, lag);
  238. }
  239. }
  240. static int periods(const LineItem *line, CodeItem *code, int len)
  241. {
  242. int hold = line[LAG].output, cnt = 0;
  243. int last = LAG;
  244. memset(code, 0, len * sizeof(*code));
  245. for (int i = LAG + 1; i < len + LAG; i++) {
  246. if (line[i].output != hold) {
  247. code[cnt].size = i - last;
  248. code[cnt].bit = hold;
  249. hold = line[i].output;
  250. last = i;
  251. cnt++;
  252. }
  253. }
  254. code[cnt].size = LAG + len - last;
  255. code[cnt].bit = hold;
  256. return cnt + 1;
  257. }
  258. static void dump_code(AVFilterContext *ctx, ScanItem *scan, int len, int item)
  259. {
  260. av_log(ctx, AV_LOG_DEBUG, "%d:", item);
  261. for (int i = 0; i < len; i++) {
  262. av_log(ctx, AV_LOG_DEBUG, " %03d", scan->code[i].size);
  263. }
  264. av_log(ctx, AV_LOG_DEBUG, "\n");
  265. }
  266. #define READ_LINE(type, name) \
  267. static void read_##name(AVFrame *in, int nb_line, LineItem *line, int lp, int w) \
  268. { \
  269. const type *src = (const type *)(&in->data[0][nb_line * in->linesize[0]]);\
  270. \
  271. if (lp) { \
  272. for (int i = 0; i < w; i++) { \
  273. int a = FFMAX(i - 3, 0); \
  274. int b = FFMAX(i - 2, 0); \
  275. int c = FFMAX(i - 1, 0); \
  276. int d = FFMIN(i + 3, w-1); \
  277. int e = FFMIN(i + 2, w-1); \
  278. int f = FFMIN(i + 1, w-1); \
  279. \
  280. line[LAG + i].input = (src[a] + src[b] + src[c] + src[i] + \
  281. src[d] + src[e] + src[f] + 6) / 7; \
  282. } \
  283. } else { \
  284. for (int i = 0; i < w; i++) { \
  285. line[LAG + i].input = src[i]; \
  286. } \
  287. } \
  288. }
  289. READ_LINE(uint8_t, byte)
  290. READ_LINE(uint16_t, word)
  291. static int config_input(AVFilterLink *inlink)
  292. {
  293. AVFilterContext *ctx = inlink->dst;
  294. ReadEIA608Context *s = ctx->priv;
  295. const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
  296. if (!desc)
  297. return AVERROR_BUG;
  298. s->depth = desc->comp[0].depth;
  299. s->max = (1 << desc->comp[0].depth) - 1;
  300. s->read_line[0] = read_byte;
  301. s->read_line[1] = read_word;
  302. return config_filter(ctx, s->start, s->end);
  303. }
  304. static void extract_line(AVFilterContext *ctx, AVFrame *in, ScanItem *scan, int w, int nb_line)
  305. {
  306. ReadEIA608Context *s = ctx->priv;
  307. LineItem *line = scan->line;
  308. int i, j, ch, len;
  309. uint8_t codes[19] = { 0 };
  310. float bit_size = 0.f;
  311. int parity;
  312. memset(line, 0, (w + LAG) * sizeof(*line));
  313. scan->byte[0] = scan->byte[1] = 0;
  314. scan->found = 0;
  315. s->read_line[s->depth > 8](in, nb_line, line, s->lp, w);
  316. build_histogram(s, scan, line, w);
  317. find_black_and_white(s, scan);
  318. if (scan->white - scan->black < 5)
  319. return;
  320. thresholding(s, scan, line, LAG, 1, 0, w);
  321. len = periods(line, scan->code, w);
  322. dump_code(ctx, scan, len, nb_line);
  323. if (len < 15 ||
  324. scan->code[14].bit != 0 ||
  325. w / (float)scan->code[14].size < SYNC_BITSIZE_MIN ||
  326. w / (float)scan->code[14].size > SYNC_BITSIZE_MAX) {
  327. return;
  328. }
  329. for (i = 14; i < len; i++) {
  330. bit_size += scan->code[i].size;
  331. }
  332. bit_size /= 19.f;
  333. for (i = 1; i < 14; i++) {
  334. if (scan->code[i].size / bit_size > CLOCK_BITSIZE_MAX ||
  335. scan->code[i].size / bit_size < CLOCK_BITSIZE_MIN) {
  336. return;
  337. }
  338. }
  339. if (scan->code[15].size / bit_size < 0.45f) {
  340. return;
  341. }
  342. for (j = 0, i = 14; i < len; i++) {
  343. int run, bit;
  344. run = lrintf(scan->code[i].size / bit_size);
  345. bit = scan->code[i].bit;
  346. for (int k = 0; j < 19 && k < run; k++) {
  347. codes[j++] = bit;
  348. }
  349. if (j >= 19)
  350. break;
  351. }
  352. for (ch = 0; ch < 2; ch++) {
  353. for (parity = 0, i = 0; i < 8; i++) {
  354. int b = codes[3 + ch * 8 + i];
  355. if (b == 255) {
  356. parity++;
  357. b = 1;
  358. } else {
  359. b = 0;
  360. }
  361. scan->byte[ch] |= b << i;
  362. }
  363. if (s->chp) {
  364. if (!(parity & 1)) {
  365. scan->byte[ch] = 0x7F;
  366. }
  367. }
  368. }
  369. scan->nb_line = nb_line;
  370. scan->found = 1;
  371. }
  372. static int extract_lines(AVFilterContext *ctx, void *arg,
  373. int job, int nb_jobs)
  374. {
  375. ReadEIA608Context *s = ctx->priv;
  376. AVFilterLink *inlink = ctx->inputs[0];
  377. const int h = s->end - s->start + 1;
  378. const int start = (h * job) / nb_jobs;
  379. const int end = (h * (job+1)) / nb_jobs;
  380. AVFrame *in = arg;
  381. for (int i = start; i < end; i++) {
  382. ScanItem *scan = &s->scan[i];
  383. extract_line(ctx, in, scan, inlink->w, i);
  384. }
  385. return 0;
  386. }
  387. static int filter_frame(AVFilterLink *inlink, AVFrame *in)
  388. {
  389. AVFilterContext *ctx = inlink->dst;
  390. AVFilterLink *outlink = ctx->outputs[0];
  391. ReadEIA608Context *s = ctx->priv;
  392. int nb_found;
  393. ctx->internal->execute(ctx, extract_lines, in, NULL, FFMIN(FFMAX(s->end - s->start + 1, 1),
  394. ff_filter_get_nb_threads(ctx)));
  395. nb_found = 0;
  396. for (int i = 0; i < s->end - s->start + 1; i++) {
  397. ScanItem *scan = &s->scan[i];
  398. uint8_t key[128], value[128];
  399. if (!scan->found)
  400. continue;
  401. //snprintf(key, sizeof(key), "lavfi.readeia608.%d.bits", nb_found);
  402. //snprintf(value, sizeof(value), "0b%d%d%d%d%d%d%d%d 0b%d%d%d%d%d%d%d%d", codes[3]==255,codes[4]==255,codes[5]==255,codes[6]==255,codes[7]==255,codes[8]==255,codes[9]==255,codes[10]==255,codes[11]==255,codes[12]==255,codes[13]==255,codes[14]==255,codes[15]==255,codes[16]==255,codes[17]==255,codes[18]==255);
  403. //av_dict_set(&in->metadata, key, value, 0);
  404. snprintf(key, sizeof(key), "lavfi.readeia608.%d.cc", nb_found);
  405. snprintf(value, sizeof(value), "0x%02X%02X", scan->byte[0], scan->byte[1]);
  406. av_dict_set(&in->metadata, key, value, 0);
  407. snprintf(key, sizeof(key), "lavfi.readeia608.%d.line", nb_found);
  408. snprintf(value, sizeof(value), "%d", scan->nb_line);
  409. av_dict_set(&in->metadata, key, value, 0);
  410. nb_found++;
  411. }
  412. return ff_filter_frame(outlink, in);
  413. }
  414. static av_cold void uninit(AVFilterContext *ctx)
  415. {
  416. ReadEIA608Context *s = ctx->priv;
  417. for (int i = 0; i < s->nb_allocated; i++) {
  418. ScanItem *scan = &s->scan[i];
  419. av_freep(&scan->histogram);
  420. av_freep(&scan->code);
  421. av_freep(&scan->line);
  422. }
  423. s->nb_allocated = 0;
  424. av_freep(&s->scan);
  425. }
  426. static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
  427. char *res, int res_len, int flags)
  428. {
  429. ReadEIA608Context *s = ctx->priv;
  430. int ret, start = s->start, end = s->end;
  431. ret = ff_filter_process_command(ctx, cmd, args, res, res_len, flags);
  432. if (ret < 0)
  433. return ret;
  434. ret = config_filter(ctx, s->start, s->end);
  435. if (ret < 0) {
  436. s->start = start;
  437. s->end = end;
  438. }
  439. return 0;
  440. }
  441. static const AVFilterPad readeia608_inputs[] = {
  442. {
  443. .name = "default",
  444. .type = AVMEDIA_TYPE_VIDEO,
  445. .filter_frame = filter_frame,
  446. .config_props = config_input,
  447. },
  448. { NULL }
  449. };
  450. static const AVFilterPad readeia608_outputs[] = {
  451. {
  452. .name = "default",
  453. .type = AVMEDIA_TYPE_VIDEO,
  454. },
  455. { NULL }
  456. };
  457. AVFilter ff_vf_readeia608 = {
  458. .name = "readeia608",
  459. .description = NULL_IF_CONFIG_SMALL("Read EIA-608 Closed Caption codes from input video and write them to frame metadata."),
  460. .priv_size = sizeof(ReadEIA608Context),
  461. .priv_class = &readeia608_class,
  462. .query_formats = query_formats,
  463. .inputs = readeia608_inputs,
  464. .outputs = readeia608_outputs,
  465. .uninit = uninit,
  466. .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,
  467. .process_command = process_command,
  468. };