You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

576 lines
17KB

  1. /*
  2. * Closed Caption Decoding
  3. * Copyright (c) 2015 Anshul Maheshwari
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "avcodec.h"
  22. #include "ass.h"
  23. #include "libavutil/opt.h"
  /* Dimensions of one caption screen: rows, and characters per row. */
  #define SCREEN_ROWS    15
  #define SCREEN_COLUMNS 32

  /*
   * Single-bit helpers. `val` is the zero-based bit position inside `var`;
   * SET_FLAG/UNSET_FLAG modify `var` in place, CHECK_FLAG yields non-zero
   * when the bit is set.
   */
  #define SET_FLAG(var, val)   ((var) |=  (1 << (val)))
  #define UNSET_FLAG(var, val) ((var) &= ~(1 << (val)))
  #define CHECK_FLAG(var, val) ((var) &   (1 << (val)))
  29. static const AVRational ass_tb = {1, 100};
  30. /*
  31. * TODO list
  32. * 1) handle font and color completely
  33. */
  /* Caption presentation mode selected by the control codes in process_cc608(). */
  enum cc_mode {
      CCMODE_POPON   = 0, /* "resume caption loading": text goes to the inactive screen */
      CCMODE_PAINTON = 1, /* "resume direct captioning": text goes to the active screen */
      CCMODE_ROLLUP  = 2, /* roll-up captions; the decoder's initial mode */
      CCMODE_TEXT    = 3, /* "resume text display" */
  };
  /* Colour identifiers stored in the first column of pac2_attribs. */
  enum cc_color_code {
      CCCOL_WHITE       = 0,
      CCCOL_GREEN       = 1,
      CCCOL_BLUE        = 2,
      CCCOL_CYAN        = 3,
      CCCOL_RED         = 4,
      CCCOL_YELLOW      = 5,
      CCCOL_MAGENTA     = 6,
      CCCOL_USERDEFINED = 7,
      CCCOL_BLACK       = 8,
      CCCOL_TRANSPARENT = 9,
  };
  /* Font styles; capture_screen() turns changes between them into ASS tags. */
  enum cc_font {
      CCFONT_REGULAR            = 0,
      CCFONT_ITALICS            = 1,
      CCFONT_UNDERLINED         = 2,
      CCFONT_UNDERLINED_ITALICS = 3,
  };
  58. static const unsigned char pac2_attribs[32][3] = // Color, font, ident
  59. {
  60. { CCCOL_WHITE, CCFONT_REGULAR, 0 }, // 0x40 || 0x60
  61. { CCCOL_WHITE, CCFONT_UNDERLINED, 0 }, // 0x41 || 0x61
  62. { CCCOL_GREEN, CCFONT_REGULAR, 0 }, // 0x42 || 0x62
  63. { CCCOL_GREEN, CCFONT_UNDERLINED, 0 }, // 0x43 || 0x63
  64. { CCCOL_BLUE, CCFONT_REGULAR, 0 }, // 0x44 || 0x64
  65. { CCCOL_BLUE, CCFONT_UNDERLINED, 0 }, // 0x45 || 0x65
  66. { CCCOL_CYAN, CCFONT_REGULAR, 0 }, // 0x46 || 0x66
  67. { CCCOL_CYAN, CCFONT_UNDERLINED, 0 }, // 0x47 || 0x67
  68. { CCCOL_RED, CCFONT_REGULAR, 0 }, // 0x48 || 0x68
  69. { CCCOL_RED, CCFONT_UNDERLINED, 0 }, // 0x49 || 0x69
  70. { CCCOL_YELLOW, CCFONT_REGULAR, 0 }, // 0x4a || 0x6a
  71. { CCCOL_YELLOW, CCFONT_UNDERLINED, 0 }, // 0x4b || 0x6b
  72. { CCCOL_MAGENTA, CCFONT_REGULAR, 0 }, // 0x4c || 0x6c
  73. { CCCOL_MAGENTA, CCFONT_UNDERLINED, 0 }, // 0x4d || 0x6d
  74. { CCCOL_WHITE, CCFONT_ITALICS, 0 }, // 0x4e || 0x6e
  75. { CCCOL_WHITE, CCFONT_UNDERLINED_ITALICS, 0 }, // 0x4f || 0x6f
  76. { CCCOL_WHITE, CCFONT_REGULAR, 0 }, // 0x50 || 0x70
  77. { CCCOL_WHITE, CCFONT_UNDERLINED, 0 }, // 0x51 || 0x71
  78. { CCCOL_WHITE, CCFONT_REGULAR, 4 }, // 0x52 || 0x72
  79. { CCCOL_WHITE, CCFONT_UNDERLINED, 4 }, // 0x53 || 0x73
  80. { CCCOL_WHITE, CCFONT_REGULAR, 8 }, // 0x54 || 0x74
  81. { CCCOL_WHITE, CCFONT_UNDERLINED, 8 }, // 0x55 || 0x75
  82. { CCCOL_WHITE, CCFONT_REGULAR, 12 }, // 0x56 || 0x76
  83. { CCCOL_WHITE, CCFONT_UNDERLINED, 12 }, // 0x57 || 0x77
  84. { CCCOL_WHITE, CCFONT_REGULAR, 16 }, // 0x58 || 0x78
  85. { CCCOL_WHITE, CCFONT_UNDERLINED, 16 }, // 0x59 || 0x79
  86. { CCCOL_WHITE, CCFONT_REGULAR, 20 }, // 0x5a || 0x7a
  87. { CCCOL_WHITE, CCFONT_UNDERLINED, 20 }, // 0x5b || 0x7b
  88. { CCCOL_WHITE, CCFONT_REGULAR, 24 }, // 0x5c || 0x7c
  89. { CCCOL_WHITE, CCFONT_UNDERLINED, 24 }, // 0x5d || 0x7d
  90. { CCCOL_WHITE, CCFONT_REGULAR, 28 }, // 0x5e || 0x7e
  91. { CCCOL_WHITE, CCFONT_UNDERLINED, 28 } // 0x5f || 0x7f
  92. /* total 32 entries */
  93. };
  94. struct Screen {
  95. /* +1 is used to compensate null character of string */
  96. uint8_t characters[SCREEN_ROWS][SCREEN_COLUMNS+1];
  97. uint8_t colors[SCREEN_ROWS][SCREEN_COLUMNS+1];
  98. uint8_t fonts[SCREEN_ROWS][SCREEN_COLUMNS+1];
  99. /*
  100. * Bitmask of used rows; if a bit is not set, the
  101. * corresponding row is not used.
  102. * for setting row 1 use row | (1 << 0)
  103. * for setting row 15 use row | (1 << 14)
  104. */
  105. int16_t row_used;
  106. };
  107. typedef struct CCaptionSubContext {
  108. AVClass *class;
  109. struct Screen screen[2];
  110. int active_screen;
  111. uint8_t cursor_row;
  112. uint8_t cursor_column;
  113. uint8_t cursor_color;
  114. uint8_t cursor_font;
  115. AVBPrint buffer;
  116. int buffer_changed;
  117. int rollup;
  118. enum cc_mode mode;
  119. int64_t start_time;
  120. /* visible screen time */
  121. int64_t startv_time;
  122. int64_t end_time;
  123. char prev_cmd[2];
  124. /* buffer to store pkt data */
  125. AVBufferRef *pktbuf;
  126. } CCaptionSubContext;
  127. static av_cold int init_decoder(AVCodecContext *avctx)
  128. {
  129. int ret;
  130. CCaptionSubContext *ctx = avctx->priv_data;
  131. av_bprint_init(&ctx->buffer, 0, AV_BPRINT_SIZE_UNLIMITED);
  132. /* taking by default roll up to 2 */
  133. ctx->mode = CCMODE_ROLLUP;
  134. ctx->rollup = 2;
  135. ret = ff_ass_subtitle_header_default(avctx);
  136. if (ret < 0) {
  137. return ret;
  138. }
  139. /* allocate pkt buffer */
  140. ctx->pktbuf = av_buffer_alloc(128);
  141. if (!ctx->pktbuf) {
  142. ret = AVERROR(ENOMEM);
  143. }
  144. return ret;
  145. }
  146. static av_cold int close_decoder(AVCodecContext *avctx)
  147. {
  148. CCaptionSubContext *ctx = avctx->priv_data;
  149. av_bprint_finalize(&ctx->buffer, NULL);
  150. av_buffer_unref(&ctx->pktbuf);
  151. return 0;
  152. }
  153. /**
  154. * @param ctx closed caption context just to print log
  155. */
  156. static int write_char(CCaptionSubContext *ctx, struct Screen *screen, char ch)
  157. {
  158. uint8_t col = ctx->cursor_column;
  159. char *row = screen->characters[ctx->cursor_row];
  160. char *font = screen->fonts[ctx->cursor_row];
  161. if (col < SCREEN_COLUMNS) {
  162. row[col] = ch;
  163. font[col] = ctx->cursor_font;
  164. if (ch) ctx->cursor_column++;
  165. return 0;
  166. }
  167. /* We have extra space at end only for null character */
  168. else if (col == SCREEN_COLUMNS && ch == 0) {
  169. row[col] = ch;
  170. return 0;
  171. }
  172. else {
  173. av_log(ctx, AV_LOG_WARNING, "Data Ignored since exceeding screen width\n");
  174. return AVERROR_INVALIDDATA;
  175. }
  176. }
  177. /**
  178. * This function after validating parity bit, also remove it from data pair.
  179. * The first byte doesn't pass parity, we replace it with a solid blank
  180. * and process the pair.
  181. * If the second byte doesn't pass parity, it returns INVALIDDATA
  182. * user can ignore the whole pair and pass the other pair.
  183. */
  184. static int validate_cc_data_pair(uint8_t *cc_data_pair)
  185. {
  186. uint8_t cc_valid = (*cc_data_pair & 4) >>2;
  187. uint8_t cc_type = *cc_data_pair & 3;
  188. if (!cc_valid)
  189. return AVERROR_INVALIDDATA;
  190. // if EIA-608 data then verify parity.
  191. if (cc_type==0 || cc_type==1) {
  192. if (!av_parity(cc_data_pair[2])) {
  193. return AVERROR_INVALIDDATA;
  194. }
  195. if (!av_parity(cc_data_pair[1])) {
  196. cc_data_pair[1]=0x7F;
  197. }
  198. }
  199. //Skip non-data
  200. if ((cc_data_pair[0] == 0xFA || cc_data_pair[0] == 0xFC || cc_data_pair[0] == 0xFD)
  201. && (cc_data_pair[1] & 0x7F) == 0 && (cc_data_pair[2] & 0x7F) == 0)
  202. return AVERROR_PATCHWELCOME;
  203. //skip 708 data
  204. if (cc_type == 3 || cc_type == 2)
  205. return AVERROR_PATCHWELCOME;
  206. /* remove parity bit */
  207. cc_data_pair[1] &= 0x7F;
  208. cc_data_pair[2] &= 0x7F;
  209. return 0;
  210. }
  211. static struct Screen *get_writing_screen(CCaptionSubContext *ctx)
  212. {
  213. switch (ctx->mode) {
  214. case CCMODE_POPON:
  215. // use Inactive screen
  216. return ctx->screen + !ctx->active_screen;
  217. case CCMODE_PAINTON:
  218. case CCMODE_ROLLUP:
  219. case CCMODE_TEXT:
  220. // use active screen
  221. return ctx->screen + ctx->active_screen;
  222. }
  223. /* It was never an option */
  224. return NULL;
  225. }
  226. static void roll_up(CCaptionSubContext *ctx)
  227. {
  228. struct Screen *screen;
  229. int i, keep_lines;
  230. if (ctx->mode == CCMODE_TEXT)
  231. return;
  232. screen = get_writing_screen(ctx);
  233. /* +1 signify cursor_row starts from 0
  234. * Can't keep lines less then row cursor pos
  235. */
  236. keep_lines = FFMIN(ctx->cursor_row + 1, ctx->rollup);
  237. for (i = 0; i < ctx->cursor_row - keep_lines; i++)
  238. UNSET_FLAG(screen->row_used, i);
  239. for (i = 0; i < keep_lines && screen->row_used; i++) {
  240. const int i_row = ctx->cursor_row - keep_lines + i + 1;
  241. memcpy(screen->characters[i_row], screen->characters[i_row+1], SCREEN_COLUMNS);
  242. memcpy(screen->colors[i_row], screen->colors[i_row+1], SCREEN_COLUMNS);
  243. memcpy(screen->fonts[i_row], screen->fonts[i_row+1], SCREEN_COLUMNS);
  244. if (CHECK_FLAG(screen->row_used, i_row + 1))
  245. SET_FLAG(screen->row_used, i_row);
  246. }
  247. UNSET_FLAG(screen->row_used, ctx->cursor_row);
  248. }
  249. static int capture_screen(CCaptionSubContext *ctx)
  250. {
  251. int i;
  252. struct Screen *screen = ctx->screen + ctx->active_screen;
  253. enum cc_font prev_font = CCFONT_REGULAR;
  254. av_bprint_clear(&ctx->buffer);
  255. for (i = 0; screen->row_used && i < SCREEN_ROWS; i++)
  256. {
  257. if (CHECK_FLAG(screen->row_used, i)) {
  258. const char *row = screen->characters[i];
  259. const char *font = screen->fonts[i];
  260. int j = 0;
  261. /* skip leading space */
  262. while (row[j] == ' ')
  263. j++;
  264. for (; j < SCREEN_COLUMNS; j++) {
  265. const char *e_tag = "", *s_tag = "";
  266. if (row[j] == 0)
  267. break;
  268. if (prev_font != font[j]) {
  269. switch (prev_font) {
  270. case CCFONT_ITALICS:
  271. e_tag = "{\\i0}";
  272. break;
  273. case CCFONT_UNDERLINED:
  274. e_tag = "{\\u0}";
  275. break;
  276. case CCFONT_UNDERLINED_ITALICS:
  277. e_tag = "{\\u0}{\\i0}";
  278. break;
  279. }
  280. switch (font[j]) {
  281. case CCFONT_ITALICS:
  282. s_tag = "{\\i1}";
  283. break;
  284. case CCFONT_UNDERLINED:
  285. s_tag = "{\\u1}";
  286. break;
  287. case CCFONT_UNDERLINED_ITALICS:
  288. s_tag = "{\\u1}{\\i1}";
  289. break;
  290. }
  291. }
  292. prev_font = font[j];
  293. av_bprintf(&ctx->buffer, "%s%s%c", e_tag, s_tag, row[j]);
  294. }
  295. av_bprintf(&ctx->buffer, "\\N");
  296. }
  297. }
  298. if (!av_bprint_is_complete(&ctx->buffer))
  299. return AVERROR(ENOMEM);
  300. if (screen->row_used && ctx->buffer.len >= 2) {
  301. ctx->buffer.len -= 2;
  302. ctx->buffer.str[ctx->buffer.len] = 0;
  303. }
  304. ctx->buffer_changed = 1;
  305. return 0;
  306. }
  307. static int reap_screen(CCaptionSubContext *ctx, int64_t pts)
  308. {
  309. ctx->start_time = ctx->startv_time;
  310. ctx->startv_time = pts;
  311. ctx->end_time = pts;
  312. return capture_screen(ctx);
  313. }
  314. static void handle_textattr(CCaptionSubContext *ctx, uint8_t hi, uint8_t lo)
  315. {
  316. int i = lo - 0x20;
  317. struct Screen *screen = get_writing_screen(ctx);
  318. if (i >= 32)
  319. return;
  320. ctx->cursor_color = pac2_attribs[i][0];
  321. ctx->cursor_font = pac2_attribs[i][1];
  322. SET_FLAG(screen->row_used, ctx->cursor_row);
  323. write_char(ctx, screen, ' ');
  324. }
  325. static void handle_pac(CCaptionSubContext *ctx, uint8_t hi, uint8_t lo)
  326. {
  327. static const int8_t row_map[] = {
  328. 11, -1, 1, 2, 3, 4, 12, 13, 14, 15, 5, 6, 7, 8, 9, 10
  329. };
  330. const int index = ( (hi<<1) & 0x0e) | ( (lo>>5) & 0x01 );
  331. struct Screen *screen = get_writing_screen(ctx);
  332. int indent, i;
  333. if (row_map[index] <= 0) {
  334. av_log(ctx, AV_LOG_DEBUG, "Invalid pac index encountered\n");
  335. return;
  336. }
  337. lo &= 0x1f;
  338. ctx->cursor_row = row_map[index] - 1;
  339. ctx->cursor_color = pac2_attribs[lo][0];
  340. ctx->cursor_font = pac2_attribs[lo][1];
  341. ctx->cursor_column = 0;
  342. indent = pac2_attribs[lo][2];
  343. for (i = 0; i < indent; i++) {
  344. write_char(ctx, screen, ' ');
  345. }
  346. }
  347. /**
  348. * @param pts it is required to set end time
  349. */
  350. static void handle_edm(CCaptionSubContext *ctx, int64_t pts)
  351. {
  352. struct Screen *screen = ctx->screen + ctx->active_screen;
  353. reap_screen(ctx, pts);
  354. screen->row_used = 0;
  355. }
  356. static void handle_eoc(CCaptionSubContext *ctx, int64_t pts)
  357. {
  358. handle_edm(ctx,pts);
  359. ctx->active_screen = !ctx->active_screen;
  360. ctx->cursor_column = 0;
  361. }
  362. static void handle_delete_end_of_row(CCaptionSubContext *ctx, char hi, char lo)
  363. {
  364. struct Screen *screen = get_writing_screen(ctx);
  365. write_char(ctx, screen, 0);
  366. }
  367. static void handle_char(CCaptionSubContext *ctx, char hi, char lo, int64_t pts)
  368. {
  369. struct Screen *screen = get_writing_screen(ctx);
  370. SET_FLAG(screen->row_used, ctx->cursor_row);
  371. write_char(ctx, screen, hi);
  372. if (lo) {
  373. write_char(ctx, screen, lo);
  374. }
  375. write_char(ctx, screen, 0);
  376. /* reset prev command since character can repeat */
  377. ctx->prev_cmd[0] = 0;
  378. ctx->prev_cmd[1] = 0;
  379. if (lo)
  380. ff_dlog(ctx, "(%c,%c)\n", hi, lo);
  381. else
  382. ff_dlog(ctx, "(%c)\n", hi);
  383. }
  384. static void process_cc608(CCaptionSubContext *ctx, int64_t pts, uint8_t hi, uint8_t lo)
  385. {
  386. if (hi == ctx->prev_cmd[0] && lo == ctx->prev_cmd[1]) {
  387. /* ignore redundant command */
  388. } else if ( (hi == 0x10 && (lo >= 0x40 && lo <= 0x5f)) ||
  389. ( (hi >= 0x11 && hi <= 0x17) && (lo >= 0x40 && lo <= 0x7f) ) ) {
  390. handle_pac(ctx, hi, lo);
  391. } else if ( ( hi == 0x11 && lo >= 0x20 && lo <= 0x2f ) ||
  392. ( hi == 0x17 && lo >= 0x2e && lo <= 0x2f) ) {
  393. handle_textattr(ctx, hi, lo);
  394. } else if (hi == 0x14 || hi == 0x15 || hi == 0x1c) {
  395. switch (lo) {
  396. case 0x20:
  397. /* resume caption loading */
  398. ctx->mode = CCMODE_POPON;
  399. break;
  400. case 0x24:
  401. handle_delete_end_of_row(ctx, hi, lo);
  402. break;
  403. case 0x25:
  404. case 0x26:
  405. case 0x27:
  406. ctx->rollup = lo - 0x23;
  407. ctx->mode = CCMODE_ROLLUP;
  408. break;
  409. case 0x29:
  410. /* resume direct captioning */
  411. ctx->mode = CCMODE_PAINTON;
  412. break;
  413. case 0x2b:
  414. /* resume text display */
  415. ctx->mode = CCMODE_TEXT;
  416. break;
  417. case 0x2c:
  418. /* erase display memory */
  419. handle_edm(ctx, pts);
  420. break;
  421. case 0x2d:
  422. /* carriage return */
  423. ff_dlog(ctx, "carriage return\n");
  424. reap_screen(ctx, pts);
  425. roll_up(ctx);
  426. ctx->cursor_column = 0;
  427. break;
  428. case 0x2f:
  429. /* end of caption */
  430. ff_dlog(ctx, "handle_eoc\n");
  431. handle_eoc(ctx, pts);
  432. break;
  433. default:
  434. ff_dlog(ctx, "Unknown command 0x%hhx 0x%hhx\n", hi, lo);
  435. break;
  436. }
  437. } else if (hi >= 0x20) {
  438. /* Standard characters (always in pairs) */
  439. handle_char(ctx, hi, lo, pts);
  440. } else {
  441. /* Ignoring all other non data code */
  442. ff_dlog(ctx, "Unknown command 0x%hhx 0x%hhx\n", hi, lo);
  443. }
  444. /* set prev command */
  445. ctx->prev_cmd[0] = hi;
  446. ctx->prev_cmd[1] = lo;
  447. }
  448. static int decode(AVCodecContext *avctx, void *data, int *got_sub, AVPacket *avpkt)
  449. {
  450. CCaptionSubContext *ctx = avctx->priv_data;
  451. AVSubtitle *sub = data;
  452. uint8_t *bptr = NULL;
  453. int len = avpkt->size;
  454. int ret = 0;
  455. int i;
  456. if (ctx->pktbuf->size < len) {
  457. ret = av_buffer_realloc(&ctx->pktbuf, len);
  458. if (ret < 0) {
  459. av_log(ctx, AV_LOG_WARNING, "Insufficient Memory of %d truncated to %d\n", len, ctx->pktbuf->size);
  460. len = ctx->pktbuf->size;
  461. ret = 0;
  462. }
  463. }
  464. memcpy(ctx->pktbuf->data, avpkt->data, len);
  465. bptr = ctx->pktbuf->data;
  466. for (i = 0; i < len; i += 3) {
  467. uint8_t cc_type = *(bptr + i) & 3;
  468. if (validate_cc_data_pair(bptr + i))
  469. continue;
  470. /* ignoring data field 1 */
  471. if(cc_type == 1)
  472. continue;
  473. else
  474. process_cc608(ctx, avpkt->pts, *(bptr + i + 1) & 0x7f, *(bptr + i + 2) & 0x7f);
  475. if (ctx->buffer_changed && *ctx->buffer.str)
  476. {
  477. int start_time = av_rescale_q(ctx->start_time, avctx->time_base, ass_tb);
  478. int end_time = av_rescale_q(ctx->end_time, avctx->time_base, ass_tb);
  479. ff_dlog(ctx, "cdp writing data (%s)\n",ctx->buffer.str);
  480. ret = ff_ass_add_rect_bprint(sub, &ctx->buffer, start_time, end_time - start_time);
  481. if (ret < 0)
  482. return ret;
  483. sub->pts = av_rescale_q(ctx->start_time, avctx->time_base, AV_TIME_BASE_Q);
  484. ctx->buffer_changed = 0;
  485. }
  486. }
  487. *got_sub = sub->num_rects > 0;
  488. return ret;
  489. }
  490. static const AVOption options[] = {
  491. {NULL}
  492. };
  493. static const AVClass ccaption_dec_class = {
  494. .class_name = "Closed caption Decoder",
  495. .item_name = av_default_item_name,
  496. .option = options,
  497. .version = LIBAVUTIL_VERSION_INT,
  498. };
  499. AVCodec ff_ccaption_decoder = {
  500. .name = "cc_dec",
  501. .long_name = NULL_IF_CONFIG_SMALL("Closed Caption (EIA-608 / CEA-708) Decoder"),
  502. .type = AVMEDIA_TYPE_SUBTITLE,
  503. .id = AV_CODEC_ID_EIA_608,
  504. .priv_data_size = sizeof(CCaptionSubContext),
  505. .init = init_decoder,
  506. .close = close_decoder,
  507. .decode = decode,
  508. .priv_class = &ccaption_dec_class,
  509. };