You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

596 lines
17KB

  1. /*
  2. * 3GPP TS 26.245 Timed Text decoder
  3. * Copyright (c) 2012 Philip Langdale <philipl@overt.org>
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "avcodec.h"
  22. #include "ass.h"
  23. #include "libavutil/avstring.h"
  24. #include "libavutil/common.h"
  25. #include "libavutil/bprint.h"
  26. #include "libavutil/intreadwrite.h"
  27. #include "libavutil/mem.h"
  /* Bits of the face-style-flags byte found in style records. */
  #define STYLE_FLAG_BOLD         (1 << 0)
  #define STYLE_FLAG_ITALIC       (1 << 1)
  #define STYLE_FLAG_UNDERLINE    (1 << 2)

  /* Number of bytes of the tx3g sample entry up to and including the
   * font-table entry count. */
  #define BOX_SIZE_INITIAL        40

  /* Bits of MovTextContext.box_flags: which modifier boxes were decoded
   * for the current packet. */
  #define STYL_BOX                (1 << 0)
  #define HLIT_BOX                (1 << 1)
  #define HCLR_BOX                (1 << 2)
  #define TWRP_BOX                (1 << 3)

  /* Alignment codes stored in MovTextDefault.alignment and handed to
   * ff_ass_subtitle_header(). */
  #define BOTTOM_LEFT             1
  #define BOTTOM_CENTER           2
  #define BOTTOM_RIGHT            3
  #define MIDDLE_LEFT             4
  #define MIDDLE_CENTER           5
  #define MIDDLE_RIGHT            6
  #define TOP_LEFT                7
  #define TOP_CENTER              8
  #define TOP_RIGHT               9

  /* Exchange the lowest and the third byte of a 24-bit colour value
   * (0xRRGGBB <-> 0xBBGGRR); the middle byte stays in place. */
  #define RGB_TO_BGR(c) ((((c) & 0x0000ff) << 16) | \
                         ((c) & 0x00ff00)         | \
                         (((c) >> 16) & 0x0000ff))
  /* Default text style taken from the tx3g sample entry (codec extradata). */
  typedef struct MovTextDefault {
      uint16_t    fontID;     /* ID of the default font */
      const char *font;       /* default font name: either a name held by the
                               * font table or ASS_DEFAULT_FONT; not freed
                               * through this pointer */
      uint8_t     fontsize;   /* default font size byte */
      int         color;      /* primary colour, read as 24 bits big-endian */
      int         back_color; /* background colour, read as 24 bits big-endian */
      uint8_t     bold;       /* 1 if the default face is bold, else 0 */
      uint8_t     italic;     /* 1 if the default face is italic, else 0 */
      uint8_t     underline;  /* 1 if the default face is underlined, else 0 */
      int         alignment;  /* one of the BOTTOM_ / MIDDLE_ / TOP_ codes */
  } MovTextDefault;
  /* One entry of the font table carried in the tx3g sample entry. */
  typedef struct FontRecord {
      uint16_t  fontID; /* identifier that style records refer to */
      char     *font;   /* NUL-terminated font name, heap allocated and
                         * released by mov_text_cleanup_ftab() */
  } FontRecord;
  /* One style record of a 'styl' box. */
  typedef struct StyleBox {
      uint16_t style_start;  /* position (counted in characters by
                              * text_to_ass) where the style begins */
      uint16_t style_end;    /* position where the style stops applying */
      uint8_t  style_flag;   /* raw face-style-flags byte */
      uint8_t  bold;         /* STYLE_FLAG_BOLD extracted as 0/1 */
      uint8_t  italic;       /* STYLE_FLAG_ITALIC extracted as 0/1 */
      uint8_t  underline;    /* STYLE_FLAG_UNDERLINE extracted as 0/1 */
      uint8_t  fontsize;     /* font size byte of the record */
      uint16_t style_fontID; /* font ID, looked up in the font table */
  } StyleBox;
  /* Payload of an 'hlit' box: the highlighted character range. */
  typedef struct HighlightBox {
      uint16_t hlit_start; /* position where highlighting begins */
      uint16_t hlit_end;   /* position where highlighting ends */
  } HighlightBox;
  /* Payload of an 'hclr' box: four colour bytes copied verbatim from the
   * packet. text_to_ass() prints bytes 2, 1, 0 (in that order) and does
   * not use byte 3. */
  typedef struct HilightcolorBox {
      uint8_t hlit_color[4];
  } HilightcolorBox;
  /* Payload of a 'twrp' box. text_to_ass() emits "{\q1}" when the flag is
   * 1 and "{\q2}" for every other value. */
  typedef struct TextWrapBox {
      uint8_t wrap_flag;
  } TextWrapBox;
  81. typedef struct {
  82. StyleBox **s;
  83. StyleBox *s_temp;
  84. HighlightBox h;
  85. HilightcolorBox c;
  86. FontRecord **ftab;
  87. FontRecord *ftab_temp;
  88. TextWrapBox w;
  89. MovTextDefault d;
  90. uint8_t box_flags;
  91. uint16_t style_entries, ftab_entries;
  92. uint64_t tracksize;
  93. int size_var;
  94. int count_s, count_f;
  95. int readorder;
  96. } MovTextContext;
  97. typedef struct {
  98. uint32_t type;
  99. size_t base_size;
  100. int (*decode)(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt);
  101. } Box;
  102. static void mov_text_cleanup(MovTextContext *m)
  103. {
  104. int i;
  105. if (m->box_flags & STYL_BOX) {
  106. for(i = 0; i < m->count_s; i++) {
  107. av_freep(&m->s[i]);
  108. }
  109. av_freep(&m->s);
  110. m->count_s = 0;
  111. m->style_entries = 0;
  112. }
  113. }
  114. static void mov_text_cleanup_ftab(MovTextContext *m)
  115. {
  116. int i;
  117. if (m->ftab_temp)
  118. av_freep(&m->ftab_temp->font);
  119. av_freep(&m->ftab_temp);
  120. if (m->ftab) {
  121. for(i = 0; i < m->count_f; i++) {
  122. av_freep(&m->ftab[i]->font);
  123. av_freep(&m->ftab[i]);
  124. }
  125. }
  126. av_freep(&m->ftab);
  127. }
  128. static int mov_text_tx3g(AVCodecContext *avctx, MovTextContext *m)
  129. {
  130. uint8_t *tx3g_ptr = avctx->extradata;
  131. int i, box_size, font_length;
  132. int8_t v_align, h_align;
  133. StyleBox s_default;
  134. m->count_f = 0;
  135. m->ftab_entries = 0;
  136. box_size = BOX_SIZE_INITIAL; /* Size till ftab_entries */
  137. if (avctx->extradata_size < box_size)
  138. return -1;
  139. // Display Flags
  140. tx3g_ptr += 4;
  141. // Alignment
  142. h_align = *tx3g_ptr++;
  143. v_align = *tx3g_ptr++;
  144. if (h_align == 0) {
  145. if (v_align == 0)
  146. m->d.alignment = TOP_LEFT;
  147. if (v_align == 1)
  148. m->d.alignment = MIDDLE_LEFT;
  149. if (v_align == -1)
  150. m->d.alignment = BOTTOM_LEFT;
  151. }
  152. if (h_align == 1) {
  153. if (v_align == 0)
  154. m->d.alignment = TOP_CENTER;
  155. if (v_align == 1)
  156. m->d.alignment = MIDDLE_CENTER;
  157. if (v_align == -1)
  158. m->d.alignment = BOTTOM_CENTER;
  159. }
  160. if (h_align == -1) {
  161. if (v_align == 0)
  162. m->d.alignment = TOP_RIGHT;
  163. if (v_align == 1)
  164. m->d.alignment = MIDDLE_RIGHT;
  165. if (v_align == -1)
  166. m->d.alignment = BOTTOM_RIGHT;
  167. }
  168. // Background Color
  169. m->d.back_color = AV_RB24(tx3g_ptr);
  170. tx3g_ptr += 4;
  171. // BoxRecord
  172. tx3g_ptr += 8;
  173. // StyleRecord
  174. tx3g_ptr += 4;
  175. // fontID
  176. m->d.fontID = AV_RB16(tx3g_ptr);
  177. tx3g_ptr += 2;
  178. // face-style-flags
  179. s_default.style_flag = *tx3g_ptr++;
  180. m->d.bold = !!(s_default.style_flag & STYLE_FLAG_BOLD);
  181. m->d.italic = !!(s_default.style_flag & STYLE_FLAG_ITALIC);
  182. m->d.underline = !!(s_default.style_flag & STYLE_FLAG_UNDERLINE);
  183. // fontsize
  184. m->d.fontsize = *tx3g_ptr++;
  185. // Primary color
  186. m->d.color = AV_RB24(tx3g_ptr);
  187. tx3g_ptr += 4;
  188. // FontRecord
  189. // FontRecord Size
  190. tx3g_ptr += 4;
  191. // ftab
  192. tx3g_ptr += 4;
  193. m->ftab_entries = AV_RB16(tx3g_ptr);
  194. tx3g_ptr += 2;
  195. for (i = 0; i < m->ftab_entries; i++) {
  196. box_size += 3;
  197. if (avctx->extradata_size < box_size) {
  198. mov_text_cleanup_ftab(m);
  199. m->ftab_entries = 0;
  200. return -1;
  201. }
  202. m->ftab_temp = av_mallocz(sizeof(*m->ftab_temp));
  203. if (!m->ftab_temp) {
  204. mov_text_cleanup_ftab(m);
  205. return AVERROR(ENOMEM);
  206. }
  207. m->ftab_temp->fontID = AV_RB16(tx3g_ptr);
  208. tx3g_ptr += 2;
  209. font_length = *tx3g_ptr++;
  210. box_size = box_size + font_length;
  211. if (avctx->extradata_size < box_size) {
  212. mov_text_cleanup_ftab(m);
  213. m->ftab_entries = 0;
  214. return -1;
  215. }
  216. m->ftab_temp->font = av_malloc(font_length + 1);
  217. if (!m->ftab_temp->font) {
  218. mov_text_cleanup_ftab(m);
  219. return AVERROR(ENOMEM);
  220. }
  221. memcpy(m->ftab_temp->font, tx3g_ptr, font_length);
  222. m->ftab_temp->font[font_length] = '\0';
  223. av_dynarray_add(&m->ftab, &m->count_f, m->ftab_temp);
  224. if (!m->ftab) {
  225. mov_text_cleanup_ftab(m);
  226. return AVERROR(ENOMEM);
  227. }
  228. m->ftab_temp = NULL;
  229. tx3g_ptr = tx3g_ptr + font_length;
  230. }
  231. // In case of broken header, init default font
  232. m->d.font = ASS_DEFAULT_FONT;
  233. for (i = 0; i < m->ftab_entries; i++) {
  234. if (m->d.fontID == m->ftab[i]->fontID)
  235. m->d.font = m->ftab[i]->font;
  236. }
  237. return 0;
  238. }
  239. static int decode_twrp(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt)
  240. {
  241. m->box_flags |= TWRP_BOX;
  242. m->w.wrap_flag = *tsmb++;
  243. return 0;
  244. }
  245. static int decode_hlit(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt)
  246. {
  247. m->box_flags |= HLIT_BOX;
  248. m->h.hlit_start = AV_RB16(tsmb);
  249. tsmb += 2;
  250. m->h.hlit_end = AV_RB16(tsmb);
  251. tsmb += 2;
  252. return 0;
  253. }
  254. static int decode_hclr(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt)
  255. {
  256. m->box_flags |= HCLR_BOX;
  257. memcpy(m->c.hlit_color, tsmb, 4);
  258. tsmb += 4;
  259. return 0;
  260. }
  261. static int decode_styl(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt)
  262. {
  263. int i;
  264. int style_entries = AV_RB16(tsmb);
  265. tsmb += 2;
  266. // A single style record is of length 12 bytes.
  267. if (m->tracksize + m->size_var + 2 + style_entries * 12 > avpkt->size)
  268. return -1;
  269. m->style_entries = style_entries;
  270. m->box_flags |= STYL_BOX;
  271. for(i = 0; i < m->style_entries; i++) {
  272. m->s_temp = av_malloc(sizeof(*m->s_temp));
  273. if (!m->s_temp) {
  274. mov_text_cleanup(m);
  275. return AVERROR(ENOMEM);
  276. }
  277. m->s_temp->style_start = AV_RB16(tsmb);
  278. tsmb += 2;
  279. m->s_temp->style_end = AV_RB16(tsmb);
  280. if ( m->s_temp->style_end < m->s_temp->style_start
  281. || (m->count_s && m->s_temp->style_start < m->s[m->count_s - 1]->style_end)) {
  282. av_freep(&m->s_temp);
  283. mov_text_cleanup(m);
  284. return AVERROR(ENOMEM);
  285. }
  286. tsmb += 2;
  287. m->s_temp->style_fontID = AV_RB16(tsmb);
  288. tsmb += 2;
  289. m->s_temp->style_flag = AV_RB8(tsmb);
  290. m->s_temp->bold = !!(m->s_temp->style_flag & STYLE_FLAG_BOLD);
  291. m->s_temp->italic = !!(m->s_temp->style_flag & STYLE_FLAG_ITALIC);
  292. m->s_temp->underline = !!(m->s_temp->style_flag & STYLE_FLAG_UNDERLINE);
  293. tsmb++;
  294. m->s_temp->fontsize = AV_RB8(tsmb);
  295. av_dynarray_add(&m->s, &m->count_s, m->s_temp);
  296. if(!m->s) {
  297. mov_text_cleanup(m);
  298. return AVERROR(ENOMEM);
  299. }
  300. tsmb++;
  301. // text-color-rgba
  302. tsmb += 4;
  303. }
  304. return 0;
  305. }
  306. static const Box box_types[] = {
  307. { MKBETAG('s','t','y','l'), 2, decode_styl },
  308. { MKBETAG('h','l','i','t'), 4, decode_hlit },
  309. { MKBETAG('h','c','l','r'), 4, decode_hclr },
  310. { MKBETAG('t','w','r','p'), 1, decode_twrp }
  311. };
  312. const static size_t box_count = FF_ARRAY_ELEMS(box_types);
  313. // Return byte length of the UTF-8 sequence starting at text[0]. 0 on error.
  314. static int get_utf8_length_at(const char *text, const char *text_end)
  315. {
  316. const char *start = text;
  317. int err = 0;
  318. uint32_t c;
  319. GET_UTF8(c, text < text_end ? (uint8_t)*text++ : (err = 1, 0), goto error;);
  320. if (err)
  321. goto error;
  322. return text - start;
  323. error:
  324. return 0;
  325. }
  326. static int text_to_ass(AVBPrint *buf, const char *text, const char *text_end,
  327. AVCodecContext *avctx)
  328. {
  329. MovTextContext *m = avctx->priv_data;
  330. int i = 0;
  331. int text_pos = 0;
  332. int style_active = 0;
  333. int entry = 0;
  334. if (text < text_end && m->box_flags & TWRP_BOX) {
  335. if (m->w.wrap_flag == 1) {
  336. av_bprintf(buf, "{\\q1}"); /* End of line wrap */
  337. } else {
  338. av_bprintf(buf, "{\\q2}"); /* No wrap */
  339. }
  340. }
  341. while (text < text_end) {
  342. int len;
  343. if ((m->box_flags & STYL_BOX) && entry < m->style_entries) {
  344. if (text_pos == m->s[entry]->style_start) {
  345. style_active = 1;
  346. if (m->s[entry]->bold ^ m->d.bold)
  347. av_bprintf(buf, "{\\b%d}", m->s[entry]->bold);
  348. if (m->s[entry]->italic ^ m->d.italic)
  349. av_bprintf(buf, "{\\i%d}", m->s[entry]->italic);
  350. if (m->s[entry]->underline ^ m->d.underline)
  351. av_bprintf(buf, "{\\u%d}", m->s[entry]->underline);
  352. if (m->s[entry]->fontsize != m->d.fontsize)
  353. av_bprintf(buf, "{\\fs%d}", m->s[entry]->fontsize);
  354. if (m->s[entry]->style_fontID != m->d.fontID)
  355. for (i = 0; i < m->ftab_entries; i++) {
  356. if (m->s[entry]->style_fontID == m->ftab[i]->fontID)
  357. av_bprintf(buf, "{\\fn%s}", m->ftab[i]->font);
  358. }
  359. }
  360. if (text_pos == m->s[entry]->style_end) {
  361. if (style_active) {
  362. av_bprintf(buf, "{\\r}");
  363. style_active = 0;
  364. }
  365. entry++;
  366. }
  367. }
  368. if (m->box_flags & HLIT_BOX) {
  369. if (text_pos == m->h.hlit_start) {
  370. /* If hclr box is present, set the secondary color to the color
  371. * specified. Otherwise, set primary color to white and secondary
  372. * color to black. These colors will come from TextSampleModifier
  373. * boxes in future and inverse video technique for highlight will
  374. * be implemented.
  375. */
  376. if (m->box_flags & HCLR_BOX) {
  377. av_bprintf(buf, "{\\2c&H%02x%02x%02x&}", m->c.hlit_color[2],
  378. m->c.hlit_color[1], m->c.hlit_color[0]);
  379. } else {
  380. av_bprintf(buf, "{\\1c&H000000&}{\\2c&HFFFFFF&}");
  381. }
  382. }
  383. if (text_pos == m->h.hlit_end) {
  384. if (m->box_flags & HCLR_BOX) {
  385. av_bprintf(buf, "{\\2c&H000000&}");
  386. } else {
  387. av_bprintf(buf, "{\\1c&HFFFFFF&}{\\2c&H000000&}");
  388. }
  389. }
  390. }
  391. len = get_utf8_length_at(text, text_end);
  392. if (len < 1) {
  393. av_log(avctx, AV_LOG_ERROR, "invalid UTF-8 byte in subtitle\n");
  394. len = 1;
  395. }
  396. for (i = 0; i < len; i++) {
  397. switch (*text) {
  398. case '\r':
  399. break;
  400. case '\n':
  401. av_bprintf(buf, "\\N");
  402. break;
  403. default:
  404. av_bprint_chars(buf, *text, 1);
  405. break;
  406. }
  407. text++;
  408. }
  409. text_pos++;
  410. }
  411. return 0;
  412. }
  413. static int mov_text_init(AVCodecContext *avctx) {
  414. /*
  415. * TODO: Handle the default text style.
  416. * NB: Most players ignore styles completely, with the result that
  417. * it's very common to find files where the default style is broken
  418. * and respecting it results in a worse experience than ignoring it.
  419. */
  420. int ret;
  421. MovTextContext *m = avctx->priv_data;
  422. ret = mov_text_tx3g(avctx, m);
  423. if (ret == 0) {
  424. return ff_ass_subtitle_header(avctx, m->d.font, m->d.fontsize,
  425. RGB_TO_BGR(m->d.color),
  426. RGB_TO_BGR(m->d.back_color),
  427. m->d.bold, m->d.italic, m->d.underline,
  428. ASS_DEFAULT_BORDERSTYLE, m->d.alignment);
  429. } else
  430. return ff_ass_subtitle_header_default(avctx);
  431. }
  432. static int mov_text_decode_frame(AVCodecContext *avctx,
  433. void *data, int *got_sub_ptr, AVPacket *avpkt)
  434. {
  435. AVSubtitle *sub = data;
  436. MovTextContext *m = avctx->priv_data;
  437. int ret;
  438. AVBPrint buf;
  439. char *ptr = avpkt->data;
  440. char *end;
  441. int text_length, tsmb_type, ret_tsmb;
  442. uint64_t tsmb_size;
  443. const uint8_t *tsmb;
  444. size_t i;
  445. if (!ptr || avpkt->size < 2)
  446. return AVERROR_INVALIDDATA;
  447. /*
  448. * A packet of size two with value zero is an empty subtitle
  449. * used to mark the end of the previous non-empty subtitle.
  450. * We can just drop them here as we have duration information
  451. * already. If the value is non-zero, then it's technically a
  452. * bad packet.
  453. */
  454. if (avpkt->size == 2)
  455. return AV_RB16(ptr) == 0 ? 0 : AVERROR_INVALIDDATA;
  456. /*
  457. * The first two bytes of the packet are the length of the text string
  458. * In complex cases, there are style descriptors appended to the string
  459. * so we can't just assume the packet size is the string size.
  460. */
  461. text_length = AV_RB16(ptr);
  462. end = ptr + FFMIN(2 + text_length, avpkt->size);
  463. ptr += 2;
  464. mov_text_cleanup(m);
  465. tsmb_size = 0;
  466. m->tracksize = 2 + text_length;
  467. m->style_entries = 0;
  468. m->box_flags = 0;
  469. m->count_s = 0;
  470. // Note that the spec recommends lines be no longer than 2048 characters.
  471. av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
  472. if (text_length + 2 != avpkt->size) {
  473. while (m->tracksize + 8 <= avpkt->size) {
  474. // A box is a minimum of 8 bytes.
  475. tsmb = ptr + m->tracksize - 2;
  476. tsmb_size = AV_RB32(tsmb);
  477. tsmb += 4;
  478. tsmb_type = AV_RB32(tsmb);
  479. tsmb += 4;
  480. if (tsmb_size == 1) {
  481. if (m->tracksize + 16 > avpkt->size)
  482. break;
  483. tsmb_size = AV_RB64(tsmb);
  484. tsmb += 8;
  485. m->size_var = 16;
  486. } else
  487. m->size_var = 8;
  488. //size_var is equal to 8 or 16 depending on the size of box
  489. if (tsmb_size == 0) {
  490. av_log(avctx, AV_LOG_ERROR, "tsmb_size is 0\n");
  491. return AVERROR_INVALIDDATA;
  492. }
  493. if (tsmb_size > avpkt->size - m->tracksize)
  494. break;
  495. for (i = 0; i < box_count; i++) {
  496. if (tsmb_type == box_types[i].type) {
  497. if (m->tracksize + m->size_var + box_types[i].base_size > avpkt->size)
  498. break;
  499. ret_tsmb = box_types[i].decode(tsmb, m, avpkt);
  500. if (ret_tsmb == -1)
  501. break;
  502. }
  503. }
  504. m->tracksize = m->tracksize + tsmb_size;
  505. }
  506. text_to_ass(&buf, ptr, end, avctx);
  507. mov_text_cleanup(m);
  508. } else
  509. text_to_ass(&buf, ptr, end, avctx);
  510. ret = ff_ass_add_rect(sub, buf.str, m->readorder++, 0, NULL, NULL);
  511. av_bprint_finalize(&buf, NULL);
  512. if (ret < 0)
  513. return ret;
  514. *got_sub_ptr = sub->num_rects > 0;
  515. return avpkt->size;
  516. }
  517. static int mov_text_decode_close(AVCodecContext *avctx)
  518. {
  519. MovTextContext *m = avctx->priv_data;
  520. mov_text_cleanup_ftab(m);
  521. mov_text_cleanup(m);
  522. return 0;
  523. }
  524. static void mov_text_flush(AVCodecContext *avctx)
  525. {
  526. MovTextContext *m = avctx->priv_data;
  527. if (!(avctx->flags2 & AV_CODEC_FLAG2_RO_FLUSH_NOOP))
  528. m->readorder = 0;
  529. }
  530. AVCodec ff_movtext_decoder = {
  531. .name = "mov_text",
  532. .long_name = NULL_IF_CONFIG_SMALL("3GPP Timed Text subtitle"),
  533. .type = AVMEDIA_TYPE_SUBTITLE,
  534. .id = AV_CODEC_ID_MOV_TEXT,
  535. .priv_data_size = sizeof(MovTextContext),
  536. .init = mov_text_init,
  537. .decode = mov_text_decode_frame,
  538. .close = mov_text_decode_close,
  539. .flush = mov_text_flush,
  540. };