You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

545 lines
16KB

  1. /*
  2. * 3GPP TS 26.245 Timed Text decoder
  3. * Copyright (c) 2012 Philip Langdale <philipl@overt.org>
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "avcodec.h"
  22. #include "ass.h"
  23. #include "libavutil/avstring.h"
  24. #include "libavutil/common.h"
  25. #include "libavutil/bprint.h"
  26. #include "libavutil/intreadwrite.h"
  27. #include "libavutil/mem.h"
  /* Bits of the face-style-flags byte found in a style record. */
  #define STYLE_FLAG_BOLD         (1 << 0)
  #define STYLE_FLAG_ITALIC       (1 << 1)
  #define STYLE_FLAG_UNDERLINE    (1 << 2)

  /* Number of extradata bytes up to and including the font table entry count. */
  #define BOX_SIZE_INITIAL        40

  /* Bits recorded in MovTextContext.box_flags once the matching box is decoded. */
  #define STYL_BOX                (1 << 0)
  #define HLIT_BOX                (1 << 1)
  #define HCLR_BOX                (1 << 2)
  #define TWRP_BOX                (1 << 3)

  /* Alignment codes stored in MovTextDefault.alignment and handed to
   * ff_ass_subtitle_header(). */
  #define BOTTOM_LEFT             1
  #define BOTTOM_CENTER           2
  #define BOTTOM_RIGHT            3
  #define MIDDLE_LEFT             4
  #define MIDDLE_CENTER           5
  #define MIDDLE_RIGHT            6
  #define TOP_LEFT                7
  #define TOP_CENTER              8
  #define TOP_RIGHT               9
  /**
   * Default text style read from the codec extradata by mov_text_tx3g() and
   * forwarded to ff_ass_subtitle_header() by mov_text_init().
   */
  typedef struct {
      char *font;        /* name of the default font; points into the font table */
      int   fontsize;    /* default font size byte */
      int   color;       /* primary color, first three bytes of the 4-byte field */
      int   back_color;  /* background color, first three bytes of the 4-byte field */
      int   bold;        /* non-zero when STYLE_FLAG_BOLD is set */
      int   italic;      /* non-zero when STYLE_FLAG_ITALIC is set */
      int   underline;   /* non-zero when STYLE_FLAG_UNDERLINE is set */
      int   alignment;   /* one of the BOTTOM_*, MIDDLE_*, TOP_* codes */
  } MovTextDefault;
  /** One entry of the font table parsed from the extradata. */
  typedef struct {
      uint16_t  fontID;  /* identifier that style records refer to */
      char     *font;    /* heap-allocated, NUL-terminated font name */
  } FontRecord;
  /** One style record taken from a 'styl' box (see decode_styl()). */
  typedef struct {
      uint16_t style_start;   /* text position at which the style starts */
      uint16_t style_end;     /* text position at which the style is reset */
      uint8_t  style_flag;    /* combination of STYLE_FLAG_* bits */
      uint8_t  fontsize;      /* font size for the styled run */
      uint16_t style_fontID;  /* fontID looked up in the font table */
  } StyleBox;
  /** Highlighted text range taken from an 'hlit' box (see decode_hlit()). */
  typedef struct {
      uint16_t hlit_start;  /* text position at which highlighting begins */
      uint16_t hlit_end;    /* text position at which highlighting ends */
  } HighlightBox;
  /** Highlight color taken from an 'hclr' box (see decode_hclr()). */
  typedef struct {
      uint8_t hlit_color[4];  /* the four color bytes, in stream order */
  } HilightcolorBox;
  /** Wrap setting taken from a 'twrp' box (see decode_twrp()). */
  typedef struct {
      uint8_t wrap_flag;  /* 1 selects end-of-line wrapping, anything else no wrap */
  } TextWrapBox;
  76. typedef struct {
  77. StyleBox **s;
  78. StyleBox *s_temp;
  79. HighlightBox h;
  80. HilightcolorBox c;
  81. FontRecord **ftab;
  82. FontRecord *ftab_temp;
  83. TextWrapBox w;
  84. MovTextDefault d;
  85. uint8_t box_flags;
  86. uint16_t style_entries, ftab_entries;
  87. uint64_t tracksize;
  88. int size_var;
  89. int count_s, count_f;
  90. int readorder;
  91. } MovTextContext;
  92. typedef struct {
  93. uint32_t type;
  94. size_t base_size;
  95. int (*decode)(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt);
  96. } Box;
  97. static void mov_text_cleanup(MovTextContext *m)
  98. {
  99. int i;
  100. if (m->box_flags & STYL_BOX) {
  101. for(i = 0; i < m->count_s; i++) {
  102. av_freep(&m->s[i]);
  103. }
  104. av_freep(&m->s);
  105. m->count_s = 0;
  106. m->style_entries = 0;
  107. }
  108. }
  109. static void mov_text_cleanup_ftab(MovTextContext *m)
  110. {
  111. int i;
  112. if (m->ftab_temp)
  113. av_freep(&m->ftab_temp->font);
  114. av_freep(&m->ftab_temp);
  115. if (m->ftab) {
  116. for(i = 0; i < m->count_f; i++) {
  117. av_freep(&m->ftab[i]->font);
  118. av_freep(&m->ftab[i]);
  119. }
  120. }
  121. av_freep(&m->ftab);
  122. }
  123. static int mov_text_tx3g(AVCodecContext *avctx, MovTextContext *m)
  124. {
  125. uint8_t *tx3g_ptr = avctx->extradata;
  126. int i, box_size, font_length;
  127. int8_t v_align, h_align;
  128. int style_fontID;
  129. StyleBox s_default;
  130. m->count_f = 0;
  131. m->ftab_entries = 0;
  132. box_size = BOX_SIZE_INITIAL; /* Size till ftab_entries */
  133. if (avctx->extradata_size < box_size)
  134. return -1;
  135. // Display Flags
  136. tx3g_ptr += 4;
  137. // Alignment
  138. h_align = *tx3g_ptr++;
  139. v_align = *tx3g_ptr++;
  140. if (h_align == 0) {
  141. if (v_align == 0)
  142. m->d.alignment = TOP_LEFT;
  143. if (v_align == 1)
  144. m->d.alignment = MIDDLE_LEFT;
  145. if (v_align == -1)
  146. m->d.alignment = BOTTOM_LEFT;
  147. }
  148. if (h_align == 1) {
  149. if (v_align == 0)
  150. m->d.alignment = TOP_CENTER;
  151. if (v_align == 1)
  152. m->d.alignment = MIDDLE_CENTER;
  153. if (v_align == -1)
  154. m->d.alignment = BOTTOM_CENTER;
  155. }
  156. if (h_align == -1) {
  157. if (v_align == 0)
  158. m->d.alignment = TOP_RIGHT;
  159. if (v_align == 1)
  160. m->d.alignment = MIDDLE_RIGHT;
  161. if (v_align == -1)
  162. m->d.alignment = BOTTOM_RIGHT;
  163. }
  164. // Background Color
  165. m->d.back_color = AV_RB24(tx3g_ptr);
  166. tx3g_ptr += 4;
  167. // BoxRecord
  168. tx3g_ptr += 8;
  169. // StyleRecord
  170. tx3g_ptr += 4;
  171. // fontID
  172. style_fontID = AV_RB16(tx3g_ptr);
  173. tx3g_ptr += 2;
  174. // face-style-flags
  175. s_default.style_flag = *tx3g_ptr++;
  176. m->d.bold = s_default.style_flag & STYLE_FLAG_BOLD;
  177. m->d.italic = s_default.style_flag & STYLE_FLAG_ITALIC;
  178. m->d.underline = s_default.style_flag & STYLE_FLAG_UNDERLINE;
  179. // fontsize
  180. m->d.fontsize = *tx3g_ptr++;
  181. // Primary color
  182. m->d.color = AV_RB24(tx3g_ptr);
  183. tx3g_ptr += 4;
  184. // FontRecord
  185. // FontRecord Size
  186. tx3g_ptr += 4;
  187. // ftab
  188. tx3g_ptr += 4;
  189. m->ftab_entries = AV_RB16(tx3g_ptr);
  190. tx3g_ptr += 2;
  191. for (i = 0; i < m->ftab_entries; i++) {
  192. box_size += 3;
  193. if (avctx->extradata_size < box_size) {
  194. mov_text_cleanup_ftab(m);
  195. m->ftab_entries = 0;
  196. return -1;
  197. }
  198. m->ftab_temp = av_mallocz(sizeof(*m->ftab_temp));
  199. if (!m->ftab_temp) {
  200. mov_text_cleanup_ftab(m);
  201. return AVERROR(ENOMEM);
  202. }
  203. m->ftab_temp->fontID = AV_RB16(tx3g_ptr);
  204. tx3g_ptr += 2;
  205. font_length = *tx3g_ptr++;
  206. box_size = box_size + font_length;
  207. if (avctx->extradata_size < box_size) {
  208. mov_text_cleanup_ftab(m);
  209. m->ftab_entries = 0;
  210. return -1;
  211. }
  212. m->ftab_temp->font = av_malloc(font_length + 1);
  213. if (!m->ftab_temp->font) {
  214. mov_text_cleanup_ftab(m);
  215. return AVERROR(ENOMEM);
  216. }
  217. memcpy(m->ftab_temp->font, tx3g_ptr, font_length);
  218. m->ftab_temp->font[font_length] = '\0';
  219. av_dynarray_add(&m->ftab, &m->count_f, m->ftab_temp);
  220. if (!m->ftab) {
  221. mov_text_cleanup_ftab(m);
  222. return AVERROR(ENOMEM);
  223. }
  224. m->ftab_temp = NULL;
  225. tx3g_ptr = tx3g_ptr + font_length;
  226. }
  227. for (i = 0; i < m->ftab_entries; i++) {
  228. if (style_fontID == m->ftab[i]->fontID)
  229. m->d.font = m->ftab[i]->font;
  230. }
  231. return 0;
  232. }
  233. static int decode_twrp(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt)
  234. {
  235. m->box_flags |= TWRP_BOX;
  236. m->w.wrap_flag = *tsmb++;
  237. return 0;
  238. }
  239. static int decode_hlit(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt)
  240. {
  241. m->box_flags |= HLIT_BOX;
  242. m->h.hlit_start = AV_RB16(tsmb);
  243. tsmb += 2;
  244. m->h.hlit_end = AV_RB16(tsmb);
  245. tsmb += 2;
  246. return 0;
  247. }
  248. static int decode_hclr(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt)
  249. {
  250. m->box_flags |= HCLR_BOX;
  251. memcpy(m->c.hlit_color, tsmb, 4);
  252. tsmb += 4;
  253. return 0;
  254. }
  255. static int decode_styl(const uint8_t *tsmb, MovTextContext *m, AVPacket *avpkt)
  256. {
  257. int i;
  258. int style_entries = AV_RB16(tsmb);
  259. tsmb += 2;
  260. // A single style record is of length 12 bytes.
  261. if (m->tracksize + m->size_var + 2 + style_entries * 12 > avpkt->size)
  262. return -1;
  263. m->style_entries = style_entries;
  264. m->box_flags |= STYL_BOX;
  265. for(i = 0; i < m->style_entries; i++) {
  266. m->s_temp = av_malloc(sizeof(*m->s_temp));
  267. if (!m->s_temp) {
  268. mov_text_cleanup(m);
  269. return AVERROR(ENOMEM);
  270. }
  271. m->s_temp->style_start = AV_RB16(tsmb);
  272. tsmb += 2;
  273. m->s_temp->style_end = AV_RB16(tsmb);
  274. tsmb += 2;
  275. m->s_temp->style_fontID = AV_RB16(tsmb);
  276. tsmb += 2;
  277. m->s_temp->style_flag = AV_RB8(tsmb);
  278. tsmb++;
  279. m->s_temp->fontsize = AV_RB8(tsmb);
  280. av_dynarray_add(&m->s, &m->count_s, m->s_temp);
  281. if(!m->s) {
  282. mov_text_cleanup(m);
  283. return AVERROR(ENOMEM);
  284. }
  285. tsmb++;
  286. // text-color-rgba
  287. tsmb += 4;
  288. }
  289. return 0;
  290. }
  291. static const Box box_types[] = {
  292. { MKBETAG('s','t','y','l'), 2, decode_styl },
  293. { MKBETAG('h','l','i','t'), 4, decode_hlit },
  294. { MKBETAG('h','c','l','r'), 4, decode_hclr },
  295. { MKBETAG('t','w','r','p'), 1, decode_twrp }
  296. };
  297. const static size_t box_count = FF_ARRAY_ELEMS(box_types);
  298. static int text_to_ass(AVBPrint *buf, const char *text, const char *text_end,
  299. MovTextContext *m)
  300. {
  301. int i = 0;
  302. int j = 0;
  303. int text_pos = 0;
  304. if (text < text_end && m->box_flags & TWRP_BOX) {
  305. if (m->w.wrap_flag == 1) {
  306. av_bprintf(buf, "{\\q1}"); /* End of line wrap */
  307. } else {
  308. av_bprintf(buf, "{\\q2}"); /* No wrap */
  309. }
  310. }
  311. while (text < text_end) {
  312. if (m->box_flags & STYL_BOX) {
  313. for (i = 0; i < m->style_entries; i++) {
  314. if (m->s[i]->style_flag && text_pos == m->s[i]->style_end) {
  315. av_bprintf(buf, "{\\r}");
  316. }
  317. }
  318. for (i = 0; i < m->style_entries; i++) {
  319. if (m->s[i]->style_flag && text_pos == m->s[i]->style_start) {
  320. if (m->s[i]->style_flag & STYLE_FLAG_BOLD)
  321. av_bprintf(buf, "{\\b1}");
  322. if (m->s[i]->style_flag & STYLE_FLAG_ITALIC)
  323. av_bprintf(buf, "{\\i1}");
  324. if (m->s[i]->style_flag & STYLE_FLAG_UNDERLINE)
  325. av_bprintf(buf, "{\\u1}");
  326. av_bprintf(buf, "{\\fs%d}", m->s[i]->fontsize);
  327. for (j = 0; j < m->ftab_entries; j++) {
  328. if (m->s[i]->style_fontID == m->ftab[j]->fontID)
  329. av_bprintf(buf, "{\\fn%s}", m->ftab[j]->font);
  330. }
  331. }
  332. }
  333. }
  334. if (m->box_flags & HLIT_BOX) {
  335. if (text_pos == m->h.hlit_start) {
  336. /* If hclr box is present, set the secondary color to the color
  337. * specified. Otherwise, set primary color to white and secondary
  338. * color to black. These colors will come from TextSampleModifier
  339. * boxes in future and inverse video technique for highlight will
  340. * be implemented.
  341. */
  342. if (m->box_flags & HCLR_BOX) {
  343. av_bprintf(buf, "{\\2c&H%02x%02x%02x&}", m->c.hlit_color[2],
  344. m->c.hlit_color[1], m->c.hlit_color[0]);
  345. } else {
  346. av_bprintf(buf, "{\\1c&H000000&}{\\2c&HFFFFFF&}");
  347. }
  348. }
  349. if (text_pos == m->h.hlit_end) {
  350. if (m->box_flags & HCLR_BOX) {
  351. av_bprintf(buf, "{\\2c&H000000&}");
  352. } else {
  353. av_bprintf(buf, "{\\1c&HFFFFFF&}{\\2c&H000000&}");
  354. }
  355. }
  356. }
  357. switch (*text) {
  358. case '\r':
  359. break;
  360. case '\n':
  361. av_bprintf(buf, "\\N");
  362. break;
  363. default:
  364. av_bprint_chars(buf, *text, 1);
  365. break;
  366. }
  367. text++;
  368. text_pos++;
  369. }
  370. return 0;
  371. }
  372. static int mov_text_init(AVCodecContext *avctx) {
  373. /*
  374. * TODO: Handle the default text style.
  375. * NB: Most players ignore styles completely, with the result that
  376. * it's very common to find files where the default style is broken
  377. * and respecting it results in a worse experience than ignoring it.
  378. */
  379. int ret;
  380. MovTextContext *m = avctx->priv_data;
  381. ret = mov_text_tx3g(avctx, m);
  382. if (ret == 0) {
  383. return ff_ass_subtitle_header(avctx, m->d.font, m->d.fontsize, m->d.color,
  384. m->d.back_color, m->d.bold, m->d.italic,
  385. m->d.underline, ASS_DEFAULT_BORDERSTYLE,
  386. m->d.alignment);
  387. } else
  388. return ff_ass_subtitle_header_default(avctx);
  389. }
  390. static int mov_text_decode_frame(AVCodecContext *avctx,
  391. void *data, int *got_sub_ptr, AVPacket *avpkt)
  392. {
  393. AVSubtitle *sub = data;
  394. MovTextContext *m = avctx->priv_data;
  395. int ret;
  396. AVBPrint buf;
  397. char *ptr = avpkt->data;
  398. char *end;
  399. int text_length, tsmb_type, ret_tsmb;
  400. uint64_t tsmb_size;
  401. const uint8_t *tsmb;
  402. if (!ptr || avpkt->size < 2)
  403. return AVERROR_INVALIDDATA;
  404. /*
  405. * A packet of size two with value zero is an empty subtitle
  406. * used to mark the end of the previous non-empty subtitle.
  407. * We can just drop them here as we have duration information
  408. * already. If the value is non-zero, then it's technically a
  409. * bad packet.
  410. */
  411. if (avpkt->size == 2)
  412. return AV_RB16(ptr) == 0 ? 0 : AVERROR_INVALIDDATA;
  413. /*
  414. * The first two bytes of the packet are the length of the text string
  415. * In complex cases, there are style descriptors appended to the string
  416. * so we can't just assume the packet size is the string size.
  417. */
  418. text_length = AV_RB16(ptr);
  419. end = ptr + FFMIN(2 + text_length, avpkt->size);
  420. ptr += 2;
  421. tsmb_size = 0;
  422. m->tracksize = 2 + text_length;
  423. m->style_entries = 0;
  424. m->box_flags = 0;
  425. m->count_s = 0;
  426. // Note that the spec recommends lines be no longer than 2048 characters.
  427. av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
  428. if (text_length + 2 != avpkt->size) {
  429. while (m->tracksize + 8 <= avpkt->size) {
  430. // A box is a minimum of 8 bytes.
  431. tsmb = ptr + m->tracksize - 2;
  432. tsmb_size = AV_RB32(tsmb);
  433. tsmb += 4;
  434. tsmb_type = AV_RB32(tsmb);
  435. tsmb += 4;
  436. if (tsmb_size == 1) {
  437. if (m->tracksize + 16 > avpkt->size)
  438. break;
  439. tsmb_size = AV_RB64(tsmb);
  440. tsmb += 8;
  441. m->size_var = 16;
  442. } else
  443. m->size_var = 8;
  444. //size_var is equal to 8 or 16 depending on the size of box
  445. if (tsmb_size == 0) {
  446. av_log(avctx, AV_LOG_ERROR, "tsmb_size is 0\n");
  447. return AVERROR_INVALIDDATA;
  448. }
  449. if (tsmb_size > avpkt->size - m->tracksize)
  450. break;
  451. for (size_t i = 0; i < box_count; i++) {
  452. if (tsmb_type == box_types[i].type) {
  453. if (m->tracksize + m->size_var + box_types[i].base_size > avpkt->size)
  454. break;
  455. ret_tsmb = box_types[i].decode(tsmb, m, avpkt);
  456. if (ret_tsmb == -1)
  457. break;
  458. }
  459. }
  460. m->tracksize = m->tracksize + tsmb_size;
  461. }
  462. text_to_ass(&buf, ptr, end, m);
  463. mov_text_cleanup(m);
  464. } else
  465. text_to_ass(&buf, ptr, end, m);
  466. ret = ff_ass_add_rect(sub, buf.str, m->readorder++, 0, NULL, NULL);
  467. av_bprint_finalize(&buf, NULL);
  468. if (ret < 0)
  469. return ret;
  470. *got_sub_ptr = sub->num_rects > 0;
  471. return avpkt->size;
  472. }
  473. static int mov_text_decode_close(AVCodecContext *avctx)
  474. {
  475. MovTextContext *m = avctx->priv_data;
  476. mov_text_cleanup_ftab(m);
  477. return 0;
  478. }
  479. static void mov_text_flush(AVCodecContext *avctx)
  480. {
  481. MovTextContext *m = avctx->priv_data;
  482. if (!(avctx->flags2 & AV_CODEC_FLAG2_RO_FLUSH_NOOP))
  483. m->readorder = 0;
  484. }
  485. AVCodec ff_movtext_decoder = {
  486. .name = "mov_text",
  487. .long_name = NULL_IF_CONFIG_SMALL("3GPP Timed Text subtitle"),
  488. .type = AVMEDIA_TYPE_SUBTITLE,
  489. .id = AV_CODEC_ID_MOV_TEXT,
  490. .priv_data_size = sizeof(MovTextContext),
  491. .init = mov_text_init,
  492. .decode = mov_text_decode_frame,
  493. .close = mov_text_decode_close,
  494. .flush = mov_text_flush,
  495. };