You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

177 lines
5.6KB

  1. /*
  2. * Copyright (c) 2012 Clément Bœsch
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. /**
  21. * @file
  22. * SAMI subtitle decoder
  23. * @see http://msdn.microsoft.com/en-us/library/ms971327.aspx
  24. */
  25. #include "ass.h"
  26. #include "libavutil/avstring.h"
  27. #include "libavutil/bprint.h"
  28. #include "htmlsubtitles.h"
  29. typedef struct {
  30. AVBPrint source;
  31. AVBPrint content;
  32. AVBPrint encoded_source;
  33. AVBPrint encoded_content;
  34. AVBPrint full;
  35. } SAMIContext;
  36. static int sami_paragraph_to_ass(AVCodecContext *avctx, const char *src)
  37. {
  38. SAMIContext *sami = avctx->priv_data;
  39. int ret = 0;
  40. char *tag = NULL;
  41. char *dupsrc = av_strdup(src);
  42. char *p = dupsrc;
  43. AVBPrint *dst_content = &sami->encoded_content;
  44. AVBPrint *dst_source = &sami->encoded_source;
  45. av_bprint_clear(&sami->encoded_content);
  46. av_bprint_clear(&sami->content);
  47. av_bprint_clear(&sami->encoded_source);
  48. for (;;) {
  49. char *saveptr = NULL;
  50. int prev_chr_is_space = 0;
  51. AVBPrint *dst = &sami->content;
  52. /* parse & extract paragraph tag */
  53. p = av_stristr(p, "<P");
  54. if (!p)
  55. break;
  56. if (p[2] != '>' && !av_isspace(p[2])) { // avoid confusion with tags such as <PRE>
  57. p++;
  58. continue;
  59. }
  60. if (dst->len) // add a separator with the previous paragraph if there was one
  61. av_bprintf(dst, "\\N");
  62. tag = av_strtok(p, ">", &saveptr);
  63. if (!tag || !saveptr)
  64. break;
  65. p = saveptr;
  66. /* check if the current paragraph is the "source" (speaker name) */
  67. if (av_stristr(tag, "ID=Source") || av_stristr(tag, "ID=\"Source\"")) {
  68. dst = &sami->source;
  69. av_bprint_clear(dst);
  70. }
  71. /* if empty event -> skip subtitle */
  72. while (av_isspace(*p))
  73. p++;
  74. if (!strncmp(p, "&nbsp;", 6)) {
  75. ret = -1;
  76. goto end;
  77. }
  78. /* extract the text, stripping most of the tags */
  79. while (*p) {
  80. if (*p == '<') {
  81. if (!av_strncasecmp(p, "<P", 2) && (p[2] == '>' || av_isspace(p[2])))
  82. break;
  83. }
  84. if (!av_strncasecmp(p, "<BR", 3)) {
  85. av_bprintf(dst, "\\N");
  86. p++;
  87. while (*p && *p != '>')
  88. p++;
  89. if (!*p)
  90. break;
  91. if (*p == '>')
  92. p++;
  93. continue;
  94. }
  95. if (!av_isspace(*p))
  96. av_bprint_chars(dst, *p, 1);
  97. else if (!prev_chr_is_space)
  98. av_bprint_chars(dst, ' ', 1);
  99. prev_chr_is_space = av_isspace(*p);
  100. p++;
  101. }
  102. }
  103. av_bprint_clear(&sami->full);
  104. if (sami->source.len) {
  105. ff_htmlmarkup_to_ass(avctx, dst_source, sami->source.str);
  106. av_bprintf(&sami->full, "{\\i1}%s{\\i0}\\N", sami->encoded_source.str);
  107. }
  108. ff_htmlmarkup_to_ass(avctx, dst_content, sami->content.str);
  109. av_bprintf(&sami->full, "%s", sami->encoded_content.str);
  110. end:
  111. av_free(dupsrc);
  112. return ret;
  113. }
  114. static int sami_decode_frame(AVCodecContext *avctx,
  115. void *data, int *got_sub_ptr, AVPacket *avpkt)
  116. {
  117. AVSubtitle *sub = data;
  118. const char *ptr = avpkt->data;
  119. SAMIContext *sami = avctx->priv_data;
  120. if (ptr && avpkt->size > 0 && !sami_paragraph_to_ass(avctx, ptr)) {
  121. int ts_start = av_rescale_q(avpkt->pts, avctx->time_base, (AVRational){1,100});
  122. int ts_duration = avpkt->duration != -1 ?
  123. av_rescale_q(avpkt->duration, avctx->time_base, (AVRational){1,100}) : -1;
  124. int ret = ff_ass_add_rect_bprint(sub, &sami->full, ts_start, ts_duration);
  125. if (ret < 0)
  126. return ret;
  127. }
  128. *got_sub_ptr = sub->num_rects > 0;
  129. return avpkt->size;
  130. }
  131. static av_cold int sami_init(AVCodecContext *avctx)
  132. {
  133. SAMIContext *sami = avctx->priv_data;
  134. av_bprint_init(&sami->source, 0, 2048);
  135. av_bprint_init(&sami->content, 0, 2048);
  136. av_bprint_init(&sami->encoded_source, 0, 2048);
  137. av_bprint_init(&sami->encoded_content, 0, 2048);
  138. av_bprint_init(&sami->full, 0, 2048);
  139. return ff_ass_subtitle_header_default(avctx);
  140. }
  141. static av_cold int sami_close(AVCodecContext *avctx)
  142. {
  143. SAMIContext *sami = avctx->priv_data;
  144. av_bprint_finalize(&sami->source, NULL);
  145. av_bprint_finalize(&sami->content, NULL);
  146. av_bprint_finalize(&sami->encoded_source, NULL);
  147. av_bprint_finalize(&sami->encoded_content, NULL);
  148. av_bprint_finalize(&sami->full, NULL);
  149. return 0;
  150. }
  151. AVCodec ff_sami_decoder = {
  152. .name = "sami",
  153. .long_name = NULL_IF_CONFIG_SMALL("SAMI subtitle"),
  154. .type = AVMEDIA_TYPE_SUBTITLE,
  155. .id = AV_CODEC_ID_SAMI,
  156. .priv_data_size = sizeof(SAMIContext),
  157. .init = sami_init,
  158. .close = sami_close,
  159. .decode = sami_decode_frame,
  160. };