You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

399 lines
11KB

  1. /*
  2. * Copyright (c) 2012-2013 Clément Bœsch <u pkh me>
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #include "avformat.h"
  21. #include "subtitles.h"
  22. #include "avio_internal.h"
  23. #include "libavutil/avassert.h"
  24. #include "libavutil/avstring.h"
  25. void ff_text_init_avio(void *s, FFTextReader *r, AVIOContext *pb)
  26. {
  27. int i;
  28. r->pb = pb;
  29. r->buf_pos = r->buf_len = 0;
  30. r->type = FF_UTF_8;
  31. for (i = 0; i < 2; i++)
  32. r->buf[r->buf_len++] = avio_r8(r->pb);
  33. if (strncmp("\xFF\xFE", r->buf, 2) == 0) {
  34. r->type = FF_UTF16LE;
  35. r->buf_pos += 2;
  36. } else if (strncmp("\xFE\xFF", r->buf, 2) == 0) {
  37. r->type = FF_UTF16BE;
  38. r->buf_pos += 2;
  39. } else {
  40. r->buf[r->buf_len++] = avio_r8(r->pb);
  41. if (strncmp("\xEF\xBB\xBF", r->buf, 3) == 0) {
  42. // UTF8
  43. r->buf_pos += 3;
  44. }
  45. }
  46. if (s && (r->type == FF_UTF16LE || r->type == FF_UTF16BE))
  47. av_log(s, AV_LOG_INFO,
  48. "UTF16 is automatically converted to UTF8, do not specify a character encoding\n");
  49. }
  50. void ff_text_init_buf(FFTextReader *r, void *buf, size_t size)
  51. {
  52. memset(&r->buf_pb, 0, sizeof(r->buf_pb));
  53. ffio_init_context(&r->buf_pb, buf, size, 0, NULL, NULL, NULL, NULL);
  54. ff_text_init_avio(NULL, r, &r->buf_pb);
  55. }
  56. int64_t ff_text_pos(FFTextReader *r)
  57. {
  58. return avio_tell(r->pb) - r->buf_len + r->buf_pos;
  59. }
  60. int ff_text_r8(FFTextReader *r)
  61. {
  62. uint32_t val;
  63. uint8_t tmp;
  64. if (r->buf_pos < r->buf_len)
  65. return r->buf[r->buf_pos++];
  66. if (r->type == FF_UTF16LE) {
  67. GET_UTF16(val, avio_rl16(r->pb), return 0;)
  68. } else if (r->type == FF_UTF16BE) {
  69. GET_UTF16(val, avio_rb16(r->pb), return 0;)
  70. } else {
  71. return avio_r8(r->pb);
  72. }
  73. if (!val)
  74. return 0;
  75. r->buf_pos = 0;
  76. r->buf_len = 0;
  77. PUT_UTF8(val, tmp, r->buf[r->buf_len++] = tmp;)
  78. return r->buf[r->buf_pos++]; // buf_len is at least 1
  79. }
  80. void ff_text_read(FFTextReader *r, char *buf, size_t size)
  81. {
  82. for ( ; size > 0; size--)
  83. *buf++ = ff_text_r8(r);
  84. }
  85. int ff_text_eof(FFTextReader *r)
  86. {
  87. return r->buf_pos >= r->buf_len && avio_feof(r->pb);
  88. }
  89. int ff_text_peek_r8(FFTextReader *r)
  90. {
  91. int c;
  92. if (r->buf_pos < r->buf_len)
  93. return r->buf[r->buf_pos];
  94. c = ff_text_r8(r);
  95. if (!avio_feof(r->pb)) {
  96. r->buf_pos = 0;
  97. r->buf_len = 1;
  98. r->buf[0] = c;
  99. }
  100. return c;
  101. }
  102. AVPacket *ff_subtitles_queue_insert(FFDemuxSubtitlesQueue *q,
  103. const uint8_t *event, int len, int merge)
  104. {
  105. AVPacket *subs, *sub;
  106. if (merge && q->nb_subs > 0) {
  107. /* merge with previous event */
  108. int old_len;
  109. sub = &q->subs[q->nb_subs - 1];
  110. old_len = sub->size;
  111. if (av_grow_packet(sub, len) < 0)
  112. return NULL;
  113. memcpy(sub->data + old_len, event, len);
  114. } else {
  115. /* new event */
  116. if (q->nb_subs >= INT_MAX/sizeof(*q->subs) - 1)
  117. return NULL;
  118. subs = av_fast_realloc(q->subs, &q->allocated_size,
  119. (q->nb_subs + 1) * sizeof(*q->subs));
  120. if (!subs)
  121. return NULL;
  122. q->subs = subs;
  123. sub = &subs[q->nb_subs++];
  124. if (av_new_packet(sub, len) < 0)
  125. return NULL;
  126. sub->flags |= AV_PKT_FLAG_KEY;
  127. sub->pts = sub->dts = 0;
  128. memcpy(sub->data, event, len);
  129. }
  130. return sub;
  131. }
  132. static int cmp_pkt_sub_ts_pos(const void *a, const void *b)
  133. {
  134. const AVPacket *s1 = a;
  135. const AVPacket *s2 = b;
  136. if (s1->pts == s2->pts) {
  137. if (s1->pos == s2->pos)
  138. return 0;
  139. return s1->pos > s2->pos ? 1 : -1;
  140. }
  141. return s1->pts > s2->pts ? 1 : -1;
  142. }
  143. static int cmp_pkt_sub_pos_ts(const void *a, const void *b)
  144. {
  145. const AVPacket *s1 = a;
  146. const AVPacket *s2 = b;
  147. if (s1->pos == s2->pos) {
  148. if (s1->pts == s2->pts)
  149. return 0;
  150. return s1->pts > s2->pts ? 1 : -1;
  151. }
  152. return s1->pos > s2->pos ? 1 : -1;
  153. }
  154. void ff_subtitles_queue_finalize(FFDemuxSubtitlesQueue *q)
  155. {
  156. int i;
  157. qsort(q->subs, q->nb_subs, sizeof(*q->subs),
  158. q->sort == SUB_SORT_TS_POS ? cmp_pkt_sub_ts_pos
  159. : cmp_pkt_sub_pos_ts);
  160. for (i = 0; i < q->nb_subs; i++)
  161. if (q->subs[i].duration == -1 && i < q->nb_subs - 1)
  162. q->subs[i].duration = q->subs[i + 1].pts - q->subs[i].pts;
  163. }
  164. int ff_subtitles_queue_read_packet(FFDemuxSubtitlesQueue *q, AVPacket *pkt)
  165. {
  166. AVPacket *sub = q->subs + q->current_sub_idx;
  167. if (q->current_sub_idx == q->nb_subs)
  168. return AVERROR_EOF;
  169. if (av_copy_packet(pkt, sub) < 0) {
  170. return AVERROR(ENOMEM);
  171. }
  172. pkt->dts = pkt->pts;
  173. q->current_sub_idx++;
  174. return 0;
  175. }
  176. static int search_sub_ts(const FFDemuxSubtitlesQueue *q, int64_t ts)
  177. {
  178. int s1 = 0, s2 = q->nb_subs - 1;
  179. if (s2 < s1)
  180. return AVERROR(ERANGE);
  181. for (;;) {
  182. int mid;
  183. if (s1 == s2)
  184. return s1;
  185. if (s1 == s2 - 1)
  186. return q->subs[s1].pts <= q->subs[s2].pts ? s1 : s2;
  187. mid = (s1 + s2) / 2;
  188. if (q->subs[mid].pts <= ts)
  189. s1 = mid;
  190. else
  191. s2 = mid;
  192. }
  193. }
  194. int ff_subtitles_queue_seek(FFDemuxSubtitlesQueue *q, AVFormatContext *s, int stream_index,
  195. int64_t min_ts, int64_t ts, int64_t max_ts, int flags)
  196. {
  197. if (flags & AVSEEK_FLAG_BYTE) {
  198. return AVERROR(ENOSYS);
  199. } else if (flags & AVSEEK_FLAG_FRAME) {
  200. if (ts < 0 || ts >= q->nb_subs)
  201. return AVERROR(ERANGE);
  202. q->current_sub_idx = ts;
  203. } else {
  204. int i, idx = search_sub_ts(q, ts);
  205. int64_t ts_selected;
  206. if (idx < 0)
  207. return idx;
  208. for (i = idx; i < q->nb_subs && q->subs[i].pts < min_ts; i++)
  209. if (stream_index == -1 || q->subs[i].stream_index == stream_index)
  210. idx = i;
  211. for (i = idx; i > 0 && q->subs[i].pts > max_ts; i--)
  212. if (stream_index == -1 || q->subs[i].stream_index == stream_index)
  213. idx = i;
  214. ts_selected = q->subs[idx].pts;
  215. if (ts_selected < min_ts || ts_selected > max_ts)
  216. return AVERROR(ERANGE);
  217. /* look back in the latest subtitles for overlapping subtitles */
  218. for (i = idx - 1; i >= 0; i--) {
  219. int64_t pts = q->subs[i].pts;
  220. if (q->subs[i].duration <= 0 ||
  221. (stream_index != -1 && q->subs[i].stream_index != stream_index))
  222. continue;
  223. if (pts >= min_ts && pts > ts_selected - q->subs[i].duration)
  224. idx = i;
  225. else
  226. break;
  227. }
  228. /* If the queue is used to store multiple subtitles streams (like with
  229. * VobSub) and the stream index is not specified, we need to make sure
  230. * to focus on the smallest file position offset for a same timestamp;
  231. * queue is ordered by pts and then filepos, so we can take the first
  232. * entry for a given timestamp. */
  233. if (stream_index == -1)
  234. while (idx > 0 && q->subs[idx - 1].pts == q->subs[idx].pts)
  235. idx--;
  236. q->current_sub_idx = idx;
  237. }
  238. return 0;
  239. }
  240. void ff_subtitles_queue_clean(FFDemuxSubtitlesQueue *q)
  241. {
  242. int i;
  243. for (i = 0; i < q->nb_subs; i++)
  244. av_free_packet(&q->subs[i]);
  245. av_freep(&q->subs);
  246. q->nb_subs = q->allocated_size = q->current_sub_idx = 0;
  247. }
  248. int ff_smil_extract_next_text_chunk(FFTextReader *tr, AVBPrint *buf, char *c)
  249. {
  250. int i = 0;
  251. char end_chr;
  252. if (!*c) // cached char?
  253. *c = ff_text_r8(tr);
  254. if (!*c)
  255. return 0;
  256. end_chr = *c == '<' ? '>' : '<';
  257. do {
  258. av_bprint_chars(buf, *c, 1);
  259. *c = ff_text_r8(tr);
  260. i++;
  261. } while (*c != end_chr && *c);
  262. if (end_chr == '>') {
  263. av_bprint_chars(buf, '>', 1);
  264. *c = 0;
  265. }
  266. return i;
  267. }
  /**
   * Locate the value of an attribute inside a tag string.
   *
   * The string is walked token by token, where tokens are separated by
   * whitespace that is not inside double quotes. The first token is skipped
   * without being compared. A token matches when it starts with attr
   * (compared case-insensitively) immediately followed by '='.
   *
   * @param s    tag text to scan
   * @param attr attribute name to look for
   * @return pointer just past the '=' and past an opening double quote when
   *         there is one, or NULL when the attribute is not found
   */
  const char *ff_smil_get_attr_ptr(const char *s, const char *attr)
  {
      const int attr_len = strlen(attr);
      int quoted = 0;

      while (*s) {
          /* skip the current token up to the next unquoted whitespace */
          for (; *s; s++) {
              if (!quoted && av_isspace(*s))
                  break;
              quoted ^= *s == '"'; // XXX: support escaping?
          }

          /* skip the separator itself */
          while (av_isspace(*s))
              s++;

          if (!av_strncasecmp(s, attr, attr_len) && s[attr_len] == '=')
              return s + attr_len + 1 + (s[attr_len + 1] == '"');
      }
      return NULL;
  }
  /**
   * Tell whether a character is a line break.
   *
   * @param c character to test
   * @return 1 for carriage return or line feed, 0 for anything else
   */
  static inline int is_eol(char c)
  {
      switch (c) {
      case '\r':
      case '\n':
          return 1;
      default:
          return 0;
      }
  }
  290. void ff_subtitles_read_text_chunk(FFTextReader *tr, AVBPrint *buf)
  291. {
  292. char eol_buf[5], last_was_cr = 0;
  293. int n = 0, i = 0, nb_eol = 0;
  294. av_bprint_clear(buf);
  295. for (;;) {
  296. char c = ff_text_r8(tr);
  297. if (!c)
  298. break;
  299. /* ignore all initial line breaks */
  300. if (n == 0 && is_eol(c))
  301. continue;
  302. /* line break buffering: we don't want to add the trailing \r\n */
  303. if (is_eol(c)) {
  304. nb_eol += c == '\n' || last_was_cr;
  305. if (nb_eol == 2)
  306. break;
  307. eol_buf[i++] = c;
  308. if (i == sizeof(eol_buf) - 1)
  309. break;
  310. last_was_cr = c == '\r';
  311. continue;
  312. }
  313. /* only one line break followed by data: we flush the line breaks
  314. * buffer */
  315. if (i) {
  316. eol_buf[i] = 0;
  317. av_bprintf(buf, "%s", eol_buf);
  318. i = nb_eol = 0;
  319. }
  320. av_bprint_chars(buf, c, 1);
  321. n++;
  322. }
  323. }
  324. void ff_subtitles_read_chunk(AVIOContext *pb, AVBPrint *buf)
  325. {
  326. FFTextReader tr;
  327. tr.buf_pos = tr.buf_len = 0;
  328. tr.type = 0;
  329. tr.pb = pb;
  330. ff_subtitles_read_text_chunk(&tr, buf);
  331. }
  332. ptrdiff_t ff_subtitles_read_line(FFTextReader *tr, char *buf, size_t size)
  333. {
  334. size_t cur = 0;
  335. if (!size)
  336. return 0;
  337. while (cur + 1 < size) {
  338. unsigned char c = ff_text_r8(tr);
  339. if (!c)
  340. return ff_text_eof(tr) ? cur : AVERROR_INVALIDDATA;
  341. if (c == '\r' || c == '\n')
  342. break;
  343. buf[cur++] = c;
  344. buf[cur] = '\0';
  345. }
  346. if (ff_text_peek_r8(tr) == '\r')
  347. ff_text_r8(tr);
  348. if (ff_text_peek_r8(tr) == '\n')
  349. ff_text_r8(tr);
  350. return cur;
  351. }