You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

417 lines
15KB

  1. /*
  2. * Audible AA demuxer
  3. * Copyright (c) 2015 Vesselin Bontchev
  4. *
  5. * Header parsing is borrowed from https://github.com/jteeuwen/audible project.
  6. * Copyright (c) 2001-2014, Jim Teeuwen
  7. *
  8. * Redistribution and use in source and binary forms, with or without modification,
  9. * are permitted provided that the following conditions are met:
  10. *
  11. * 1. Redistributions of source code must retain the above copyright notice, this
  12. * list of conditions and the following disclaimer.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  15. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  16. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  17. * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
  18. * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  19. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  20. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
  21. * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  22. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  23. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  24. */
  25. #include "avformat.h"
  26. #include "internal.h"
  27. #include "libavutil/dict.h"
  28. #include "libavutil/intreadwrite.h"
  29. #include "libavutil/tea.h"
  30. #include "libavutil/opt.h"
  /* 32-bit big-endian value found at byte offset 4 of an Audible .aa file
   * (decimal 1469084982); aa_probe() compares against it. */
  #define AA_MAGIC              0x57907536

  /* Upper bound on the per-second block size; sizes the packet scratch buffer. */
  #define MAX_CODEC_SECOND_SIZE 3982

  /* Limits enforced while parsing the header. */
  #define MAX_TOC_ENTRIES       16
  #define MAX_DICTIONARY_ENTRIES 128

  /* TEA operates on 8-byte blocks. */
  #define TEA_BLOCK_SIZE        8

  /* Each chapter is preceded by two 32-bit fields (size + data start offset). */
  #define CHAPTER_HEADER_SIZE   8

  /* Byte positions are multiplied by this factor to form timestamps. */
  #define TIMEPREC              1000

  /* Size in bytes of one encoded MP3 frame, used to estimate frame alignment
   * after a seek. */
  #define MP3_FRAME_SIZE        104
  /* Private demuxer state, allocated per AVFormatContext (priv_data). */
  typedef struct AADemuxContext {
      AVClass *class;                 /* class pointer for this context; the demuxer registers aa_class as priv_class */
      uint8_t *aa_fixed_key;          /* value of the "aa_fixed_key" option */
      int aa_fixed_key_len;           /* length of aa_fixed_key; aa_read_header() requires 16
                                       * (presumably filled in by the option handler - TODO confirm) */
      int codec_second_size;          /* block size for the detected codec, from get_second_size() */
      int current_codec_second_size;  /* size of the next block to read; shrinks for a chapter's last block */
      int chapter_idx;                /* running chapter counter, bumped when a chapter header is read */
      struct AVTEA *tea_ctx;          /* TEA cipher context; allocated in aa_read_header(), freed in aa_read_close() */
      uint8_t file_key[16];           /* per-file key derived in aa_read_header(), used to decrypt audio blocks */
      int64_t current_chapter_size;   /* bytes left in the current chapter; 0 means a chapter header comes next */
      int64_t content_start;          /* file offset where the audio content begins */
      int64_t content_end;            /* file offset just past the audio content */
      int seek_offset;                /* bytes to drop from the front of the next packet after a seek */
  } AADemuxContext;
  /**
   * Map an Audible codec name to its block size in bytes.
   *
   * The returned value is stored as the demuxer's codec_second_size and
   * determines how many bytes each packet read consumes.
   *
   * @param codec_name NUL-terminated codec name taken from the file's
   *                   dictionary; must not be NULL
   * @return the block size for a known codec, or -1 if the name is not
   *         recognised
   */
  static int get_second_size(const char *codec_name)
  {
      static const struct {
          const char *name;
          int         second_size;
      } known_codecs[] = {
          { "mp332",   3982 },
          { "acelp16", 2000 },
          { "acelp85", 1045 },
      };
      unsigned k;

      for (k = 0; k < sizeof(known_codecs) / sizeof(known_codecs[0]); k++) {
          if (!strcmp(codec_name, known_codecs[k].name))
              return known_codecs[k].second_size;
      }

      return -1;
  }
  65. static int aa_read_header(AVFormatContext *s)
  66. {
  67. int i, j, idx, largest_idx = -1;
  68. uint32_t nkey, nval, toc_size, npairs, header_seed = 0, start;
  69. char key[128], val[128], codec_name[64] = {0};
  70. uint8_t output[24], dst[8], src[8];
  71. int64_t largest_size = -1, current_size = -1, chapter_pos;
  72. struct toc_entry {
  73. uint32_t offset;
  74. uint32_t size;
  75. } TOC[MAX_TOC_ENTRIES];
  76. uint32_t header_key_part[4];
  77. uint8_t header_key[16] = {0};
  78. AADemuxContext *c = s->priv_data;
  79. AVIOContext *pb = s->pb;
  80. AVStream *st;
  81. int ret;
  82. /* parse .aa header */
  83. avio_skip(pb, 4); // file size
  84. avio_skip(pb, 4); // magic string
  85. toc_size = avio_rb32(pb); // TOC size
  86. avio_skip(pb, 4); // unidentified integer
  87. if (toc_size > MAX_TOC_ENTRIES || toc_size < 2)
  88. return AVERROR_INVALIDDATA;
  89. for (i = 0; i < toc_size; i++) { // read TOC
  90. avio_skip(pb, 4); // TOC entry index
  91. TOC[i].offset = avio_rb32(pb); // block offset
  92. TOC[i].size = avio_rb32(pb); // block size
  93. }
  94. avio_skip(pb, 24); // header termination block (ignored)
  95. npairs = avio_rb32(pb); // read dictionary entries
  96. if (npairs > MAX_DICTIONARY_ENTRIES)
  97. return AVERROR_INVALIDDATA;
  98. for (i = 0; i < npairs; i++) {
  99. memset(val, 0, sizeof(val));
  100. memset(key, 0, sizeof(key));
  101. avio_skip(pb, 1); // unidentified integer
  102. nkey = avio_rb32(pb); // key string length
  103. nval = avio_rb32(pb); // value string length
  104. avio_get_str(pb, nkey, key, sizeof(key));
  105. avio_get_str(pb, nval, val, sizeof(val));
  106. if (!strcmp(key, "codec")) {
  107. av_log(s, AV_LOG_DEBUG, "Codec is <%s>\n", val);
  108. strncpy(codec_name, val, sizeof(codec_name) - 1);
  109. } else if (!strcmp(key, "HeaderSeed")) {
  110. av_log(s, AV_LOG_DEBUG, "HeaderSeed is <%s>\n", val);
  111. header_seed = atoi(val);
  112. } else if (!strcmp(key, "HeaderKey")) { // this looks like "1234567890 1234567890 1234567890 1234567890"
  113. av_log(s, AV_LOG_DEBUG, "HeaderKey is <%s>\n", val);
  114. ret = sscanf(val, "%"SCNu32"%"SCNu32"%"SCNu32"%"SCNu32,
  115. &header_key_part[0], &header_key_part[1], &header_key_part[2], &header_key_part[3]);
  116. if (ret != 4)
  117. return AVERROR_INVALIDDATA;
  118. for (idx = 0; idx < 4; idx++) {
  119. AV_WB32(&header_key[idx * 4], header_key_part[idx]); // convert each part to BE!
  120. }
  121. av_log(s, AV_LOG_DEBUG, "Processed HeaderKey is ");
  122. for (i = 0; i < 16; i++)
  123. av_log(s, AV_LOG_DEBUG, "%02x", header_key[i]);
  124. av_log(s, AV_LOG_DEBUG, "\n");
  125. } else {
  126. av_dict_set(&s->metadata, key, val, 0);
  127. }
  128. }
  129. /* verify fixed key */
  130. if (c->aa_fixed_key_len != 16) {
  131. av_log(s, AV_LOG_ERROR, "aa_fixed_key value needs to be 16 bytes!\n");
  132. return AVERROR(EINVAL);
  133. }
  134. /* verify codec */
  135. if ((c->codec_second_size = get_second_size(codec_name)) == -1) {
  136. av_log(s, AV_LOG_ERROR, "unknown codec <%s>!\n", codec_name);
  137. return AVERROR(EINVAL);
  138. }
  139. /* decryption key derivation */
  140. c->tea_ctx = av_tea_alloc();
  141. if (!c->tea_ctx)
  142. return AVERROR(ENOMEM);
  143. av_tea_init(c->tea_ctx, c->aa_fixed_key, 16);
  144. output[0] = output[1] = 0; // purely for padding purposes
  145. memcpy(output + 2, header_key, 16);
  146. idx = 0;
  147. for (i = 0; i < 3; i++) { // TEA CBC with weird mixed endianness
  148. AV_WB32(src, header_seed);
  149. AV_WB32(src + 4, header_seed + 1);
  150. header_seed += 2;
  151. av_tea_crypt(c->tea_ctx, dst, src, 1, NULL, 0); // TEA ECB encrypt
  152. for (j = 0; j < TEA_BLOCK_SIZE && idx < 18; j+=1, idx+=1) {
  153. output[idx] = output[idx] ^ dst[j];
  154. }
  155. }
  156. memcpy(c->file_key, output + 2, 16); // skip first 2 bytes of output
  157. av_log(s, AV_LOG_DEBUG, "File key is ");
  158. for (i = 0; i < 16; i++)
  159. av_log(s, AV_LOG_DEBUG, "%02x", c->file_key[i]);
  160. av_log(s, AV_LOG_DEBUG, "\n");
  161. /* decoder setup */
  162. st = avformat_new_stream(s, NULL);
  163. if (!st) {
  164. av_freep(&c->tea_ctx);
  165. return AVERROR(ENOMEM);
  166. }
  167. st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
  168. if (!strcmp(codec_name, "mp332")) {
  169. st->codecpar->codec_id = AV_CODEC_ID_MP3;
  170. st->codecpar->sample_rate = 22050;
  171. st->need_parsing = AVSTREAM_PARSE_FULL_RAW;
  172. avpriv_set_pts_info(st, 64, 8, 32000 * TIMEPREC);
  173. // encoded audio frame is MP3_FRAME_SIZE bytes (+1 with padding, unlikely)
  174. } else if (!strcmp(codec_name, "acelp85")) {
  175. st->codecpar->codec_id = AV_CODEC_ID_SIPR;
  176. st->codecpar->block_align = 19;
  177. st->codecpar->channels = 1;
  178. st->codecpar->sample_rate = 8500;
  179. st->codecpar->bit_rate = 8500;
  180. st->need_parsing = AVSTREAM_PARSE_FULL_RAW;
  181. avpriv_set_pts_info(st, 64, 8, 8500 * TIMEPREC);
  182. } else if (!strcmp(codec_name, "acelp16")) {
  183. st->codecpar->codec_id = AV_CODEC_ID_SIPR;
  184. st->codecpar->block_align = 20;
  185. st->codecpar->channels = 1;
  186. st->codecpar->sample_rate = 16000;
  187. st->codecpar->bit_rate = 16000;
  188. st->need_parsing = AVSTREAM_PARSE_FULL_RAW;
  189. avpriv_set_pts_info(st, 64, 8, 16000 * TIMEPREC);
  190. }
  191. /* determine, and jump to audio start offset */
  192. for (i = 1; i < toc_size; i++) { // skip the first entry!
  193. current_size = TOC[i].size;
  194. if (current_size > largest_size) {
  195. largest_idx = i;
  196. largest_size = current_size;
  197. }
  198. }
  199. start = TOC[largest_idx].offset;
  200. avio_seek(pb, start, SEEK_SET);
  201. // extract chapter positions. since all formats have constant bit rate, use it
  202. // as time base in bytes/s, for easy stream position <-> timestamp conversion
  203. st->start_time = 0;
  204. c->content_start = start;
  205. c->content_end = start + largest_size;
  206. while ((chapter_pos = avio_tell(pb)) >= 0 && chapter_pos < c->content_end) {
  207. int chapter_idx = s->nb_chapters;
  208. uint32_t chapter_size = avio_rb32(pb);
  209. if (chapter_size == 0) break;
  210. chapter_pos -= start + CHAPTER_HEADER_SIZE * chapter_idx;
  211. avio_skip(pb, 4 + chapter_size);
  212. if (!avpriv_new_chapter(s, chapter_idx, st->time_base,
  213. chapter_pos * TIMEPREC, (chapter_pos + chapter_size) * TIMEPREC, NULL))
  214. return AVERROR(ENOMEM);
  215. }
  216. st->duration = (largest_size - CHAPTER_HEADER_SIZE * s->nb_chapters) * TIMEPREC;
  217. ff_update_cur_dts(s, st, 0);
  218. avio_seek(pb, start, SEEK_SET);
  219. c->current_chapter_size = 0;
  220. c->seek_offset = 0;
  221. return 0;
  222. }
  223. static int aa_read_packet(AVFormatContext *s, AVPacket *pkt)
  224. {
  225. uint8_t dst[TEA_BLOCK_SIZE];
  226. uint8_t src[TEA_BLOCK_SIZE];
  227. int i;
  228. int trailing_bytes;
  229. int blocks;
  230. uint8_t buf[MAX_CODEC_SECOND_SIZE * 2];
  231. int written = 0;
  232. int ret;
  233. AADemuxContext *c = s->priv_data;
  234. uint64_t pos = avio_tell(s->pb);
  235. // are we at the end of the audio content?
  236. if (pos >= c->content_end) {
  237. return AVERROR_EOF;
  238. }
  239. // are we at the start of a chapter?
  240. if (c->current_chapter_size == 0) {
  241. c->current_chapter_size = avio_rb32(s->pb);
  242. if (c->current_chapter_size == 0) {
  243. return AVERROR_EOF;
  244. }
  245. av_log(s, AV_LOG_DEBUG, "Chapter %d (%" PRId64 " bytes)\n", c->chapter_idx, c->current_chapter_size);
  246. c->chapter_idx = c->chapter_idx + 1;
  247. avio_skip(s->pb, 4); // data start offset
  248. pos += 8;
  249. c->current_codec_second_size = c->codec_second_size;
  250. }
  251. // is this the last block in this chapter?
  252. if (c->current_chapter_size / c->current_codec_second_size == 0) {
  253. c->current_codec_second_size = c->current_chapter_size % c->current_codec_second_size;
  254. }
  255. // decrypt c->current_codec_second_size bytes
  256. blocks = c->current_codec_second_size / TEA_BLOCK_SIZE;
  257. for (i = 0; i < blocks; i++) {
  258. ret = avio_read(s->pb, src, TEA_BLOCK_SIZE);
  259. if (ret != TEA_BLOCK_SIZE)
  260. return (ret < 0) ? ret : AVERROR_EOF;
  261. av_tea_init(c->tea_ctx, c->file_key, 16);
  262. av_tea_crypt(c->tea_ctx, dst, src, 1, NULL, 1);
  263. memcpy(buf + written, dst, TEA_BLOCK_SIZE);
  264. written = written + TEA_BLOCK_SIZE;
  265. }
  266. trailing_bytes = c->current_codec_second_size % TEA_BLOCK_SIZE;
  267. if (trailing_bytes != 0) { // trailing bytes are left unencrypted!
  268. ret = avio_read(s->pb, src, trailing_bytes);
  269. if (ret != trailing_bytes)
  270. return (ret < 0) ? ret : AVERROR_EOF;
  271. memcpy(buf + written, src, trailing_bytes);
  272. written = written + trailing_bytes;
  273. }
  274. // update state
  275. c->current_chapter_size = c->current_chapter_size - c->current_codec_second_size;
  276. if (c->current_chapter_size <= 0)
  277. c->current_chapter_size = 0;
  278. if (c->seek_offset > written)
  279. c->seek_offset = 0; // ignore wrong estimate
  280. ret = av_new_packet(pkt, written - c->seek_offset);
  281. if (ret < 0)
  282. return ret;
  283. memcpy(pkt->data, buf + c->seek_offset, written - c->seek_offset);
  284. pkt->pos = pos;
  285. c->seek_offset = 0;
  286. return 0;
  287. }
  288. static int aa_read_seek(AVFormatContext *s,
  289. int stream_index, int64_t timestamp, int flags)
  290. {
  291. AADemuxContext *c = s->priv_data;
  292. AVChapter *ch;
  293. int64_t chapter_pos, chapter_start, chapter_size;
  294. int chapter_idx = 0;
  295. // find chapter containing seek timestamp
  296. if (timestamp < 0)
  297. timestamp = 0;
  298. while (chapter_idx < s->nb_chapters && timestamp >= s->chapters[chapter_idx]->end) {
  299. ++chapter_idx;
  300. }
  301. if (chapter_idx >= s->nb_chapters) {
  302. chapter_idx = s->nb_chapters - 1;
  303. if (chapter_idx < 0) return -1; // there is no chapter.
  304. timestamp = s->chapters[chapter_idx]->end;
  305. }
  306. ch = s->chapters[chapter_idx];
  307. // sync by clamping timestamp to nearest valid block position in its chapter
  308. chapter_size = ch->end / TIMEPREC - ch->start / TIMEPREC;
  309. chapter_pos = av_rescale_rnd((timestamp - ch->start) / TIMEPREC,
  310. 1, c->codec_second_size,
  311. (flags & AVSEEK_FLAG_BACKWARD) ? AV_ROUND_DOWN : AV_ROUND_UP)
  312. * c->codec_second_size;
  313. if (chapter_pos >= chapter_size)
  314. chapter_pos = chapter_size;
  315. chapter_start = c->content_start + (ch->start / TIMEPREC) + CHAPTER_HEADER_SIZE * (1 + chapter_idx);
  316. // reinit read state
  317. avio_seek(s->pb, chapter_start + chapter_pos, SEEK_SET);
  318. c->current_codec_second_size = c->codec_second_size;
  319. c->current_chapter_size = chapter_size - chapter_pos;
  320. c->chapter_idx = 1 + chapter_idx;
  321. // for unaligned frames, estimate offset of first frame in block (assume no padding)
  322. if (s->streams[0]->codecpar->codec_id == AV_CODEC_ID_MP3) {
  323. c->seek_offset = (MP3_FRAME_SIZE - chapter_pos % MP3_FRAME_SIZE) % MP3_FRAME_SIZE;
  324. }
  325. ff_update_cur_dts(s, s->streams[0], ch->start + (chapter_pos + c->seek_offset) * TIMEPREC);
  326. return 1;
  327. }
  328. static int aa_probe(const AVProbeData *p)
  329. {
  330. uint8_t *buf = p->buf;
  331. // first 4 bytes are file size, next 4 bytes are the magic
  332. if (AV_RB32(buf+4) != AA_MAGIC)
  333. return 0;
  334. return AVPROBE_SCORE_MAX / 2;
  335. }
  336. static int aa_read_close(AVFormatContext *s)
  337. {
  338. AADemuxContext *c = s->priv_data;
  339. av_freep(&c->tea_ctx);
  340. return 0;
  341. }
  342. #define OFFSET(x) offsetof(AADemuxContext, x)
  343. static const AVOption aa_options[] = {
  344. { "aa_fixed_key", // extracted from libAAX_SDK.so and AAXSDKWin.dll files!
  345. "Fixed key used for handling Audible AA files", OFFSET(aa_fixed_key),
  346. AV_OPT_TYPE_BINARY, {.str="77214d4b196a87cd520045fd2a51d673"},
  347. .flags = AV_OPT_FLAG_DECODING_PARAM },
  348. { NULL },
  349. };
  /* AVClass describing the demuxer's private context; it ties the context to
   * aa_options and is referenced by ff_aa_demuxer.priv_class. */
  static const AVClass aa_class = {
      .class_name = "aa",
      .item_name = av_default_item_name,
      .option = aa_options,
      .version = LIBAVUTIL_VERSION_INT,
  };
  /* Demuxer definition for Audible AA files: wires the probe, header, packet,
   * seek and close callbacks defined in this file to the "aa" format name and
   * file extension. */
  AVInputFormat ff_aa_demuxer = {
      .name = "aa",
      .long_name = NULL_IF_CONFIG_SMALL("Audible AA format files"),
      .priv_class = &aa_class,
      .priv_data_size = sizeof(AADemuxContext), /* size of the per-file private state */
      .extensions = "aa",
      .read_probe = aa_probe,
      .read_header = aa_read_header,
      .read_packet = aa_read_packet,
      .read_seek = aa_read_seek,
      .read_close = aa_read_close,
      /* NOTE(review): presumably disables byte-based and generic seeking so
       * that aa_read_seek() is always used - confirm against avformat.h */
      .flags = AVFMT_NO_BYTE_SEEK | AVFMT_NOGENSEARCH,
  };