You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1360 lines
41KB

  1. /*
  2. * copyright (c) 2015 Rick Kern <kernrj@gmail.com>
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #include <VideoToolbox/VideoToolbox.h>
  21. #include <CoreVideo/CoreVideo.h>
  22. #include <CoreMedia/CoreMedia.h>
  23. #include <TargetConditionals.h>
  24. #include <Availability.h>
  25. #include "avcodec.h"
  26. #include "libavutil/opt.h"
  27. #include "libavutil/avassert.h"
  28. #include "libavutil/atomic.h"
  29. #include "libavutil/avstring.h"
  30. #include "libavcodec/avcodec.h"
  31. #include "internal.h"
  32. #include <pthread.h>
/* H.264 profile selection; AUTO lets get_vt_profile_level() pick a profile
 * based on whether B-frames are enabled. */
typedef enum VT_H264Profile {
    H264_PROF_AUTO,
    H264_PROF_BASELINE,
    H264_PROF_MAIN,
    H264_PROF_HIGH,
    H264_PROF_COUNT  /* number of valid values above, not a profile itself */
} VT_H264Profile;
/* H.264 Annex B start code; prefixes parameter sets and replaces the
 * 4-byte NAL length codes emitted by VideoToolbox. */
static const uint8_t start_code[] = { 0, 0, 0, 1 };
/* Node of the singly-linked FIFO of encoded output samples. */
typedef struct BufNode {
    CMSampleBufferRef cm_buffer; /* retained on push; released by consumer or teardown */
    struct BufNode* next;
    int error; /* NOTE(review): never written in this chunk — confirm use elsewhere */
} BufNode;
/* Private context of the VideoToolbox encoder. */
typedef struct VTEncContext {
    AVClass *class;
    VTCompressionSessionRef session; /* VideoToolbox compression session */

    pthread_mutex_t lock;           /* guards q_head/q_tail and async_error */
    pthread_cond_t  cv_sample_sent; /* signaled when a sample is queued */

    int async_error; /* sticky error set from the encode callback thread */

    BufNode *q_head; /* FIFO of encoded samples awaiting vtenc_q_pop() */
    BufNode *q_tail;

    int64_t frame_ct_out; /* frames dequeued so far */
    int64_t frame_ct_in;  /* frames submitted so far */

    int64_t first_pts; /* NOTE(review): not referenced in this chunk */
    int64_t dts_delta; /* dts offset when B-frames may reorder (-1), else 0 */

    int64_t profile; /* VT_H264Profile requested via options */
    int64_t level;   /* H.264 level * 10 (e.g. 41 == 4.1); 0 = auto */

    bool flushing;           /* no more input; drain remaining output */
    bool has_b_frames;
    bool warned_color_range; /* warn about an unset color range only once */
} VTEncContext;
  64. static void set_async_error(VTEncContext *vtctx, int err)
  65. {
  66. BufNode *info;
  67. pthread_mutex_lock(&vtctx->lock);
  68. vtctx->async_error = err;
  69. info = vtctx->q_head;
  70. vtctx->q_head = vtctx->q_tail = NULL;
  71. while (info) {
  72. BufNode *next = info->next;
  73. CFRelease(info->cm_buffer);
  74. av_free(info);
  75. info = next;
  76. }
  77. pthread_mutex_unlock(&vtctx->lock);
  78. }
/**
 * Pop the next encoded sample from the output queue.
 *
 * @param wait When true, block on the condition variable until a sample
 *             is queued (or an async error is raised).
 * @param buf  Receives the sample (ownership of one retain passes to the
 *             caller), or NULL when the queue is empty / stream is drained.
 * @return 0, or the sticky async error code.
 */
static int vtenc_q_pop(VTEncContext *vtctx, bool wait, CMSampleBufferRef *buf)
{
    BufNode *info;

    pthread_mutex_lock(&vtctx->lock);

    /* An error raised by the encode callback aborts all further pops. */
    if (vtctx->async_error) {
        pthread_mutex_unlock(&vtctx->lock);
        return vtctx->async_error;
    }

    /* Flushing and every submitted frame already emitted: end of stream. */
    if (vtctx->flushing && vtctx->frame_ct_in == vtctx->frame_ct_out) {
        *buf = NULL;
        pthread_mutex_unlock(&vtctx->lock);
        return 0;
    }

    while (!vtctx->q_head && !vtctx->async_error && wait) {
        pthread_cond_wait(&vtctx->cv_sample_sent, &vtctx->lock);
    }

    if (!vtctx->q_head) {
        pthread_mutex_unlock(&vtctx->lock);
        *buf = NULL;
        return 0;
    }

    /* Unlink the head node; clear the tail when the queue becomes empty. */
    info = vtctx->q_head;
    vtctx->q_head = vtctx->q_head->next;
    if (!vtctx->q_head) {
        vtctx->q_tail = NULL;
    }

    pthread_mutex_unlock(&vtctx->lock);

    *buf = info->cm_buffer; /* caller now owns the retain taken on push */
    av_free(info);

    /* NOTE(review): incremented outside the lock — safe only if a single
     * consumer thread calls this; confirm against callers. */
    vtctx->frame_ct_out++;

    return 0;
}
  111. static void vtenc_q_push(VTEncContext *vtctx, CMSampleBufferRef buffer)
  112. {
  113. BufNode *info = av_malloc(sizeof(BufNode));
  114. if (!info) {
  115. set_async_error(vtctx, AVERROR(ENOMEM));
  116. return;
  117. }
  118. CFRetain(buffer);
  119. info->cm_buffer = buffer;
  120. info->next = NULL;
  121. pthread_mutex_lock(&vtctx->lock);
  122. pthread_cond_signal(&vtctx->cv_sample_sent);
  123. if (!vtctx->q_head) {
  124. vtctx->q_head = info;
  125. } else {
  126. vtctx->q_tail->next = info;
  127. }
  128. vtctx->q_tail = info;
  129. pthread_mutex_unlock(&vtctx->lock);
  130. }
  131. static CMVideoCodecType get_cm_codec_type(enum AVCodecID id)
  132. {
  133. switch (id) {
  134. case AV_CODEC_ID_H264: return kCMVideoCodecType_H264;
  135. default: return 0;
  136. }
  137. }
/* AVBufferRef free callback: drops the CFRetain taken on the CMBlockBuffer
 * when a packet was created around its data (see the zero-copy path in
 * vtenc_cm_to_avpacket). */
static void vtenc_free_block(void *opaque, uint8_t *data)
{
    CMBlockBufferRef block = opaque;
    CFRelease(block);
}
/**
 * Parameter-set extraction helpers.
 * get_params_size() computes the number of bytes needed to hold all
 * parameter sets of a format description, each prefixed with an Annex B
 * start code. copy_param_sets() then writes them into a caller-provided
 * buffer of at least that size.
 */
/**
 * Compute the total size in bytes of all H.264 parameter sets in vid_fmt,
 * counting an Annex B start-code prefix for each.
 *
 * @param size Receives the total on success.
 * @return 0 on success, AVERROR_EXTERNAL on CoreMedia failure.
 */
static int get_params_size(
    AVCodecContext *avctx,
    CMVideoFormatDescriptionRef vid_fmt,
    size_t *size)
{
    size_t total_size = 0;
    size_t ps_count;
    int is_count_bad = 0;
    size_t i;
    int status;

    /* Query only the parameter-set count. If that query fails, fall back
     * to iterating until the per-index query fails (is_count_bad mode). */
    status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
                                                                0,
                                                                NULL,
                                                                NULL,
                                                                &ps_count,
                                                                NULL);
    if (status) {
        is_count_bad = 1;
        ps_count     = 0;
        status       = 0;
    }

    for (i = 0; i < ps_count || is_count_bad; i++) {
        const uint8_t *ps;
        size_t ps_size;
        status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
                                                                    i,
                                                                    &ps,
                                                                    &ps_size,
                                                                    NULL,
                                                                    NULL);
        if (status) {
            /*
             * When ps_count is invalid, status != 0 ends the loop normally
             * unless we didn't get any parameter sets.
             */
            if (i > 0 && is_count_bad) status = 0;

            break;
        }

        total_size += ps_size + sizeof(start_code);
    }

    if (status) {
        av_log(avctx, AV_LOG_ERROR, "Error getting parameter set sizes: %d\n", status);
        return AVERROR_EXTERNAL;
    }

    *size = total_size;
    return 0;
}
/**
 * Copy all H.264 parameter sets from vid_fmt into dst, each prefixed with
 * an Annex B start code. dst must be at least the size reported by
 * get_params_size().
 *
 * @return 0 on success, AVERROR_BUFFER_TOO_SMALL if dst_size is too small,
 *         AVERROR_EXTERNAL on CoreMedia failure.
 */
static int copy_param_sets(
    AVCodecContext *avctx,
    CMVideoFormatDescriptionRef vid_fmt,
    uint8_t *dst,
    size_t dst_size)
{
    size_t ps_count;
    int is_count_bad = 0;
    int status;
    size_t offset = 0;
    size_t i;

    /* Same count-query fallback scheme as get_params_size(). */
    status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
                                                                0,
                                                                NULL,
                                                                NULL,
                                                                &ps_count,
                                                                NULL);
    if (status) {
        is_count_bad = 1;
        ps_count     = 0;
        status       = 0;
    }

    for (i = 0; i < ps_count || is_count_bad; i++) {
        const uint8_t *ps;
        size_t ps_size;
        size_t next_offset;

        status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
                                                                    i,
                                                                    &ps,
                                                                    &ps_size,
                                                                    NULL,
                                                                    NULL);
        if (status) {
            /* With an unknown count, a per-index failure after at least one
             * successful fetch is the normal loop exit. */
            if (i > 0 && is_count_bad) status = 0;

            break;
        }

        next_offset = offset + sizeof(start_code) + ps_size;
        if (dst_size < next_offset) {
            av_log(avctx, AV_LOG_ERROR, "Error: buffer too small for parameter sets.\n");
            return AVERROR_BUFFER_TOO_SMALL;
        }

        memcpy(dst + offset, start_code, sizeof(start_code));
        offset += sizeof(start_code);

        memcpy(dst + offset, ps, ps_size);
        offset = next_offset;
    }

    if (status) {
        av_log(avctx, AV_LOG_ERROR, "Error getting parameter set data: %d\n", status);
        return AVERROR_EXTERNAL;
    }

    return 0;
}
  250. static int set_extradata(AVCodecContext *avctx, CMSampleBufferRef sample_buffer)
  251. {
  252. CMVideoFormatDescriptionRef vid_fmt;
  253. size_t total_size;
  254. int status;
  255. vid_fmt = CMSampleBufferGetFormatDescription(sample_buffer);
  256. if (!vid_fmt) {
  257. av_log(avctx, AV_LOG_ERROR, "No video format.\n");
  258. return AVERROR_EXTERNAL;
  259. }
  260. status = get_params_size(avctx, vid_fmt, &total_size);
  261. if (status) {
  262. av_log(avctx, AV_LOG_ERROR, "Could not get parameter sets.\n");
  263. return status;
  264. }
  265. avctx->extradata = av_malloc(total_size);
  266. if (!avctx->extradata) {
  267. return AVERROR(ENOMEM);
  268. }
  269. avctx->extradata_size = total_size;
  270. status = copy_param_sets(avctx, vid_fmt, avctx->extradata, total_size);
  271. if (status) {
  272. av_log(avctx, AV_LOG_ERROR, "Could not copy param sets.\n");
  273. return status;
  274. }
  275. return 0;
  276. }
/*
 * VTCompressionSession output callback; runs on a VideoToolbox thread.
 * Pushes the encoded sample onto the output queue. On the first sample,
 * extracts extradata if global headers were requested. Errors are
 * recorded via set_async_error() and surfaced on the next pop.
 */
static void vtenc_output_callback(
    void *ctx,
    void *sourceFrameCtx,
    OSStatus status,
    VTEncodeInfoFlags flags,
    CMSampleBufferRef sample_buffer)
{
    AVCodecContext *avctx = ctx;
    VTEncContext   *vtctx = avctx->priv_data;

    /* Once an async error is set, drop everything that still arrives. */
    if (vtctx->async_error) {
        if(sample_buffer) CFRelease(sample_buffer);
        return;
    }

    if (status || !sample_buffer) {
        av_log(avctx, AV_LOG_ERROR, "Error encoding frame: %d\n", (int)status);
        set_async_error(vtctx, AVERROR_EXTERNAL);
        return;
    }

    if (!avctx->extradata && (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER)) {
        int set_status = set_extradata(avctx, sample_buffer);
        if (set_status) {
            set_async_error(vtctx, set_status);
            return;
        }
    }

    vtenc_q_push(vtctx, sample_buffer);
}
  304. static int get_length_code_size(
  305. AVCodecContext *avctx,
  306. CMSampleBufferRef sample_buffer,
  307. size_t *size)
  308. {
  309. CMVideoFormatDescriptionRef vid_fmt;
  310. int isize;
  311. int status;
  312. vid_fmt = CMSampleBufferGetFormatDescription(sample_buffer);
  313. if (!vid_fmt) {
  314. av_log(avctx, AV_LOG_ERROR, "Error getting buffer format description.\n");
  315. return AVERROR_EXTERNAL;
  316. }
  317. status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(vid_fmt,
  318. 0,
  319. NULL,
  320. NULL,
  321. NULL,
  322. &isize);
  323. if (status) {
  324. av_log(avctx, AV_LOG_ERROR, "Error getting length code size: %d\n", status);
  325. return AVERROR_EXTERNAL;
  326. }
  327. *size = isize;
  328. return 0;
  329. }
  330. /*
  331. * Returns true on success.
  332. *
  333. * If profile_level_val is NULL and this method returns true, don't specify the
  334. * profile/level to the encoder.
  335. */
/**
 * Map the configured profile/level pair to a VideoToolbox ProfileLevel
 * constant.
 *
 * @param profile_level_val Set to the VT constant, or NULL when both
 *                          profile and level are auto (meaning: don't set
 *                          the property at all).
 * @return true on success; false when the profile/level combination has
 *         no VT constant.
 */
static bool get_vt_profile_level(AVCodecContext *avctx,
                                 CFStringRef *profile_level_val)
{
    VTEncContext *vtctx = avctx->priv_data;
    int64_t profile = vtctx->profile;

    if (profile == H264_PROF_AUTO && vtctx->level) {
        //Need to pick a profile if level is not auto-selected.
        profile = vtctx->has_b_frames ? H264_PROF_MAIN : H264_PROF_BASELINE;
    }

    *profile_level_val = NULL;

    switch (profile) {
        case H264_PROF_AUTO:
            return true;

        /* level is stored as 10 * the H.264 level number; 0 means auto. */
        case H264_PROF_BASELINE:
            switch (vtctx->level) {
                case  0: *profile_level_val = kVTProfileLevel_H264_Baseline_AutoLevel; break;
                case 13: *profile_level_val = kVTProfileLevel_H264_Baseline_1_3;       break;
                case 30: *profile_level_val = kVTProfileLevel_H264_Baseline_3_0;       break;
                case 31: *profile_level_val = kVTProfileLevel_H264_Baseline_3_1;       break;
                case 32: *profile_level_val = kVTProfileLevel_H264_Baseline_3_2;       break;
                case 40: *profile_level_val = kVTProfileLevel_H264_Baseline_4_0;       break;
                case 41: *profile_level_val = kVTProfileLevel_H264_Baseline_4_1;       break;
                case 42: *profile_level_val = kVTProfileLevel_H264_Baseline_4_2;       break;
                case 50: *profile_level_val = kVTProfileLevel_H264_Baseline_5_0;       break;
                case 51: *profile_level_val = kVTProfileLevel_H264_Baseline_5_1;       break;
                case 52: *profile_level_val = kVTProfileLevel_H264_Baseline_5_2;       break;
            }
            break;

        case H264_PROF_MAIN:
            switch (vtctx->level) {
                case  0: *profile_level_val = kVTProfileLevel_H264_Main_AutoLevel; break;
                case 30: *profile_level_val = kVTProfileLevel_H264_Main_3_0;       break;
                case 31: *profile_level_val = kVTProfileLevel_H264_Main_3_1;       break;
                case 32: *profile_level_val = kVTProfileLevel_H264_Main_3_2;       break;
                case 40: *profile_level_val = kVTProfileLevel_H264_Main_4_0;       break;
                case 41: *profile_level_val = kVTProfileLevel_H264_Main_4_1;       break;
                case 42: *profile_level_val = kVTProfileLevel_H264_Main_4_2;       break;
                case 50: *profile_level_val = kVTProfileLevel_H264_Main_5_0;       break;
                case 51: *profile_level_val = kVTProfileLevel_H264_Main_5_1;       break;
                case 52: *profile_level_val = kVTProfileLevel_H264_Main_5_2;       break;
            }
            break;

        case H264_PROF_HIGH:
            switch (vtctx->level) {
                case  0: *profile_level_val = kVTProfileLevel_H264_High_AutoLevel; break;
                case 30: *profile_level_val = kVTProfileLevel_H264_High_3_0;       break;
                case 31: *profile_level_val = kVTProfileLevel_H264_High_3_1;       break;
                case 32: *profile_level_val = kVTProfileLevel_H264_High_3_2;       break;
                case 40: *profile_level_val = kVTProfileLevel_H264_High_4_0;       break;
                case 41: *profile_level_val = kVTProfileLevel_H264_High_4_1;       break;
                case 42: *profile_level_val = kVTProfileLevel_H264_High_4_2;       break;
                case 50: *profile_level_val = kVTProfileLevel_H264_High_5_0;       break;
                case 51: *profile_level_val = kVTProfileLevel_H264_High_5_1;       break;
                case 52: *profile_level_val = kVTProfileLevel_H264_High_5_2;       break;
            }
            break;
    }

    if (!*profile_level_val) {
        av_log(avctx, AV_LOG_ERROR, "Invalid Profile/Level.\n");
        return false;
    }

    return true;
}
/*
 * Encoder init: create the VTCompressionSession (preferring hardware,
 * falling back to any encoder on OSX), configure bitrate, profile/level,
 * key-frame interval and frame reordering, then prepare for encoding.
 *
 * NOTE(review): on the error paths below vtctx->session is not released
 * here — presumably the close callback handles it; confirm that close is
 * invoked after a failed init in this codebase.
 */
static av_cold int vtenc_init(AVCodecContext *avctx)
{
    CFMutableDictionaryRef enc_info;
    CMVideoCodecType codec_type;
    VTEncContext *vtctx = avctx->priv_data;
    CFStringRef profile_level;
    SInt32 bit_rate = avctx->bit_rate;
    CFNumberRef bit_rate_num;
    int status;

    codec_type = get_cm_codec_type(avctx->codec_id);
    if (!codec_type) {
        av_log(avctx, AV_LOG_ERROR, "Error: no mapping for AVCodecID %d\n", avctx->codec_id);
        return AVERROR(EINVAL);
    }

    /* Baseline profile has no B-frames; drop them rather than fail. */
    vtctx->has_b_frames = avctx->has_b_frames || avctx->max_b_frames > 0;
    if (vtctx->has_b_frames && vtctx->profile == H264_PROF_BASELINE) {
        av_log(avctx, AV_LOG_WARNING, "Cannot use B-frames with baseline profile. Output will not contain B-frames.\n");
        vtctx->has_b_frames = false;
    }

    if (!get_vt_profile_level(avctx, &profile_level)) return AVERROR(EINVAL);

    vtctx->session = NULL;
    enc_info = CFDictionaryCreateMutable(
        kCFAllocatorDefault,
        20,
        &kCFCopyStringDictionaryKeyCallBacks,
        &kCFTypeDictionaryValueCallBacks
    );
    if (!enc_info) return AVERROR(ENOMEM);

#if !TARGET_OS_IPHONE
    /* First try to require a hardware encoder (OSX only). */
    CFDictionarySetValue(enc_info, kVTVideoEncoderSpecification_RequireHardwareAcceleratedVideoEncoder, kCFBooleanTrue);
    CFDictionarySetValue(enc_info, kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder, kCFBooleanTrue);
#endif

    status = VTCompressionSessionCreate(
        kCFAllocatorDefault,
        avctx->width,
        avctx->height,
        codec_type,
        enc_info,
        NULL,
        kCFAllocatorDefault,
        vtenc_output_callback,
        avctx,
        &vtctx->session
    );

#if !TARGET_OS_IPHONE
    /* No hardware encoder available: retry without the requirement. */
    if (status != 0 || !vtctx->session) {
        CFDictionaryRemoveValue(enc_info, kVTVideoEncoderSpecification_RequireHardwareAcceleratedVideoEncoder);

        status = VTCompressionSessionCreate(
            kCFAllocatorDefault,
            avctx->width,
            avctx->height,
            codec_type,
            enc_info,
            NULL,
            kCFAllocatorDefault,
            vtenc_output_callback,
            avctx,
            &vtctx->session
        );
    }
#endif

    CFRelease(enc_info);

    if (status || !vtctx->session) {
        av_log(avctx, AV_LOG_ERROR, "Error: cannot create compression session: %d\n", status);
        return AVERROR_EXTERNAL;
    }

    bit_rate_num = CFNumberCreate(kCFAllocatorDefault,
                                  kCFNumberSInt32Type,
                                  &bit_rate);
    if (!bit_rate_num) return AVERROR(ENOMEM);

    status = VTSessionSetProperty(vtctx->session,
                                  kVTCompressionPropertyKey_AverageBitRate,
                                  bit_rate_num);
    CFRelease(bit_rate_num);

    if (status) {
        av_log(avctx, AV_LOG_ERROR, "Error setting bitrate property: %d\n", status);
        return AVERROR_EXTERNAL;
    }

    /* NULL profile_level means "auto": leave the property unset. */
    if (profile_level) {
        status = VTSessionSetProperty(vtctx->session,
                                      kVTCompressionPropertyKey_ProfileLevel,
                                      profile_level);
        if (status) {
            av_log(avctx, AV_LOG_ERROR, "Error setting profile/level property: %d\n", status);
            return AVERROR_EXTERNAL;
        }
    }

    if (avctx->gop_size > 0) {
        CFNumberRef interval = CFNumberCreate(kCFAllocatorDefault,
                                              kCFNumberIntType,
                                              &avctx->gop_size);
        /* NOTE(review): interval is not CFRelease'd after the property is
         * set — looks like a small leak; confirm. */
        status = VTSessionSetProperty(vtctx->session,
                                      kVTCompressionPropertyKey_MaxKeyFrameInterval,
                                      interval);
        if (status) {
            av_log(avctx, AV_LOG_ERROR, "Error setting 'max key-frame interval' property: %d\n", status);
            return AVERROR_EXTERNAL;
        }
    }

    if (!vtctx->has_b_frames) {
        status = VTSessionSetProperty(vtctx->session,
                                      kVTCompressionPropertyKey_AllowFrameReordering,
                                      kCFBooleanFalse);
        if (status) {
            av_log(avctx, AV_LOG_ERROR, "Error setting 'allow frame reordering' property: %d\n", status);
            return AVERROR_EXTERNAL;
        }
    }

    status = VTCompressionSessionPrepareToEncodeFrames(vtctx->session);
    if (status) {
        av_log(avctx, AV_LOG_ERROR, "Error: cannot prepare encoder: %d\n", status);
        return AVERROR_EXTERNAL;
    }

    pthread_mutex_init(&vtctx->lock, NULL);
    pthread_cond_init(&vtctx->cv_sample_sent, NULL);

    /* With B-frames, dts must lag pts; the callback-side code treats -1 as
     * "offset to be determined". */
    vtctx->dts_delta = vtctx->has_b_frames ? -1 : 0;

    return 0;
}
  517. static void vtenc_get_frame_info(CMSampleBufferRef buffer, bool *is_key_frame)
  518. {
  519. CFArrayRef attachments;
  520. CFDictionaryRef attachment;
  521. CFBooleanRef not_sync;
  522. CFIndex len;
  523. attachments = CMSampleBufferGetSampleAttachmentsArray(buffer, false);
  524. len = !attachments ? 0 : CFArrayGetCount(attachments);
  525. if (!len) {
  526. *is_key_frame = true;
  527. return;
  528. }
  529. attachment = CFArrayGetValueAtIndex(attachments, 0);
  530. if (CFDictionaryGetValueIfPresent(attachment,
  531. kCMSampleAttachmentKey_NotSync,
  532. (const void **)&not_sync))
  533. {
  534. *is_key_frame = !CFBooleanGetValue(not_sync);
  535. } else {
  536. *is_key_frame = true;
  537. }
  538. }
  539. /**
  540. * Replaces length codes with H.264 Annex B start codes.
  541. * length_code_size must equal sizeof(start_code).
  542. * On failure, the contents of data may have been modified.
  543. *
  544. * @param length_code_size Byte length of each length code
  545. * @param data Call with NAL units prefixed with length codes.
  546. * On success, the length codes are replace with
  547. * start codes.
  548. * @param size Length of data, excluding any padding.
  549. * @return 0 on success
  550. * AVERROR_BUFFER_TOO_SMALL if length code size is smaller
  551. * than a start code or if a length_code in data specifies
  552. * data beyond the end of its buffer.
  553. */
  554. static int replace_length_codes(size_t length_code_size,
  555. uint8_t *data,
  556. size_t size)
  557. {
  558. size_t remaining_size = size;
  559. if (length_code_size != sizeof(start_code)) {
  560. av_log(NULL, AV_LOG_ERROR, "Start code size and length code size not equal.\n");
  561. return AVERROR_BUFFER_TOO_SMALL;
  562. }
  563. while (remaining_size > 0) {
  564. size_t box_len = 0;
  565. size_t i;
  566. for (i = 0; i < length_code_size; i++) {
  567. box_len <<= 8;
  568. box_len |= data[i];
  569. }
  570. if (remaining_size < box_len + sizeof(start_code)) {
  571. av_log(NULL, AV_LOG_ERROR, "Length is out of range.\n");
  572. AVERROR_BUFFER_TOO_SMALL;
  573. }
  574. memcpy(data, start_code, sizeof(start_code));
  575. data += box_len + sizeof(start_code);
  576. remaining_size -= box_len + sizeof(start_code);
  577. }
  578. return 0;
  579. }
  580. /**
  581. * Copies NAL units and replaces length codes with
  582. * H.264 Annex B start codes. On failure, the contents of
  583. * dst_data may have been modified.
  584. *
  585. * @param length_code_size Byte length of each length code
  586. * @param src_data NAL units prefixed with length codes.
  587. * @param src_size Length of buffer, excluding any padding.
  588. * @param dst_data Must be zeroed before calling this function.
  589. * Contains the copied NAL units prefixed with
  590. * start codes when the function returns
  591. * successfully.
  592. * @param dst_size Length of dst_data
  593. * @return 0 on success
  594. * AVERROR_INVALIDDATA if length_code_size is invalid
  595. * AVERROR_BUFFER_TOO_SMALL if dst_data is too small
  596. * or if a length_code in src_data specifies data beyond
  597. * the end of its buffer.
  598. */
  599. static int copy_replace_length_codes(
  600. size_t length_code_size,
  601. const uint8_t *src_data,
  602. size_t src_size,
  603. uint8_t *dst_data,
  604. size_t dst_size)
  605. {
  606. size_t remaining_src_size = src_size;
  607. size_t remaining_dst_size = dst_size;
  608. if (length_code_size > 4) {
  609. return AVERROR_INVALIDDATA;
  610. }
  611. while (remaining_src_size > 0) {
  612. size_t curr_src_len;
  613. size_t curr_dst_len;
  614. size_t box_len = 0;
  615. size_t i;
  616. uint8_t *dst_box;
  617. const uint8_t *src_box;
  618. for (i = 0; i < length_code_size; i++) {
  619. box_len <<= 8;
  620. box_len |= src_data[i];
  621. }
  622. curr_src_len = box_len + length_code_size;
  623. curr_dst_len = box_len + sizeof(start_code);
  624. if (remaining_src_size < curr_src_len) {
  625. return AVERROR_BUFFER_TOO_SMALL;
  626. }
  627. if (remaining_dst_size < curr_dst_len) {
  628. return AVERROR_BUFFER_TOO_SMALL;
  629. }
  630. dst_box = dst_data + sizeof(start_code);
  631. src_box = src_data + length_code_size;
  632. memcpy(dst_data, start_code, sizeof(start_code));
  633. memcpy(dst_box, src_box, box_len);
  634. src_data += curr_src_len;
  635. dst_data += curr_dst_len;
  636. remaining_src_size -= curr_src_len;
  637. remaining_dst_size -= curr_dst_len;
  638. }
  639. return 0;
  640. }
  641. static int vtenc_cm_to_avpacket(
  642. AVCodecContext *avctx,
  643. CMSampleBufferRef sample_buffer,
  644. AVPacket *pkt)
  645. {
  646. VTEncContext *vtctx = avctx->priv_data;
  647. int status;
  648. bool is_key_frame;
  649. bool add_header;
  650. char *buf_data;
  651. size_t length_code_size;
  652. size_t header_size = 0;
  653. size_t in_buf_size;
  654. int64_t dts_delta;
  655. int64_t time_base_num;
  656. CMTime pts;
  657. CMTime dts;
  658. CMBlockBufferRef block;
  659. CMVideoFormatDescriptionRef vid_fmt;
  660. vtenc_get_frame_info(sample_buffer, &is_key_frame);
  661. status = get_length_code_size(avctx, sample_buffer, &length_code_size);
  662. if (status) return status;
  663. add_header = is_key_frame && !(avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER);
  664. if (add_header) {
  665. vid_fmt = CMSampleBufferGetFormatDescription(sample_buffer);
  666. if (!vid_fmt) {
  667. av_log(avctx, AV_LOG_ERROR, "Cannot get format description.\n");
  668. }
  669. int status = get_params_size(avctx, vid_fmt, &header_size);
  670. if (status) return status;
  671. }
  672. block = CMSampleBufferGetDataBuffer(sample_buffer);
  673. if (!block) {
  674. av_log(avctx, AV_LOG_ERROR, "Could not get block buffer from sample buffer.\n");
  675. return AVERROR_EXTERNAL;
  676. }
  677. status = CMBlockBufferGetDataPointer(block, 0, &in_buf_size, NULL, &buf_data);
  678. if (status) {
  679. av_log(avctx, AV_LOG_ERROR, "Error: cannot get data pointer: %d\n", status);
  680. return AVERROR_EXTERNAL;
  681. }
  682. size_t out_buf_size = header_size + in_buf_size;
  683. bool can_reuse_cmbuffer = !add_header &&
  684. !pkt->data &&
  685. length_code_size == sizeof(start_code);
  686. av_init_packet(pkt);
  687. if (can_reuse_cmbuffer) {
  688. AVBufferRef* buf_ref = av_buffer_create(
  689. buf_data,
  690. out_buf_size,
  691. vtenc_free_block,
  692. block,
  693. 0
  694. );
  695. if (!buf_ref) return AVERROR(ENOMEM);
  696. CFRetain(block);
  697. pkt->buf = buf_ref;
  698. pkt->data = buf_data;
  699. pkt->size = in_buf_size;
  700. status = replace_length_codes(length_code_size, pkt->data, pkt->size);
  701. if (status) {
  702. av_log(avctx, AV_LOG_ERROR, "Error replacing length codes: %d\n", status);
  703. return status;
  704. }
  705. } else {
  706. if (!pkt->data) {
  707. status = av_new_packet(pkt, out_buf_size);
  708. if(status) return status;
  709. }
  710. if (pkt->size < out_buf_size) {
  711. av_log(avctx, AV_LOG_ERROR, "Error: packet's buffer is too small.\n");
  712. return AVERROR_BUFFER_TOO_SMALL;
  713. }
  714. if (add_header) {
  715. status = copy_param_sets(avctx, vid_fmt, pkt->data, out_buf_size);
  716. if(status) return status;
  717. }
  718. status = copy_replace_length_codes(
  719. length_code_size,
  720. buf_data,
  721. in_buf_size,
  722. pkt->data + header_size,
  723. pkt->size - header_size
  724. );
  725. if (status) {
  726. av_log(avctx, AV_LOG_ERROR, "Error copying packet data: %d", status);
  727. return status;
  728. }
  729. }
  730. if (is_key_frame) {
  731. pkt->flags |= AV_PKT_FLAG_KEY;
  732. }
  733. pts = CMSampleBufferGetPresentationTimeStamp(sample_buffer);
  734. dts = CMSampleBufferGetDecodeTimeStamp (sample_buffer);
  735. dts_delta = vtctx->dts_delta >= 0 ? vtctx->dts_delta : 0;
  736. time_base_num = avctx->time_base.num;
  737. pkt->pts = pts.value / time_base_num;
  738. pkt->dts = dts.value / time_base_num - dts_delta;
  739. return 0;
  740. }
  741. /*
  742. * contiguous_buf_size is 0 if not contiguous, and the size of the buffer
  743. * containing all planes if so.
  744. */
/*
 * Derive the CVPixelBuffer pixel format and per-plane geometry for frame.
 * contiguous_buf_size is 0 if the planes are not contiguous in memory,
 * and the size of the buffer containing all planes if they are.
 *
 * widths/heights/strides must each hold at least *plane_count entries.
 * Returns 0 on success, AVERROR(EINVAL) for unsupported pixel formats.
 */
static int get_cv_pixel_info(
    AVCodecContext *avctx,
    const AVFrame *frame,
    int *color,
    int *plane_count,
    size_t *widths,
    size_t *heights,
    size_t *strides,
    size_t *contiguous_buf_size)
{
    VTEncContext *vtctx = avctx->priv_data;
    int av_format       = frame->format;
    int av_color_range  = av_frame_get_color_range(frame);
    int i;

    switch (av_format) {
    case AV_PIX_FMT_NV12:
        /* Pick the video- or full-range variant; default to video (MPEG)
         * range with a one-time warning when unset. */
        switch (av_color_range) {
        case AVCOL_RANGE_MPEG:
            *color = kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange;
            break;

        case AVCOL_RANGE_JPEG:
            *color = kCVPixelFormatType_420YpCbCr8BiPlanarFullRange;
            break;

        default:
            if (!vtctx->warned_color_range) {
                vtctx->warned_color_range = true;
                av_log(avctx, AV_LOG_WARNING, "Color range not set for NV12. Using MPEG range.\n");
            }
            *color = kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange;
        }

        *plane_count = 2;

        /* NOTE(review): frame is already dereferenced above, so the
         * `frame ?` fallbacks below can never take the else branch. */
        widths [0] = avctx->width;
        heights[0] = avctx->height;
        strides[0] = frame ? frame->linesize[0] : avctx->width;

        widths [1] = (avctx->width  + 1) / 2;
        heights[1] = (avctx->height + 1) / 2;
        strides[1] = frame ? frame->linesize[1] : (avctx->width + 1) & -2;
        break;

    case AV_PIX_FMT_YUV420P:
        switch (av_color_range) {
        case AVCOL_RANGE_MPEG:
            *color = kCVPixelFormatType_420YpCbCr8Planar;
            break;

        case AVCOL_RANGE_JPEG:
            *color = kCVPixelFormatType_420YpCbCr8PlanarFullRange;
            break;

        default:
            if (!vtctx->warned_color_range) {
                vtctx->warned_color_range = true;
                av_log(avctx, AV_LOG_WARNING, "Color range not set for YUV 4:2:0. Using MPEG range.\n");
            }
            *color = kCVPixelFormatType_420YpCbCr8Planar;
        }

        *plane_count = 3;

        widths [0] = avctx->width;
        heights[0] = avctx->height;
        strides[0] = frame ? frame->linesize[0] : avctx->width;

        widths [1] = (avctx->width  + 1) / 2;
        heights[1] = (avctx->height + 1) / 2;
        strides[1] = frame ? frame->linesize[1] : (avctx->width + 1) / 2;

        widths [2] = (avctx->width  + 1) / 2;
        heights[2] = (avctx->height + 1) / 2;
        strides[2] = frame ? frame->linesize[2] : (avctx->width + 1) / 2;
        break;

    default: return AVERROR(EINVAL);
    }

    /* Sum plane sizes while verifying each plane ends exactly where the
     * next begins; bail out to 0 on the first gap. */
    *contiguous_buf_size = 0;
    for (i = 0; i < *plane_count; i++) {
        if (i < *plane_count - 1 &&
            frame->data[i] + strides[i] * heights[i] != frame->data[i + 1]) {
            *contiguous_buf_size = 0;
            break;
        }

        *contiguous_buf_size += strides[i] * heights[i];
    }

    return 0;
}
  822. #if !TARGET_OS_IPHONE
  823. //Not used on iOS - frame is always copied.
/* CVPixelBufferCreateWithPlanarBytes release callback (OSX path): frees
 * the AVFrame reference that kept the wrapped plane data alive. */
static void free_avframe(
    void *release_ctx,
    const void *data,
    size_t size,
    size_t plane_count,
    const void *plane_addresses[])
{
    AVFrame *frame = release_ctx;
    av_frame_free(&frame);
}
  834. #else
  835. //Not used on OSX - frame is never copied.
  836. static int copy_avframe_to_pixel_buffer(AVCodecContext *avctx,
  837. const AVFrame *frame,
  838. CVPixelBufferRef cv_img,
  839. const size_t *plane_strides,
  840. const size_t *plane_rows)
  841. {
  842. int i, j;
  843. size_t plane_count;
  844. int status;
  845. int rows;
  846. int src_stride;
  847. int dst_stride;
  848. uint8_t *src_addr;
  849. uint8_t *dst_addr;
  850. size_t copy_bytes;
  851. status = CVPixelBufferLockBaseAddress(cv_img, 0);
  852. if (status) {
  853. av_log(
  854. avctx,
  855. AV_LOG_ERROR,
  856. "Error: Could not lock base address of CVPixelBuffer: %d.\n",
  857. status
  858. );
  859. }
  860. if (CVPixelBufferIsPlanar(cv_img)) {
  861. plane_count = CVPixelBufferGetPlaneCount(cv_img);
  862. for (i = 0; frame->data[i]; i++) {
  863. if (i == plane_count) {
  864. CVPixelBufferUnlockBaseAddress(cv_img, 0);
  865. av_log(avctx,
  866. AV_LOG_ERROR,
  867. "Error: different number of planes in AVFrame and CVPixelBuffer.\n"
  868. );
  869. return AVERROR_EXTERNAL;
  870. }
  871. dst_addr = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(cv_img, i);
  872. src_addr = (uint8_t*)frame->data[i];
  873. dst_stride = CVPixelBufferGetBytesPerRowOfPlane(cv_img, i);
  874. src_stride = plane_strides[i];
  875. rows = plane_rows[i];
  876. if (dst_stride == src_stride) {
  877. memcpy(dst_addr, src_addr, src_stride * rows);
  878. } else {
  879. copy_bytes = dst_stride < src_stride ? dst_stride : src_stride;
  880. for (j = 0; j < rows; j++) {
  881. memcpy(dst_addr + j * dst_stride, src_addr + j * src_stride, copy_bytes);
  882. }
  883. }
  884. }
  885. } else {
  886. if (frame->data[1]) {
  887. CVPixelBufferUnlockBaseAddress(cv_img, 0);
  888. av_log(avctx,
  889. AV_LOG_ERROR,
  890. "Error: different number of planes in AVFrame and non-planar CVPixelBuffer.\n"
  891. );
  892. return AVERROR_EXTERNAL;
  893. }
  894. dst_addr = (uint8_t*)CVPixelBufferGetBaseAddress(cv_img);
  895. src_addr = (uint8_t*)frame->data[0];
  896. dst_stride = CVPixelBufferGetBytesPerRow(cv_img);
  897. src_stride = plane_strides[0];
  898. rows = plane_rows[0];
  899. if (dst_stride == src_stride) {
  900. memcpy(dst_addr, src_addr, src_stride * rows);
  901. } else {
  902. copy_bytes = dst_stride < src_stride ? dst_stride : src_stride;
  903. for (j = 0; j < rows; j++) {
  904. memcpy(dst_addr + j * dst_stride, src_addr + j * src_stride, copy_bytes);
  905. }
  906. }
  907. }
  908. status = CVPixelBufferUnlockBaseAddress(cv_img, 0);
  909. if (status) {
  910. av_log(avctx, AV_LOG_ERROR, "Error: Could not unlock CVPixelBuffer base address: %d.\n", status);
  911. return AVERROR_EXTERNAL;
  912. }
  913. return 0;
  914. }
  915. #endif //!TARGET_OS_IPHONE
  916. static int create_cv_pixel_buffer(AVCodecContext *avctx,
  917. const AVFrame *frame,
  918. CVPixelBufferRef *cv_img)
  919. {
  920. int plane_count;
  921. int color;
  922. size_t widths [AV_NUM_DATA_POINTERS];
  923. size_t heights[AV_NUM_DATA_POINTERS];
  924. size_t strides[AV_NUM_DATA_POINTERS];
  925. int status;
  926. size_t contiguous_buf_size;
  927. memset(widths, 0, sizeof(widths));
  928. memset(heights, 0, sizeof(heights));
  929. memset(strides, 0, sizeof(strides));
  930. status = get_cv_pixel_info(
  931. avctx,
  932. frame,
  933. &color,
  934. &plane_count,
  935. widths,
  936. heights,
  937. strides,
  938. &contiguous_buf_size
  939. );
  940. if (status) {
  941. av_log(
  942. avctx,
  943. AV_LOG_ERROR,
  944. "Error: Cannot convert format %d color_range %d: %d\n",
  945. frame->format,
  946. av_frame_get_color_range(frame),
  947. status
  948. );
  949. return AVERROR_EXTERNAL;
  950. }
  951. #if TARGET_OS_IPHONE
  952. status = CVPixelBufferCreate(
  953. kCFAllocatorDefault,
  954. frame->width,
  955. frame->height,
  956. color,
  957. NULL,
  958. cv_img
  959. );
  960. if (status) {
  961. return AVERROR_EXTERNAL;
  962. }
  963. status = copy_avframe_to_pixel_buffer(avctx, frame, *cv_img, strides, heights);
  964. if (status) {
  965. CFRelease(*cv_img);
  966. *cv_img = NULL;
  967. return status;
  968. }
  969. #else
  970. AVFrame *enc_frame = av_frame_alloc();
  971. if (!enc_frame) return AVERROR(ENOMEM);
  972. status = av_frame_ref(enc_frame, frame);
  973. if (status) {
  974. av_frame_free(&enc_frame);
  975. return status;
  976. }
  977. status = CVPixelBufferCreateWithPlanarBytes(
  978. kCFAllocatorDefault,
  979. enc_frame->width,
  980. enc_frame->height,
  981. color,
  982. NULL,
  983. contiguous_buf_size,
  984. plane_count,
  985. (void **)enc_frame->data,
  986. widths,
  987. heights,
  988. strides,
  989. free_avframe,
  990. enc_frame,
  991. NULL,
  992. cv_img
  993. );
  994. if (status) {
  995. av_log(avctx, AV_LOG_ERROR, "Error: Could not create CVPixelBuffer: %d\n", status);
  996. return AVERROR_EXTERNAL;
  997. }
  998. #endif
  999. return 0;
  1000. }
  1001. static int vtenc_send_frame(AVCodecContext *avctx,
  1002. VTEncContext *vtctx,
  1003. const AVFrame *frame)
  1004. {
  1005. CMTime time;
  1006. CVPixelBufferRef cv_img = NULL;
  1007. int status = create_cv_pixel_buffer(avctx, frame, &cv_img);
  1008. if (status) return status;
  1009. time = CMTimeMake(frame->pts * avctx->time_base.num, avctx->time_base.den);
  1010. status = VTCompressionSessionEncodeFrame(
  1011. vtctx->session,
  1012. cv_img,
  1013. time,
  1014. kCMTimeInvalid,
  1015. NULL,
  1016. NULL,
  1017. NULL
  1018. );
  1019. CFRelease(cv_img);
  1020. if (status) {
  1021. av_log(avctx, AV_LOG_ERROR, "Error: cannot encode frame: %d\n", status);
  1022. return AVERROR_EXTERNAL;
  1023. }
  1024. return 0;
  1025. }
  1026. static av_cold int vtenc_frame(
  1027. AVCodecContext *avctx,
  1028. AVPacket *pkt,
  1029. const AVFrame *frame,
  1030. int *got_packet)
  1031. {
  1032. VTEncContext *vtctx = avctx->priv_data;
  1033. bool get_frame;
  1034. int status;
  1035. CMSampleBufferRef buf = NULL;
  1036. if (frame) {
  1037. status = vtenc_send_frame(avctx, vtctx, frame);
  1038. if (status) {
  1039. status = AVERROR_EXTERNAL;
  1040. goto end_nopkt;
  1041. }
  1042. if (vtctx->frame_ct_in == 0) {
  1043. vtctx->first_pts = frame->pts;
  1044. } else if(vtctx->frame_ct_in == 1 && vtctx->has_b_frames) {
  1045. vtctx->dts_delta = frame->pts - vtctx->first_pts;
  1046. }
  1047. vtctx->frame_ct_in++;
  1048. } else if(!vtctx->flushing) {
  1049. vtctx->flushing = true;
  1050. status = VTCompressionSessionCompleteFrames(vtctx->session,
  1051. kCMTimeIndefinite);
  1052. if (status) {
  1053. av_log(avctx, AV_LOG_ERROR, "Error flushing frames: %d\n", status);
  1054. status = AVERROR_EXTERNAL;
  1055. goto end_nopkt;
  1056. }
  1057. }
  1058. *got_packet = 0;
  1059. get_frame = vtctx->dts_delta >= 0 || !frame;
  1060. if (!get_frame) {
  1061. status = 0;
  1062. goto end_nopkt;
  1063. }
  1064. status = vtenc_q_pop(vtctx, !frame, &buf);
  1065. if (status) goto end_nopkt;
  1066. if (!buf) goto end_nopkt;
  1067. status = vtenc_cm_to_avpacket(avctx, buf, pkt);
  1068. CFRelease(buf);
  1069. if (status) goto end_nopkt;
  1070. *got_packet = 1;
  1071. return 0;
  1072. end_nopkt:
  1073. av_packet_unref(pkt);
  1074. return status;
  1075. }
  1076. static av_cold int vtenc_close(AVCodecContext *avctx)
  1077. {
  1078. VTEncContext *vtctx = avctx->priv_data;
  1079. if(!vtctx->session) return 0;
  1080. pthread_cond_destroy(&vtctx->cv_sample_sent);
  1081. pthread_mutex_destroy(&vtctx->lock);
  1082. CFRelease(vtctx->session);
  1083. vtctx->session = NULL;
  1084. return 0;
  1085. }
/* Input pixel formats accepted by the encoder. YUV420P is only usable on
 * macOS, where the CVPixelBuffer wraps the frame planes directly; the iOS
 * copy path supports NV12 only. */
static const enum AVPixelFormat pix_fmts[] = {
    AV_PIX_FMT_NV12,
#if !TARGET_OS_IPHONE
    AV_PIX_FMT_YUV420P,
#endif
    AV_PIX_FMT_NONE
};
#define OFFSET(x) offsetof(VTEncContext, x)
#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
/* Private encoder options: H.264 profile and level, mapped onto the
 * VT_H264Profile enum and numeric level values (13 == 1.3, 30 == 3.0, ...).
 * A level of 0 lets VideoToolbox pick automatically. */
static const AVOption options[] = {
    { "profile", "Profile", OFFSET(profile), AV_OPT_TYPE_INT, { .i64 = H264_PROF_AUTO }, H264_PROF_AUTO, H264_PROF_COUNT, VE, "profile" },
    { "baseline", "Baseline Profile", 0, AV_OPT_TYPE_CONST, { .i64 = H264_PROF_BASELINE }, INT_MIN, INT_MAX, VE, "profile" },
    { "main", "Main Profile", 0, AV_OPT_TYPE_CONST, { .i64 = H264_PROF_MAIN }, INT_MIN, INT_MAX, VE, "profile" },
    { "high", "High Profile", 0, AV_OPT_TYPE_CONST, { .i64 = H264_PROF_HIGH }, INT_MIN, INT_MAX, VE, "profile" },
    { "level", "Level", OFFSET(level), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 52, VE, "level" },
    { "1.3", "Level 1.3, only available with Baseline Profile", 0, AV_OPT_TYPE_CONST, { .i64 = 13 }, INT_MIN, INT_MAX, VE, "level" },
    { "3.0", "Level 3.0", 0, AV_OPT_TYPE_CONST, { .i64 = 30 }, INT_MIN, INT_MAX, VE, "level" },
    { "3.1", "Level 3.1", 0, AV_OPT_TYPE_CONST, { .i64 = 31 }, INT_MIN, INT_MAX, VE, "level" },
    { "3.2", "Level 3.2", 0, AV_OPT_TYPE_CONST, { .i64 = 32 }, INT_MIN, INT_MAX, VE, "level" },
    { "4.0", "Level 4.0", 0, AV_OPT_TYPE_CONST, { .i64 = 40 }, INT_MIN, INT_MAX, VE, "level" },
    { "4.1", "Level 4.1", 0, AV_OPT_TYPE_CONST, { .i64 = 41 }, INT_MIN, INT_MAX, VE, "level" },
    { "4.2", "Level 4.2", 0, AV_OPT_TYPE_CONST, { .i64 = 42 }, INT_MIN, INT_MAX, VE, "level" },
    { "5.0", "Level 5.0", 0, AV_OPT_TYPE_CONST, { .i64 = 50 }, INT_MIN, INT_MAX, VE, "level" },
    { "5.1", "Level 5.1", 0, AV_OPT_TYPE_CONST, { .i64 = 51 }, INT_MIN, INT_MAX, VE, "level" },
    { "5.2", "Level 5.2", 0, AV_OPT_TYPE_CONST, { .i64 = 52 }, INT_MIN, INT_MAX, VE, "level" },
    { NULL },
};
/* AVClass exposing the private options table above through the
 * AVOptions API. */
static const AVClass h264_videotoolbox_class = {
    .class_name = "h264_videotoolbox",
    .item_name  = av_default_item_name,
    .option     = options,
    .version    = LIBAVUTIL_VERSION_INT,
};
/* Codec registration for the hardware-accelerated VideoToolbox H.264
 * encoder. AV_CODEC_CAP_DELAY is required because output is produced
 * asynchronously and must be drained with NULL frames at EOF. */
AVCodec ff_h264_videotoolbox_encoder = {
    .name             = "h264_videotoolbox",
    .long_name        = NULL_IF_CONFIG_SMALL("VideoToolbox H.264 Encoder"),
    .type             = AVMEDIA_TYPE_VIDEO,
    .id               = AV_CODEC_ID_H264,
    .priv_data_size   = sizeof(VTEncContext),
    .pix_fmts         = pix_fmts,
    .init             = vtenc_init,
    .encode2          = vtenc_frame,
    .close            = vtenc_close,
    .capabilities     = AV_CODEC_CAP_DELAY,
    .priv_class       = &h264_videotoolbox_class,
    .caps_internal    = FF_CODEC_CAP_INIT_THREADSAFE |
                        FF_CODEC_CAP_INIT_CLEANUP,
};