You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1092 lines
39KB

  1. /*
  2. * AVFoundation input device
  3. * Copyright (c) 2014 Thilo Borgmann <thilo.borgmann@mail.de>
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. /**
  22. * @file
  23. * AVFoundation input device
  24. * @author Thilo Borgmann <thilo.borgmann@mail.de>
  25. */
  26. #import <AVFoundation/AVFoundation.h>
  27. #include <pthread.h>
  28. #include "libavutil/pixdesc.h"
  29. #include "libavutil/opt.h"
  30. #include "libavutil/avstring.h"
  31. #include "libavformat/internal.h"
  32. #include "libavutil/internal.h"
  33. #include "libavutil/parseutils.h"
  34. #include "libavutil/time.h"
  35. #include "libavutil/imgutils.h"
  36. #include "avdevice.h"
// Internal timebase used for all PTS values handed to libavformat:
// microseconds (see avpriv_set_pts_info() calls below).
static const int avf_time_base = 1000000;

// Same timebase as an AVRational, for av_rescale_q() in the packet path.
static const AVRational avf_time_base_q = {
    .num = 1,
    .den = avf_time_base
};
// One entry of the FFmpeg <-> CoreVideo pixel format mapping table below.
struct AVFPixelFormatSpec {
    enum AVPixelFormat ff_id;  // FFmpeg pixel format
    OSType avf_id;             // CoreVideo (kCVPixelFormatType_*) four-char code
};
/** Mapping table between FFmpeg and CoreVideo pixel formats.
 *  Scanned linearly in add_video_device(); terminated by the
 *  AV_PIX_FMT_NONE sentinel entry.
 */
static const struct AVFPixelFormatSpec avf_pixel_formats[] = {
    { AV_PIX_FMT_MONOBLACK, kCVPixelFormatType_1Monochrome },
    { AV_PIX_FMT_RGB555BE, kCVPixelFormatType_16BE555 },
    { AV_PIX_FMT_RGB555LE, kCVPixelFormatType_16LE555 },
    { AV_PIX_FMT_RGB565BE, kCVPixelFormatType_16BE565 },
    { AV_PIX_FMT_RGB565LE, kCVPixelFormatType_16LE565 },
    { AV_PIX_FMT_RGB24, kCVPixelFormatType_24RGB },
    { AV_PIX_FMT_BGR24, kCVPixelFormatType_24BGR },
    { AV_PIX_FMT_0RGB, kCVPixelFormatType_32ARGB },
    { AV_PIX_FMT_BGR0, kCVPixelFormatType_32BGRA },
    { AV_PIX_FMT_0BGR, kCVPixelFormatType_32ABGR },
    { AV_PIX_FMT_RGB0, kCVPixelFormatType_32RGBA },
    // NOTE(review): BGR48BE paired with 48RGB looks like a channel-order
    // mismatch — confirm against CoreVideo docs before relying on it.
    { AV_PIX_FMT_BGR48BE, kCVPixelFormatType_48RGB },
    { AV_PIX_FMT_UYVY422, kCVPixelFormatType_422YpCbCr8 },
    { AV_PIX_FMT_YUVA444P, kCVPixelFormatType_4444YpCbCrA8R },
    { AV_PIX_FMT_YUVA444P16LE, kCVPixelFormatType_4444AYpCbCr16 },
    { AV_PIX_FMT_YUV444P, kCVPixelFormatType_444YpCbCr8 },
    { AV_PIX_FMT_YUV422P16, kCVPixelFormatType_422YpCbCr16 },
    { AV_PIX_FMT_YUV422P10, kCVPixelFormatType_422YpCbCr10 },
    { AV_PIX_FMT_YUV444P10, kCVPixelFormatType_444YpCbCr10 },
    { AV_PIX_FMT_YUV420P, kCVPixelFormatType_420YpCbCr8Planar },
    { AV_PIX_FMT_NV12, kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange },
    { AV_PIX_FMT_YUYV422, kCVPixelFormatType_422YpCbCr8_yuvs },
#if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1080
    // kCVPixelFormatType_OneComponent8 only exists on OS X >= 10.8.
    { AV_PIX_FMT_GRAY8, kCVPixelFormatType_OneComponent8 },
#endif
    { AV_PIX_FMT_NONE, 0 }  // sentinel
};
/**
 * Private context of the AVFoundation input device.
 *
 * Shared between the demuxer thread and the capture delegates running on
 * AVFoundation's dispatch queues; frame_lock/frame_wait_cond guard the
 * pending current_frame/current_audio_frame sample buffers.
 */
typedef struct
{
    AVClass* class;                // AVClass pointer, kept first per FFmpeg AVOptions convention

    int frames_captured;           // number of video frames received so far
    int audio_frames_captured;     // number of audio frames received so far
    int64_t first_pts;             // wallclock time (av_gettime) at header read, video
    int64_t first_audio_pts;       // wallclock time (av_gettime) at header read, audio
    pthread_mutex_t frame_lock;    // protects the current_* sample buffers below
    pthread_cond_t frame_wait_cond; // signalled by the delegates when a buffer arrives
    id avf_delegate;               // AVFFrameReceiver instance
    id avf_audio_delegate;         // AVFAudioReceiver instance

    AVRational framerate;          // user-requested capture frame rate
    int width, height;             // user-requested capture size (0 = accept any)

    int capture_cursor;            // screen capture: draw the mouse cursor
    int capture_mouse_clicks;      // screen capture: highlight mouse clicks

    int list_devices;              // option: list devices and abort header read
    int video_device_index;        // selected video device (-1 = resolve by name)
    int video_stream_index;        // index of the created video AVStream
    int audio_device_index;        // selected audio device (-1 = resolve by name)
    int audio_stream_index;        // index of the created audio AVStream

    char *video_filename;          // video device name parsed from the URL
    char *audio_filename;          // audio device name parsed from the URL

    int num_video_devices;         // number of real (non-screen) video devices

    // Audio stream description captured from the first audio frame.
    int audio_channels;
    int audio_bits_per_sample;
    int audio_float;
    int audio_be;
    int audio_signed_integer;
    int audio_packed;
    int audio_non_interleaved;

    int32_t *audio_buffer;         // scratch buffer used for non-interleaved audio
    int audio_buffer_size;         // size of audio_buffer in bytes

    enum AVPixelFormat pixel_format; // negotiated output pixel format

    AVCaptureSession *capture_session;
    AVCaptureVideoDataOutput *video_output;
    AVCaptureAudioDataOutput *audio_output;
    CMSampleBufferRef current_frame;       // latest video frame (retained; guarded by frame_lock)
    CMSampleBufferRef current_audio_frame; // latest audio frame (retained; guarded by frame_lock)
} AVFContext;
/// Acquire the mutex guarding the pending sample buffers in @p ctx.
static void lock_frames(AVFContext* ctx)
{
    pthread_mutex_lock(&ctx->frame_lock);
}
/// Release the mutex guarding the pending sample buffers in @p ctx.
static void unlock_frames(AVFContext* ctx)
{
    pthread_mutex_unlock(&ctx->frame_lock);
}
/** FrameReceiver class - delegate for AVCaptureSession
 */
  123. @interface AVFFrameReceiver : NSObject
  124. {
  125. AVFContext* _context;
  126. }
  127. - (id)initWithContext:(AVFContext*)context;
  128. - (void) captureOutput:(AVCaptureOutput *)captureOutput
  129. didOutputSampleBuffer:(CMSampleBufferRef)videoFrame
  130. fromConnection:(AVCaptureConnection *)connection;
  131. @end
  132. @implementation AVFFrameReceiver
  133. - (id)initWithContext:(AVFContext*)context
  134. {
  135. if (self = [super init]) {
  136. _context = context;
  137. }
  138. return self;
  139. }
  140. - (void) captureOutput:(AVCaptureOutput *)captureOutput
  141. didOutputSampleBuffer:(CMSampleBufferRef)videoFrame
  142. fromConnection:(AVCaptureConnection *)connection
  143. {
  144. lock_frames(_context);
  145. if (_context->current_frame != nil) {
  146. CFRelease(_context->current_frame);
  147. }
  148. _context->current_frame = (CMSampleBufferRef)CFRetain(videoFrame);
  149. pthread_cond_signal(&_context->frame_wait_cond);
  150. unlock_frames(_context);
  151. ++_context->frames_captured;
  152. }
  153. @end
/** AudioReceiver class - delegate for AVCaptureSession
 */
  156. @interface AVFAudioReceiver : NSObject
  157. {
  158. AVFContext* _context;
  159. }
  160. - (id)initWithContext:(AVFContext*)context;
  161. - (void) captureOutput:(AVCaptureOutput *)captureOutput
  162. didOutputSampleBuffer:(CMSampleBufferRef)audioFrame
  163. fromConnection:(AVCaptureConnection *)connection;
  164. @end
  165. @implementation AVFAudioReceiver
  166. - (id)initWithContext:(AVFContext*)context
  167. {
  168. if (self = [super init]) {
  169. _context = context;
  170. }
  171. return self;
  172. }
  173. - (void) captureOutput:(AVCaptureOutput *)captureOutput
  174. didOutputSampleBuffer:(CMSampleBufferRef)audioFrame
  175. fromConnection:(AVCaptureConnection *)connection
  176. {
  177. lock_frames(_context);
  178. if (_context->current_audio_frame != nil) {
  179. CFRelease(_context->current_audio_frame);
  180. }
  181. _context->current_audio_frame = (CMSampleBufferRef)CFRetain(audioFrame);
  182. pthread_cond_signal(&_context->frame_wait_cond);
  183. unlock_frames(_context);
  184. ++_context->audio_frames_captured;
  185. }
  186. @end
  187. static void destroy_context(AVFContext* ctx)
  188. {
  189. [ctx->capture_session stopRunning];
  190. [ctx->capture_session release];
  191. [ctx->video_output release];
  192. [ctx->audio_output release];
  193. [ctx->avf_delegate release];
  194. [ctx->avf_audio_delegate release];
  195. ctx->capture_session = NULL;
  196. ctx->video_output = NULL;
  197. ctx->audio_output = NULL;
  198. ctx->avf_delegate = NULL;
  199. ctx->avf_audio_delegate = NULL;
  200. av_freep(&ctx->audio_buffer);
  201. pthread_mutex_destroy(&ctx->frame_lock);
  202. pthread_cond_destroy(&ctx->frame_wait_cond);
  203. if (ctx->current_frame) {
  204. CFRelease(ctx->current_frame);
  205. }
  206. }
  207. static void parse_device_name(AVFormatContext *s)
  208. {
  209. AVFContext *ctx = (AVFContext*)s->priv_data;
  210. char *tmp = av_strdup(s->url);
  211. char *save;
  212. if (tmp[0] != ':') {
  213. ctx->video_filename = av_strtok(tmp, ":", &save);
  214. ctx->audio_filename = av_strtok(NULL, ":", &save);
  215. } else {
  216. ctx->audio_filename = av_strtok(tmp, ":", &save);
  217. }
  218. }
/**
 * Configure the video device.
 *
 * Configure the video device using a run-time approach to access properties
 * since formats, activeFormat are available since iOS >= 7.0 or OSX >= 10.7
 * and activeVideoMaxFrameDuration is available since iOS >= 7.0 and OSX >= 10.9.
 *
 * The NSUndefinedKeyException must be handled by the caller of this function.
 *
 */
static int configure_video_device(AVFormatContext *s, AVCaptureDevice *video_device)
{
    AVFContext *ctx = (AVFContext*)s->priv_data;
    double framerate = av_q2d(ctx->framerate);
    NSObject *range = nil;
    NSObject *format = nil;
    NSObject *selected_range = nil;
    NSObject *selected_format = nil;

    // Walk every capture format of the device. Properties are accessed via
    // KVC / performSelector so this compiles against older SDKs that lack
    // the typed accessors (see the doc comment above).
    for (format in [video_device valueForKey:@"formats"]) {
        CMFormatDescriptionRef formatDescription;
        CMVideoDimensions dimensions;

        formatDescription = (CMFormatDescriptionRef) [format performSelector:@selector(formatDescription)];
        dimensions = CMVideoFormatDescriptionGetDimensions(formatDescription);

        // A format matches if no size was requested (width == height == 0)
        // or its dimensions match exactly.
        // NOTE(review): without a requested size the loop keeps overwriting
        // selected_format, so the LAST matching format wins — confirm this
        // is intended.
        if ((ctx->width == 0 && ctx->height == 0) ||
            (dimensions.width == ctx->width && dimensions.height == ctx->height)) {
            selected_format = format;

            // Pick the rate range whose maximum matches the requested
            // frame rate within 0.01 fps.
            for (range in [format valueForKey:@"videoSupportedFrameRateRanges"]) {
                double max_framerate;

                [[range valueForKey:@"maxFrameRate"] getValue:&max_framerate];
                if (fabs (framerate - max_framerate) < 0.01) {
                    selected_range = range;
                    break;
                }
            }
        }
    }

    if (!selected_format) {
        av_log(s, AV_LOG_ERROR, "Selected video size (%dx%d) is not supported by the device\n",
            ctx->width, ctx->height);
        goto unsupported_format;
    }

    if (!selected_range) {
        av_log(s, AV_LOG_ERROR, "Selected framerate (%f) is not supported by the device\n",
            framerate);
        goto unsupported_format;
    }

    // Apply the chosen format; min and max frame duration are both set to
    // the range's minFrameDuration, pinning the device to a fixed rate.
    // The device stays locked; avf_read_header() unlocks it after the
    // session has started.
    if ([video_device lockForConfiguration:NULL] == YES) {
        NSValue *min_frame_duration = [selected_range valueForKey:@"minFrameDuration"];

        [video_device setValue:selected_format forKey:@"activeFormat"];
        [video_device setValue:min_frame_duration forKey:@"activeVideoMinFrameDuration"];
        [video_device setValue:min_frame_duration forKey:@"activeVideoMaxFrameDuration"];
    } else {
        av_log(s, AV_LOG_ERROR, "Could not lock device for configuration");
        return AVERROR(EINVAL);
    }

    return 0;

unsupported_format:
    // Help the user: dump every mode the device does support.
    av_log(s, AV_LOG_ERROR, "Supported modes:\n");
    for (format in [video_device valueForKey:@"formats"]) {
        CMFormatDescriptionRef formatDescription;
        CMVideoDimensions dimensions;

        formatDescription = (CMFormatDescriptionRef) [format performSelector:@selector(formatDescription)];
        dimensions = CMVideoFormatDescriptionGetDimensions(formatDescription);

        for (range in [format valueForKey:@"videoSupportedFrameRateRanges"]) {
            double min_framerate;
            double max_framerate;

            [[range valueForKey:@"minFrameRate"] getValue:&min_framerate];
            [[range valueForKey:@"maxFrameRate"] getValue:&max_framerate];
            av_log(s, AV_LOG_ERROR, " %dx%d@[%f %f]fps\n",
                dimensions.width, dimensions.height,
                min_framerate, max_framerate);
        }
    }
    return AVERROR(EINVAL);
}
/**
 * Attach the video device to the capture session: create the input,
 * configure format/framerate, negotiate the pixel format and wire up the
 * AVFFrameReceiver delegate output.
 * Returns 0 on success, 1 (or a negative AVERROR) on failure.
 */
static int add_video_device(AVFormatContext *s, AVCaptureDevice *video_device)
{
    AVFContext *ctx = (AVFContext*)s->priv_data;
    int ret;
    NSError *error = nil;
    AVCaptureInput* capture_input = nil;
    struct AVFPixelFormatSpec pxl_fmt_spec;
    NSNumber *pixel_format;
    NSDictionary *capture_dict;
    dispatch_queue_t queue;

    // Indices below num_video_devices are real cameras and need a device
    // input wrapper; larger indices are screen-capture objects that already
    // are AVCaptureInputs (cast to AVCaptureDevice* in avf_read_header).
    if (ctx->video_device_index < ctx->num_video_devices) {
        capture_input = (AVCaptureInput*) [[[AVCaptureDeviceInput alloc] initWithDevice:video_device error:&error] autorelease];
    } else {
        capture_input = (AVCaptureInput*) video_device;
    }

    if (!capture_input) {
        av_log(s, AV_LOG_ERROR, "Failed to create AV capture input device: %s\n",
            [[error localizedDescription] UTF8String]);
        return 1;
    }

    if ([ctx->capture_session canAddInput:capture_input]) {
        [ctx->capture_session addInput:capture_input];
    } else {
        av_log(s, AV_LOG_ERROR, "can't add video input to capture session\n");
        return 1;
    }

    // Attaching output
    ctx->video_output = [[AVCaptureVideoDataOutput alloc] init];

    if (!ctx->video_output) {
        av_log(s, AV_LOG_ERROR, "Failed to init AV video output\n");
        return 1;
    }

    // Configure device framerate and video size
    @try {
        if ((ret = configure_video_device(s, video_device)) < 0) {
            return ret;
        }
    } @catch (NSException *exception) {
        // NSUndefinedKeyException is expected on SDKs lacking the KVC
        // properties used by configure_video_device(); anything else is fatal.
        if (![[exception name] isEqualToString:NSUndefinedKeyException]) {
            av_log (s, AV_LOG_ERROR, "An error occurred: %s", [exception.reason UTF8String]);
            return AVERROR_EXTERNAL;
        }
    }

    // select pixel format
    pxl_fmt_spec.ff_id = AV_PIX_FMT_NONE;

    for (int i = 0; avf_pixel_formats[i].ff_id != AV_PIX_FMT_NONE; i++) {
        if (ctx->pixel_format == avf_pixel_formats[i].ff_id) {
            pxl_fmt_spec = avf_pixel_formats[i];
            break;
        }
    }

    // check if selected pixel format is supported by AVFoundation
    if (pxl_fmt_spec.ff_id == AV_PIX_FMT_NONE) {
        av_log(s, AV_LOG_ERROR, "Selected pixel format (%s) is not supported by AVFoundation.\n",
            av_get_pix_fmt_name(pxl_fmt_spec.ff_id));
        return 1;
    }

    // check if the pixel format is available for this device
    if ([[ctx->video_output availableVideoCVPixelFormatTypes] indexOfObject:[NSNumber numberWithInt:pxl_fmt_spec.avf_id]] == NSNotFound) {
        av_log(s, AV_LOG_ERROR, "Selected pixel format (%s) is not supported by the input device.\n",
            av_get_pix_fmt_name(pxl_fmt_spec.ff_id));

        pxl_fmt_spec.ff_id = AV_PIX_FMT_NONE;

        // List the device's formats and, as a fallback, adopt the first one
        // that also exists in our mapping table.
        av_log(s, AV_LOG_ERROR, "Supported pixel formats:\n");
        for (NSNumber *pxl_fmt in [ctx->video_output availableVideoCVPixelFormatTypes]) {
            struct AVFPixelFormatSpec pxl_fmt_dummy;
            pxl_fmt_dummy.ff_id = AV_PIX_FMT_NONE;
            for (int i = 0; avf_pixel_formats[i].ff_id != AV_PIX_FMT_NONE; i++) {
                if ([pxl_fmt intValue] == avf_pixel_formats[i].avf_id) {
                    pxl_fmt_dummy = avf_pixel_formats[i];
                    break;
                }
            }

            if (pxl_fmt_dummy.ff_id != AV_PIX_FMT_NONE) {
                av_log(s, AV_LOG_ERROR, " %s\n", av_get_pix_fmt_name(pxl_fmt_dummy.ff_id));

                // select first supported pixel format instead of user selected (or default) pixel format
                if (pxl_fmt_spec.ff_id == AV_PIX_FMT_NONE) {
                    pxl_fmt_spec = pxl_fmt_dummy;
                }
            }
        }

        // fail if there is no appropriate pixel format or print a warning about overriding the pixel format
        if (pxl_fmt_spec.ff_id == AV_PIX_FMT_NONE) {
            return 1;
        } else {
            av_log(s, AV_LOG_WARNING, "Overriding selected pixel format to use %s instead.\n",
                av_get_pix_fmt_name(pxl_fmt_spec.ff_id));
        }
    }

    // Publish the negotiated format and configure the data output.
    ctx->pixel_format = pxl_fmt_spec.ff_id;
    pixel_format = [NSNumber numberWithUnsignedInt:pxl_fmt_spec.avf_id];
    capture_dict = [NSDictionary dictionaryWithObject:pixel_format
        forKey:(id)kCVPixelBufferPixelFormatTypeKey];

    [ctx->video_output setVideoSettings:capture_dict];
    [ctx->video_output setAlwaysDiscardsLateVideoFrames:YES];

    // Deliver sample buffers to the frame delegate on a private queue.
    ctx->avf_delegate = [[AVFFrameReceiver alloc] initWithContext:ctx];

    queue = dispatch_queue_create("avf_queue", NULL);
    [ctx->video_output setSampleBufferDelegate:ctx->avf_delegate queue:queue];
    dispatch_release(queue);

    if ([ctx->capture_session canAddOutput:ctx->video_output]) {
        [ctx->capture_session addOutput:ctx->video_output];
    } else {
        av_log(s, AV_LOG_ERROR, "can't add video output to capture session\n");
        return 1;
    }

    return 0;
}
/**
 * Attach the audio device to the capture session: create the input and
 * wire up the AVFAudioReceiver delegate output.
 * Returns 0 on success, 1 on failure.
 */
static int add_audio_device(AVFormatContext *s, AVCaptureDevice *audio_device)
{
    AVFContext *ctx = (AVFContext*)s->priv_data;
    NSError *error = nil;
    AVCaptureDeviceInput* audio_dev_input = [[[AVCaptureDeviceInput alloc] initWithDevice:audio_device error:&error] autorelease];
    dispatch_queue_t queue;

    if (!audio_dev_input) {
        av_log(s, AV_LOG_ERROR, "Failed to create AV capture input device: %s\n",
            [[error localizedDescription] UTF8String]);
        return 1;
    }

    if ([ctx->capture_session canAddInput:audio_dev_input]) {
        [ctx->capture_session addInput:audio_dev_input];
    } else {
        av_log(s, AV_LOG_ERROR, "can't add audio input to capture session\n");
        return 1;
    }

    // Attaching output
    ctx->audio_output = [[AVCaptureAudioDataOutput alloc] init];

    if (!ctx->audio_output) {
        av_log(s, AV_LOG_ERROR, "Failed to init AV audio output\n");
        return 1;
    }

    // Deliver sample buffers to the audio delegate on a private queue.
    ctx->avf_audio_delegate = [[AVFAudioReceiver alloc] initWithContext:ctx];

    queue = dispatch_queue_create("avf_audio_queue", NULL);
    [ctx->audio_output setSampleBufferDelegate:ctx->avf_audio_delegate queue:queue];
    dispatch_release(queue);

    if ([ctx->capture_session canAddOutput:ctx->audio_output]) {
        [ctx->capture_session addOutput:ctx->audio_output];
    } else {
        av_log(s, AV_LOG_ERROR, "adding audio output to capture session failed\n");
        return 1;
    }

    return 0;
}
/**
 * Create the video AVStream and fill its parameters from the first
 * captured frame. Blocks, pumping the run loop, until a frame arrives.
 * Returns 0 on success, 1 on failure.
 */
static int get_video_config(AVFormatContext *s)
{
    AVFContext *ctx = (AVFContext*)s->priv_data;
    CVImageBufferRef image_buffer;
    CGSize image_buffer_size;
    AVStream* stream = avformat_new_stream(s, NULL);

    if (!stream) {
        return 1;
    }

    // Take stream info from the first frame.
    // Busy-poll the delegate's counter while letting the capture callbacks
    // run by servicing the default run loop in 0.1 s slices.
    while (ctx->frames_captured < 1) {
        CFRunLoopRunInMode(kCFRunLoopDefaultMode, 0.1, YES);
    }

    lock_frames(ctx);

    ctx->video_stream_index = stream->index;

    avpriv_set_pts_info(stream, 64, 1, avf_time_base);

    // Use the encoded size of the probe frame as the stream dimensions.
    image_buffer = CMSampleBufferGetImageBuffer(ctx->current_frame);
    image_buffer_size = CVImageBufferGetEncodedSize(image_buffer);

    stream->codecpar->codec_id = AV_CODEC_ID_RAWVIDEO;
    stream->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
    stream->codecpar->width = (int)image_buffer_size.width;
    stream->codecpar->height = (int)image_buffer_size.height;
    stream->codecpar->format = ctx->pixel_format;

    // The probe frame is consumed here; avf_read_packet() gets fresh ones.
    CFRelease(ctx->current_frame);
    ctx->current_frame = nil;

    unlock_frames(ctx);

    return 0;
}
  463. static int get_audio_config(AVFormatContext *s)
  464. {
  465. AVFContext *ctx = (AVFContext*)s->priv_data;
  466. CMFormatDescriptionRef format_desc;
  467. AVStream* stream = avformat_new_stream(s, NULL);
  468. if (!stream) {
  469. return 1;
  470. }
  471. // Take stream info from the first frame.
  472. while (ctx->audio_frames_captured < 1) {
  473. CFRunLoopRunInMode(kCFRunLoopDefaultMode, 0.1, YES);
  474. }
  475. lock_frames(ctx);
  476. ctx->audio_stream_index = stream->index;
  477. avpriv_set_pts_info(stream, 64, 1, avf_time_base);
  478. format_desc = CMSampleBufferGetFormatDescription(ctx->current_audio_frame);
  479. const AudioStreamBasicDescription *basic_desc = CMAudioFormatDescriptionGetStreamBasicDescription(format_desc);
  480. if (!basic_desc) {
  481. av_log(s, AV_LOG_ERROR, "audio format not available\n");
  482. return 1;
  483. }
  484. stream->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
  485. stream->codecpar->sample_rate = basic_desc->mSampleRate;
  486. stream->codecpar->channels = basic_desc->mChannelsPerFrame;
  487. stream->codecpar->channel_layout = av_get_default_channel_layout(stream->codecpar->channels);
  488. ctx->audio_channels = basic_desc->mChannelsPerFrame;
  489. ctx->audio_bits_per_sample = basic_desc->mBitsPerChannel;
  490. ctx->audio_float = basic_desc->mFormatFlags & kAudioFormatFlagIsFloat;
  491. ctx->audio_be = basic_desc->mFormatFlags & kAudioFormatFlagIsBigEndian;
  492. ctx->audio_signed_integer = basic_desc->mFormatFlags & kAudioFormatFlagIsSignedInteger;
  493. ctx->audio_packed = basic_desc->mFormatFlags & kAudioFormatFlagIsPacked;
  494. ctx->audio_non_interleaved = basic_desc->mFormatFlags & kAudioFormatFlagIsNonInterleaved;
  495. if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
  496. ctx->audio_float &&
  497. ctx->audio_bits_per_sample == 32 &&
  498. ctx->audio_packed) {
  499. stream->codecpar->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_F32BE : AV_CODEC_ID_PCM_F32LE;
  500. } else if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
  501. ctx->audio_signed_integer &&
  502. ctx->audio_bits_per_sample == 16 &&
  503. ctx->audio_packed) {
  504. stream->codecpar->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_S16BE : AV_CODEC_ID_PCM_S16LE;
  505. } else if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
  506. ctx->audio_signed_integer &&
  507. ctx->audio_bits_per_sample == 24 &&
  508. ctx->audio_packed) {
  509. stream->codecpar->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_S24BE : AV_CODEC_ID_PCM_S24LE;
  510. } else if (basic_desc->mFormatID == kAudioFormatLinearPCM &&
  511. ctx->audio_signed_integer &&
  512. ctx->audio_bits_per_sample == 32 &&
  513. ctx->audio_packed) {
  514. stream->codecpar->codec_id = ctx->audio_be ? AV_CODEC_ID_PCM_S32BE : AV_CODEC_ID_PCM_S32LE;
  515. } else {
  516. av_log(s, AV_LOG_ERROR, "audio format is not supported\n");
  517. return 1;
  518. }
  519. if (ctx->audio_non_interleaved) {
  520. CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
  521. ctx->audio_buffer_size = CMBlockBufferGetDataLength(block_buffer);
  522. ctx->audio_buffer = av_malloc(ctx->audio_buffer_size);
  523. if (!ctx->audio_buffer) {
  524. av_log(s, AV_LOG_ERROR, "error allocating audio buffer\n");
  525. return 1;
  526. }
  527. }
  528. CFRelease(ctx->current_audio_frame);
  529. ctx->current_audio_frame = nil;
  530. unlock_frames(ctx);
  531. return 0;
  532. }
/**
 * Demuxer read_header callback: resolve the requested video/audio devices,
 * build the AVCaptureSession and create the corresponding AVStreams.
 *
 * Devices may be addressed by index or by a prefix of their localized
 * name; "default" picks the system default and "none" disables that
 * stream. On OS X, video indices past the camera list address
 * screen-capture inputs.
 *
 * Returns 0 on success, AVERROR(EIO) on any failure.
 */
static int avf_read_header(AVFormatContext *s)
{
    NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];
    int capture_screen = 0;
    uint32_t num_screens = 0;
    AVFContext *ctx = (AVFContext*)s->priv_data;
    AVCaptureDevice *video_device = nil;
    AVCaptureDevice *audio_device = nil;
    // Find capture device
    NSArray *devices = [AVCaptureDevice devicesWithMediaType:AVMediaTypeVideo];
    ctx->num_video_devices = [devices count];

    ctx->first_pts = av_gettime();
    ctx->first_audio_pts = av_gettime();

    pthread_mutex_init(&ctx->frame_lock, NULL);
    pthread_cond_init(&ctx->frame_wait_cond, NULL);

#if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1070
    // First call only counts the active displays.
    CGGetActiveDisplayList(0, NULL, &num_screens);
#endif

    // List devices if requested
    if (ctx->list_devices) {
        int index = 0;
        av_log(ctx, AV_LOG_INFO, "AVFoundation video devices:\n");
        for (AVCaptureDevice *device in devices) {
            const char *name = [[device localizedName] UTF8String];
            index = [devices indexOfObject:device];
            av_log(ctx, AV_LOG_INFO, "[%d] %s\n", index, name);
            index++;
        }
#if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1070
        // Screens are listed after the cameras, continuing the index.
        if (num_screens > 0) {
            CGDirectDisplayID screens[num_screens];
            CGGetActiveDisplayList(num_screens, screens, &num_screens);
            for (int i = 0; i < num_screens; i++) {
                av_log(ctx, AV_LOG_INFO, "[%d] Capture screen %d\n", index + i, i);
            }
        }
#endif
        av_log(ctx, AV_LOG_INFO, "AVFoundation audio devices:\n");
        devices = [AVCaptureDevice devicesWithMediaType:AVMediaTypeAudio];
        for (AVCaptureDevice *device in devices) {
            const char *name = [[device localizedName] UTF8String];
            int index = [devices indexOfObject:device];
            av_log(ctx, AV_LOG_INFO, "[%d] %s\n", index, name);
        }
        // Listing is a terminal operation: bail out through the fail path.
        goto fail;
    }

    // parse input filename for video and audio device
    parse_device_name(s);

    // check for device index given in filename
    if (ctx->video_device_index == -1 && ctx->video_filename) {
        sscanf(ctx->video_filename, "%d", &ctx->video_device_index);
    }
    if (ctx->audio_device_index == -1 && ctx->audio_filename) {
        sscanf(ctx->audio_filename, "%d", &ctx->audio_device_index);
    }

    if (ctx->video_device_index >= 0) {
        if (ctx->video_device_index < ctx->num_video_devices) {
            video_device = [devices objectAtIndex:ctx->video_device_index];
        } else if (ctx->video_device_index < ctx->num_video_devices + num_screens) {
#if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1070
            // Index addresses a screen: build an AVCaptureScreenInput; it
            // is cast to AVCaptureDevice* here and cast back to an input in
            // add_video_device().
            CGDirectDisplayID screens[num_screens];
            CGGetActiveDisplayList(num_screens, screens, &num_screens);
            AVCaptureScreenInput* capture_screen_input = [[[AVCaptureScreenInput alloc] initWithDisplayID:screens[ctx->video_device_index - ctx->num_video_devices]] autorelease];

            if (ctx->framerate.num > 0) {
                capture_screen_input.minFrameDuration = CMTimeMake(ctx->framerate.den, ctx->framerate.num);
            }

#if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1080
            if (ctx->capture_cursor) {
                capture_screen_input.capturesCursor = YES;
            } else {
                capture_screen_input.capturesCursor = NO;
            }
#endif

            if (ctx->capture_mouse_clicks) {
                capture_screen_input.capturesMouseClicks = YES;
            } else {
                capture_screen_input.capturesMouseClicks = NO;
            }

            video_device = (AVCaptureDevice*) capture_screen_input;
            capture_screen = 1;
#endif
        } else {
            av_log(ctx, AV_LOG_ERROR, "Invalid device index\n");
            goto fail;
        }
    } else if (ctx->video_filename &&
               strncmp(ctx->video_filename, "none", 4)) {
        if (!strncmp(ctx->video_filename, "default", 7)) {
            video_device = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo];
        } else {
            // looking for video inputs
            for (AVCaptureDevice *device in devices) {
                // Prefix match against the localized device name.
                if (!strncmp(ctx->video_filename, [[device localizedName] UTF8String], strlen(ctx->video_filename))) {
                    video_device = device;
                    break;
                }
            }

#if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1070
            // looking for screen inputs
            if (!video_device) {
                int idx;
                if(sscanf(ctx->video_filename, "Capture screen %d", &idx) && idx < num_screens) {
                    CGDirectDisplayID screens[num_screens];
                    CGGetActiveDisplayList(num_screens, screens, &num_screens);
                    AVCaptureScreenInput* capture_screen_input = [[[AVCaptureScreenInput alloc] initWithDisplayID:screens[idx]] autorelease];
                    video_device = (AVCaptureDevice*) capture_screen_input;
                    ctx->video_device_index = ctx->num_video_devices + idx;
                    capture_screen = 1;

                    if (ctx->framerate.num > 0) {
                        capture_screen_input.minFrameDuration = CMTimeMake(ctx->framerate.den, ctx->framerate.num);
                    }

#if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1080
                    if (ctx->capture_cursor) {
                        capture_screen_input.capturesCursor = YES;
                    } else {
                        capture_screen_input.capturesCursor = NO;
                    }
#endif

                    if (ctx->capture_mouse_clicks) {
                        capture_screen_input.capturesMouseClicks = YES;
                    } else {
                        capture_screen_input.capturesMouseClicks = NO;
                    }
                }
            }
#endif
        }

        if (!video_device) {
            av_log(ctx, AV_LOG_ERROR, "Video device not found\n");
            goto fail;
        }
    }

    // get audio device
    if (ctx->audio_device_index >= 0) {
        NSArray *devices = [AVCaptureDevice devicesWithMediaType:AVMediaTypeAudio];

        if (ctx->audio_device_index >= [devices count]) {
            av_log(ctx, AV_LOG_ERROR, "Invalid audio device index\n");
            goto fail;
        }

        audio_device = [devices objectAtIndex:ctx->audio_device_index];
    } else if (ctx->audio_filename &&
               strncmp(ctx->audio_filename, "none", 4)) {
        if (!strncmp(ctx->audio_filename, "default", 7)) {
            audio_device = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeAudio];
        } else {
            NSArray *devices = [AVCaptureDevice devicesWithMediaType:AVMediaTypeAudio];

            // Prefix match against the localized device name.
            for (AVCaptureDevice *device in devices) {
                if (!strncmp(ctx->audio_filename, [[device localizedName] UTF8String], strlen(ctx->audio_filename))) {
                    audio_device = device;
                    break;
                }
            }
        }

        if (!audio_device) {
            av_log(ctx, AV_LOG_ERROR, "Audio device not found\n");
            goto fail;
        }
    }

    // Video nor Audio capture device not found, looking for AVMediaTypeVideo/Audio
    if (!video_device && !audio_device) {
        av_log(s, AV_LOG_ERROR, "No AV capture device found\n");
        goto fail;
    }

    if (video_device) {
        if (ctx->video_device_index < ctx->num_video_devices) {
            av_log(s, AV_LOG_DEBUG, "'%s' opened\n", [[video_device localizedName] UTF8String]);
        } else {
            av_log(s, AV_LOG_DEBUG, "'%s' opened\n", [[video_device description] UTF8String]);
        }
    }
    if (audio_device) {
        av_log(s, AV_LOG_DEBUG, "audio device '%s' opened\n", [[audio_device localizedName] UTF8String]);
    }

    // Initialize capture session
    ctx->capture_session = [[AVCaptureSession alloc] init];

    if (video_device && add_video_device(s, video_device)) {
        goto fail;
    }
    // NOTE(review): the empty body below tolerates add_audio_device()
    // failures (capture proceeds video-only) — confirm this is intended
    // before adding a goto fail here.
    if (audio_device && add_audio_device(s, audio_device)) {
    }

    [ctx->capture_session startRunning];

    /* Unlock device configuration only after the session is started so it
     * does not reset the capture formats */
    if (!capture_screen) {
        // Messaging nil is a no-op if there is no video device.
        [video_device unlockForConfiguration];
    }

    if (video_device && get_video_config(s)) {
        goto fail;
    }

    // set audio stream
    if (audio_device && get_audio_config(s)) {
        goto fail;
    }

    [pool release];
    return 0;

fail:
    [pool release];
    destroy_context(ctx);
    return AVERROR(EIO);
}
  733. static int copy_cvpixelbuffer(AVFormatContext *s,
  734. CVPixelBufferRef image_buffer,
  735. AVPacket *pkt)
  736. {
  737. AVFContext *ctx = s->priv_data;
  738. int src_linesize[4];
  739. const uint8_t *src_data[4];
  740. int width = CVPixelBufferGetWidth(image_buffer);
  741. int height = CVPixelBufferGetHeight(image_buffer);
  742. int status;
  743. memset(src_linesize, 0, sizeof(src_linesize));
  744. memset(src_data, 0, sizeof(src_data));
  745. status = CVPixelBufferLockBaseAddress(image_buffer, 0);
  746. if (status != kCVReturnSuccess) {
  747. av_log(s, AV_LOG_ERROR, "Could not lock base address: %d\n", status);
  748. return AVERROR_EXTERNAL;
  749. }
  750. if (CVPixelBufferIsPlanar(image_buffer)) {
  751. size_t plane_count = CVPixelBufferGetPlaneCount(image_buffer);
  752. int i;
  753. for(i = 0; i < plane_count; i++){
  754. src_linesize[i] = CVPixelBufferGetBytesPerRowOfPlane(image_buffer, i);
  755. src_data[i] = CVPixelBufferGetBaseAddressOfPlane(image_buffer, i);
  756. }
  757. } else {
  758. src_linesize[0] = CVPixelBufferGetBytesPerRow(image_buffer);
  759. src_data[0] = CVPixelBufferGetBaseAddress(image_buffer);
  760. }
  761. status = av_image_copy_to_buffer(pkt->data, pkt->size,
  762. src_data, src_linesize,
  763. ctx->pixel_format, width, height, 1);
  764. CVPixelBufferUnlockBaseAddress(image_buffer, 0);
  765. return status;
  766. }
  767. static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
  768. {
  769. AVFContext* ctx = (AVFContext*)s->priv_data;
  770. do {
  771. CVImageBufferRef image_buffer;
  772. lock_frames(ctx);
  773. image_buffer = CMSampleBufferGetImageBuffer(ctx->current_frame);
  774. if (ctx->current_frame != nil) {
  775. int status;
  776. if (av_new_packet(pkt, (int)CVPixelBufferGetDataSize(image_buffer)) < 0) {
  777. return AVERROR(EIO);
  778. }
  779. CMItemCount count;
  780. CMSampleTimingInfo timing_info;
  781. if (CMSampleBufferGetOutputSampleTimingInfoArray(ctx->current_frame, 1, &timing_info, &count) == noErr) {
  782. AVRational timebase_q = av_make_q(1, timing_info.presentationTimeStamp.timescale);
  783. pkt->pts = pkt->dts = av_rescale_q(timing_info.presentationTimeStamp.value, timebase_q, avf_time_base_q);
  784. }
  785. pkt->stream_index = ctx->video_stream_index;
  786. pkt->flags |= AV_PKT_FLAG_KEY;
  787. status = copy_cvpixelbuffer(s, image_buffer, pkt);
  788. CFRelease(ctx->current_frame);
  789. ctx->current_frame = nil;
  790. if (status < 0)
  791. return status;
  792. } else if (ctx->current_audio_frame != nil) {
  793. CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
  794. int block_buffer_size = CMBlockBufferGetDataLength(block_buffer);
  795. if (!block_buffer || !block_buffer_size) {
  796. return AVERROR(EIO);
  797. }
  798. if (ctx->audio_non_interleaved && block_buffer_size > ctx->audio_buffer_size) {
  799. return AVERROR_BUFFER_TOO_SMALL;
  800. }
  801. if (av_new_packet(pkt, block_buffer_size) < 0) {
  802. return AVERROR(EIO);
  803. }
  804. CMItemCount count;
  805. CMSampleTimingInfo timing_info;
  806. if (CMSampleBufferGetOutputSampleTimingInfoArray(ctx->current_audio_frame, 1, &timing_info, &count) == noErr) {
  807. AVRational timebase_q = av_make_q(1, timing_info.presentationTimeStamp.timescale);
  808. pkt->pts = pkt->dts = av_rescale_q(timing_info.presentationTimeStamp.value, timebase_q, avf_time_base_q);
  809. }
  810. pkt->stream_index = ctx->audio_stream_index;
  811. pkt->flags |= AV_PKT_FLAG_KEY;
  812. if (ctx->audio_non_interleaved) {
  813. int sample, c, shift, num_samples;
  814. OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, ctx->audio_buffer);
  815. if (ret != kCMBlockBufferNoErr) {
  816. return AVERROR(EIO);
  817. }
  818. num_samples = pkt->size / (ctx->audio_channels * (ctx->audio_bits_per_sample >> 3));
  819. // transform decoded frame into output format
  820. #define INTERLEAVE_OUTPUT(bps) \
  821. { \
  822. int##bps##_t **src; \
  823. int##bps##_t *dest; \
  824. src = av_malloc(ctx->audio_channels * sizeof(int##bps##_t*)); \
  825. if (!src) return AVERROR(EIO); \
  826. for (c = 0; c < ctx->audio_channels; c++) { \
  827. src[c] = ((int##bps##_t*)ctx->audio_buffer) + c * num_samples; \
  828. } \
  829. dest = (int##bps##_t*)pkt->data; \
  830. shift = bps - ctx->audio_bits_per_sample; \
  831. for (sample = 0; sample < num_samples; sample++) \
  832. for (c = 0; c < ctx->audio_channels; c++) \
  833. *dest++ = src[c][sample] << shift; \
  834. av_freep(&src); \
  835. }
  836. if (ctx->audio_bits_per_sample <= 16) {
  837. INTERLEAVE_OUTPUT(16)
  838. } else {
  839. INTERLEAVE_OUTPUT(32)
  840. }
  841. } else {
  842. OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, pkt->data);
  843. if (ret != kCMBlockBufferNoErr) {
  844. return AVERROR(EIO);
  845. }
  846. }
  847. CFRelease(ctx->current_audio_frame);
  848. ctx->current_audio_frame = nil;
  849. } else {
  850. pkt->data = NULL;
  851. pthread_cond_wait(&ctx->frame_wait_cond, &ctx->frame_lock);
  852. }
  853. unlock_frames(ctx);
  854. } while (!pkt->data);
  855. return 0;
  856. }
  857. static int avf_close(AVFormatContext *s)
  858. {
  859. AVFContext* ctx = (AVFContext*)s->priv_data;
  860. destroy_context(ctx);
  861. return 0;
  862. }
/* Private demuxer options, exposed through the AVClass below.
 * All are decoding-side parameters; offsets resolve into AVFContext. */
static const AVOption options[] = {
    /* Boolean toggle with named constants "true"/"false". */
    { "list_devices", "list available devices", offsetof(AVFContext, list_devices), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, AV_OPT_FLAG_DECODING_PARAM, "list_devices" },
    { "true", "", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, AV_OPT_FLAG_DECODING_PARAM, "list_devices" },
    { "false", "", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, AV_OPT_FLAG_DECODING_PARAM, "list_devices" },
    /* -1 means "not selected by index" (device chosen by name instead). */
    { "video_device_index", "select video device by index for devices with same name (starts at 0)", offsetof(AVFContext, video_device_index), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
    { "audio_device_index", "select audio device by index for devices with same name (starts at 0)", offsetof(AVFContext, audio_device_index), AV_OPT_TYPE_INT, {.i64 = -1}, -1, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
    { "pixel_format", "set pixel format", offsetof(AVFContext, pixel_format), AV_OPT_TYPE_PIXEL_FMT, {.i64 = AV_PIX_FMT_YUV420P}, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM},
    { "framerate", "set frame rate", offsetof(AVFContext, framerate), AV_OPT_TYPE_VIDEO_RATE, {.str = "ntsc"}, 0, INT_MAX, AV_OPT_FLAG_DECODING_PARAM },
    /* IMAGE_SIZE writes two ints; offsetof(width) assumes height follows
     * width in AVFContext (standard libavutil convention). */
    { "video_size", "set video size", offsetof(AVFContext, width), AV_OPT_TYPE_IMAGE_SIZE, {.str = NULL}, 0, 0, AV_OPT_FLAG_DECODING_PARAM },
    /* Screen-capture-only toggles. */
    { "capture_cursor", "capture the screen cursor", offsetof(AVFContext, capture_cursor), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, AV_OPT_FLAG_DECODING_PARAM },
    { "capture_mouse_clicks", "capture the screen mouse clicks", offsetof(AVFContext, capture_mouse_clicks), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, AV_OPT_FLAG_DECODING_PARAM },
    { NULL },
};
/* AVClass binding the option table above to the demuxer's private context,
 * categorised as a video input device for av_log/device listing purposes. */
static const AVClass avf_class = {
    .class_name = "AVFoundation input device",
    .item_name  = av_default_item_name,
    .option     = options,
    .version    = LIBAVUTIL_VERSION_INT,
    .category   = AV_CLASS_CATEGORY_DEVICE_VIDEO_INPUT,
};
/* Demuxer registration. AVFMT_NOFILE: input comes from the capture device,
 * not from an AVIOContext, so no file is opened by the generic layer. */
AVInputFormat ff_avfoundation_demuxer = {
    .name           = "avfoundation",
    .long_name      = NULL_IF_CONFIG_SMALL("AVFoundation input device"),
    .priv_data_size = sizeof(AVFContext),
    .read_header    = avf_read_header,
    .read_packet    = avf_read_packet,
    .read_close     = avf_close,
    .flags          = AVFMT_NOFILE,
    .priv_class     = &avf_class,
};