On Sun., Feb. 28, 2021, 8:30 p.m. Mark Reid, <[email protected]> wrote:
> On Sat, Feb 13, 2021 at 10:04 PM <[email protected]> wrote:
>
>> From: Mark Reid <[email protected]>
>>
>> Hi,
>> This patch fixes audio issues I've had with some capture devices. The audio
>> gets really choppy and stops working. This seems to be because avf_read_packet
>> stops outputting the audio frames because a video frame happens to be
>> available first.
>>
>> It's based on the approach used in a patch from #4437
>> https://trac.ffmpeg.org/ticket/4437
>>
>> My approach uses an AVFifoBuffer instead of an NSMutableArray and also
>> outputs the packets in the same order they arrive from AVFoundation.
>>
>> Should fix tickets #4437 and #4513.
>>
>> ---
>>  libavdevice/avfoundation.m | 160 ++++++++++++++++++++++++++++---------
>>  1 file changed, 124 insertions(+), 36 deletions(-)
>>
>> diff --git a/libavdevice/avfoundation.m b/libavdevice/avfoundation.m
>> index 59d5b0af4f..5ac6ec4183 100644
>> --- a/libavdevice/avfoundation.m
>> +++ b/libavdevice/avfoundation.m
>> @@ -31,13 +31,17 @@
>>  #include "libavutil/pixdesc.h"
>>  #include "libavutil/opt.h"
>>  #include "libavutil/avstring.h"
>> +#include "libavutil/avassert.h"
>>  #include "libavformat/internal.h"
>>  #include "libavutil/internal.h"
>>  #include "libavutil/parseutils.h"
>>  #include "libavutil/time.h"
>>  #include "libavutil/imgutils.h"
>> +#include "libavutil/fifo.h"
>>  #include "avdevice.h"
>>
>> +#define FIFO_SIZE 4
>> +
>>  static const int avf_time_base = 1000000;
>>
>>  static const AVRational avf_time_base_q = {
>> @@ -128,8 +132,8 @@ typedef struct
>>      AVCaptureSession         *capture_session;
>>      AVCaptureVideoDataOutput *video_output;
>>      AVCaptureAudioDataOutput *audio_output;
>> -    CMSampleBufferRef         current_frame;
>> -    CMSampleBufferRef         current_audio_frame;
>> +    AVFifoBuffer             *video_fifo;
>> +    AVFifoBuffer             *audio_fifo;
>>
>>      AVCaptureDevice          *observed_device;
>>  #if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1070
>> @@ -138,6 +142,11 @@ typedef struct
>>      int observed_quit;
>>  } AVFContext;
>>
>> +typedef struct {
>> +    int64_t ts;
>> +    CMSampleBufferRef frame;
>> +} BufferRef;
>> +
>>  static void lock_frames(AVFContext* ctx)
>>  {
>>      pthread_mutex_lock(&ctx->frame_lock);
>> @@ -148,6 +157,48 @@ static void unlock_frames(AVFContext* ctx)
>>      pthread_mutex_unlock(&ctx->frame_lock);
>>  }
>>
>> +static inline void fifo_write(AVFifoBuffer* f, int64_t ts, CMSampleBufferRef frame)
>> +{
>> +    BufferRef buf = {
>> +        .ts    = ts,
>> +        .frame = frame,
>> +    };
>> +
>> +    CFRetain(frame);
>> +    av_fifo_generic_write(f, &buf, sizeof(BufferRef), NULL);
>> +}
>> +
>> +static inline void fifo_peek(AVFifoBuffer* f, BufferRef *buf)
>> +{
>> +    if (av_fifo_size(f)) {
>> +        av_fifo_generic_peek(f, buf, sizeof(BufferRef), NULL);
>> +        return;
>> +    }
>> +    buf->frame = nil;
>> +    return;
>> +}
>> +
>> +static inline void fifo_drain(AVFifoBuffer* f, int release)
>> +{
>> +    av_assert2(av_fifo_size(f) >= sizeof(BufferRef));
>> +    if (release) {
>> +        BufferRef buf;
>> +        fifo_peek(f, &buf);
>> +        CFRelease(buf.frame);
>> +    }
>> +    av_fifo_drain(f, sizeof(BufferRef));
>> +}
>> +
>> +static inline void fifo_freep(AVFifoBuffer **f)
>> +{
>> +    if (f) {
>> +        while (av_fifo_size(*f)) {
>> +            fifo_drain(*f, 1);
>> +        }
>> +        av_fifo_freep(f);
>> +    }
>> +}
>> +
>>  /** FrameReciever class - delegate for AVCaptureSession
>>  */
>>  @interface AVFFrameReceiver : NSObject
>> @@ -225,13 +276,16 @@ static void unlock_frames(AVFContext* ctx)
>>    didOutputSampleBuffer:(CMSampleBufferRef)videoFrame
>>           fromConnection:(AVCaptureConnection *)connection
>>  {
>> +    AVFifoBuffer *fifo = _context->video_fifo;
>> +    int64_t ts = av_gettime_relative();
>>      lock_frames(_context);
>>
>> -    if (_context->current_frame != nil) {
>> -        CFRelease(_context->current_frame);
>> +    if (av_fifo_space(fifo) == 0) {
>> +        av_log(_context, AV_LOG_DEBUG, "video fifo is full, the oldest frame has been dropped\n");
>> +        fifo_drain(fifo, 1);
>>      }
>>
>> -    _context->current_frame = (CMSampleBufferRef)CFRetain(videoFrame);
>> +    fifo_write(fifo, ts, videoFrame);
>>
>>      unlock_frames(_context);
>>
>> @@ -269,13 +323,16 @@ static void unlock_frames(AVFContext* ctx)
>>    didOutputSampleBuffer:(CMSampleBufferRef)audioFrame
>>           fromConnection:(AVCaptureConnection *)connection
>>  {
>> +    AVFifoBuffer *fifo = _context->audio_fifo;
>> +    int64_t ts = av_gettime_relative();
>>      lock_frames(_context);
>>
>> -    if (_context->current_audio_frame != nil) {
>> -        CFRelease(_context->current_audio_frame);
>> +    if (!av_fifo_space(fifo)) {
>> +        av_log(_context, AV_LOG_DEBUG, "audio fifo is full, the oldest frame has been dropped\n");
>> +        fifo_drain(fifo, 1);
>>      }
>>
>> -    _context->current_audio_frame = (CMSampleBufferRef)CFRetain(audioFrame);
>> +    fifo_write(fifo, ts, audioFrame);
>>
>>      unlock_frames(_context);
>>
>> @@ -301,12 +358,10 @@ static void destroy_context(AVFContext* ctx)
>>      ctx->avf_audio_delegate = NULL;
>>
>>      av_freep(&ctx->audio_buffer);
>> +    fifo_freep(&ctx->video_fifo);
>> +    fifo_freep(&ctx->audio_fifo);
>>
>>      pthread_mutex_destroy(&ctx->frame_lock);
>> -
>> -    if (ctx->current_frame) {
>> -        CFRelease(ctx->current_frame);
>> -    }
>>  }
>>
>>  static void parse_device_name(AVFormatContext *s)
>> @@ -624,6 +679,7 @@ static int add_audio_device(AVFormatContext *s, AVCaptureDevice *audio_device)
>>  static int get_video_config(AVFormatContext *s)
>>  {
>>      AVFContext *ctx = (AVFContext*)s->priv_data;
>> +    BufferRef buf;
>>      CVImageBufferRef image_buffer;
>>      CMBlockBufferRef block_buffer;
>>      CGSize image_buffer_size;
>> @@ -644,8 +700,13 @@ static int get_video_config(AVFormatContext *s)
>>
>>      avpriv_set_pts_info(stream, 64, 1, avf_time_base);
>>
>> -    image_buffer = CMSampleBufferGetImageBuffer(ctx->current_frame);
>> -    block_buffer = CMSampleBufferGetDataBuffer(ctx->current_frame);
>> +    fifo_peek(ctx->video_fifo, &buf);
>> +    if (buf.frame == nil) {
>> +        return 1;
>> +    }
>> +
>> +    image_buffer = CMSampleBufferGetImageBuffer(buf.frame);
>> +    block_buffer = CMSampleBufferGetDataBuffer(buf.frame);
>>
>>      if (image_buffer) {
>>          image_buffer_size = CVImageBufferGetEncodedSize(image_buffer);
>> @@ -661,9 +722,6 @@ static int get_video_config(AVFormatContext *s)
>>          stream->codecpar->format = ctx->pixel_format;
>>      }
>>
>> -    CFRelease(ctx->current_frame);
>> -    ctx->current_frame = nil;
>> -
>>      unlock_frames(ctx);
>>
>>      return 0;
>> @@ -672,6 +730,7 @@ static int get_video_config(AVFormatContext *s)
>>  static int get_audio_config(AVFormatContext *s)
>>  {
>>      AVFContext *ctx = (AVFContext*)s->priv_data;
>> +    BufferRef buf;
>>      CMFormatDescriptionRef format_desc;
>>      AVStream* stream = avformat_new_stream(s, NULL);
>>
>> @@ -690,7 +749,12 @@ static int get_audio_config(AVFormatContext *s)
>>
>>      avpriv_set_pts_info(stream, 64, 1, avf_time_base);
>>
>> -    format_desc = CMSampleBufferGetFormatDescription(ctx->current_audio_frame);
>> +    fifo_peek(ctx->audio_fifo, &buf);
>> +    if (buf.frame == nil) {
>> +        return 1;
>> +    }
>> +
>> +    format_desc = CMSampleBufferGetFormatDescription(buf.frame);
>>      const AudioStreamBasicDescription *basic_desc = CMAudioFormatDescriptionGetStreamBasicDescription(format_desc);
>>
>>      if (!basic_desc) {
>> @@ -737,7 +801,7 @@ static int get_audio_config(AVFormatContext *s)
>>      }
>>
>>      if (ctx->audio_non_interleaved) {
>> -        CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
>> +        CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(buf.frame);
>>          ctx->audio_buffer_size        = CMBlockBufferGetDataLength(block_buffer);
>>          ctx->audio_buffer             = av_malloc(ctx->audio_buffer_size);
>>          if (!ctx->audio_buffer) {
>> @@ -746,9 +810,6 @@ static int get_audio_config(AVFormatContext *s)
>>          }
>>      }
>>
>> -    CFRelease(ctx->current_audio_frame);
>> -    ctx->current_audio_frame = nil;
>> -
>>      unlock_frames(ctx);
>>
>>      return 0;
>> @@ -771,6 +832,9 @@ static int avf_read_header(AVFormatContext *s)
>>
>>      pthread_mutex_init(&ctx->frame_lock, NULL);
>>
>> +    ctx->video_fifo = av_fifo_alloc_array(FIFO_SIZE, sizeof(BufferRef));
>> +    ctx->audio_fifo = av_fifo_alloc_array(FIFO_SIZE, sizeof(BufferRef));
>> +
>>  #if !TARGET_OS_IPHONE && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1070
>>      CGGetActiveDisplayList(0, NULL, &num_screens);
>>  #endif
>> @@ -1051,33 +1115,52 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
>>      AVFContext* ctx = (AVFContext*)s->priv_data;
>>
>>      do {
>> +        BufferRef video;
>> +        BufferRef audio;
>>          CVImageBufferRef image_buffer;
>>          CMBlockBufferRef block_buffer;
>>          lock_frames(ctx);
>>
>> -        if (ctx->current_frame != nil) {
>> +        fifo_peek(ctx->video_fifo, &video);
>> +        fifo_peek(ctx->audio_fifo, &audio);
>> +
>> +        if (video.frame != nil && audio.frame != nil) {
>> +            // process oldest CMSampleBufferRef first
>> +            if (audio.ts <= video.ts) {
>> +                video.frame = nil;
>> +            } else {
>> +                audio.frame = nil;
>> +            }
>> +        }
>> +
>> +        if (video.frame != nil) {
>>              int status;
>>              int length = 0;
>>
>> -            image_buffer = CMSampleBufferGetImageBuffer(ctx->current_frame);
>> -            block_buffer = CMSampleBufferGetDataBuffer(ctx->current_frame);
>> +            fifo_drain(ctx->video_fifo, 0);
>> +            unlock_frames(ctx);
>> +
>> +            image_buffer = CMSampleBufferGetImageBuffer(video.frame);
>> +            block_buffer = CMSampleBufferGetDataBuffer(video.frame);
>>
>>              if (image_buffer != nil) {
>>                  length = (int)CVPixelBufferGetDataSize(image_buffer);
>>              } else if (block_buffer != nil) {
>>                  length = (int)CMBlockBufferGetDataLength(block_buffer);
>>              } else {
>> +                CFRelease(video.frame);
>>                  return AVERROR(EINVAL);
>>              }
>>
>>              if (av_new_packet(pkt, length) < 0) {
>> +                CFRelease(video.frame);
>>                  return AVERROR(EIO);
>>              }
>>
>>              CMItemCount count;
>>              CMSampleTimingInfo timing_info;
>>
>> -            if (CMSampleBufferGetOutputSampleTimingInfoArray(ctx->current_frame, 1, &timing_info, &count) == noErr) {
>> +            if (CMSampleBufferGetOutputSampleTimingInfoArray(video.frame, 1, &timing_info, &count) == noErr) {
>>                  AVRational timebase_q = av_make_q(1, timing_info.presentationTimeStamp.timescale);
>>                  pkt->pts = pkt->dts = av_rescale_q(timing_info.presentationTimeStamp.value, timebase_q, avf_time_base_q);
>>              }
>> @@ -1094,31 +1177,37 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
>>                      status = AVERROR(EIO);
>>                  }
>>              }
>> -            CFRelease(ctx->current_frame);
>> -            ctx->current_frame = nil;
>> +            CFRelease(video.frame);
>>
>> -            if (status < 0)
>> +            if (status < 0) {
>>                  return status;
>> -        } else if (ctx->current_audio_frame != nil) {
>> -            CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(ctx->current_audio_frame);
>> +            }
>> +        } else if (audio.frame != nil) {
>> +            CMBlockBufferRef block_buffer = CMSampleBufferGetDataBuffer(audio.frame);
>>              int block_buffer_size = CMBlockBufferGetDataLength(block_buffer);
>>
>> +            fifo_drain(ctx->audio_fifo, 0);
>> +            unlock_frames(ctx);
>> +
>>              if (!block_buffer || !block_buffer_size) {
>> +                CFRelease(audio.frame);
>>                  return AVERROR(EIO);
>>              }
>>
>>              if (ctx->audio_non_interleaved && block_buffer_size > ctx->audio_buffer_size) {
>> +                CFRelease(audio.frame);
>>                  return AVERROR_BUFFER_TOO_SMALL;
>>              }
>>
>>              if (av_new_packet(pkt, block_buffer_size) < 0) {
>> +                CFRelease(audio.frame);
>>                  return AVERROR(EIO);
>>              }
>>
>>              CMItemCount count;
>>              CMSampleTimingInfo timing_info;
>>
>> -            if (CMSampleBufferGetOutputSampleTimingInfoArray(ctx->current_audio_frame, 1, &timing_info, &count) == noErr) {
>> +            if (CMSampleBufferGetOutputSampleTimingInfoArray(audio.frame, 1, &timing_info, &count) == noErr) {
>>                  AVRational timebase_q = av_make_q(1, timing_info.presentationTimeStamp.timescale);
>>                  pkt->pts = pkt->dts = av_rescale_q(timing_info.presentationTimeStamp.value, timebase_q, avf_time_base_q);
>>              }
>> @@ -1131,6 +1220,7 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
>>
>>                  OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, ctx->audio_buffer);
>>                  if (ret != kCMBlockBufferNoErr) {
>> +                    CFRelease(audio.frame);
>>                      return AVERROR(EIO);
>>                  }
>>
>> @@ -1162,12 +1252,12 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
>>              } else {
>>                  OSStatus ret = CMBlockBufferCopyDataBytes(block_buffer, 0, pkt->size, pkt->data);
>>                  if (ret != kCMBlockBufferNoErr) {
>> +                    CFRelease(audio.frame);
>>                      return AVERROR(EIO);
>>                  }
>>              }
>>
>> -            CFRelease(ctx->current_audio_frame);
>> -            ctx->current_audio_frame = nil;
>> +            CFRelease(audio.frame);
>>          } else {
>>              pkt->data = NULL;
>>              unlock_frames(ctx);
>> @@ -1177,8 +1267,6 @@ static int avf_read_packet(AVFormatContext *s, AVPacket *pkt)
>>                  return AVERROR(EAGAIN);
>>              }
>>          }
>> -
>> -        unlock_frames(ctx);
>>      } while (!pkt->data);
>>
>>      return 0;
>> --
>> 2.29.2
>>
>
> ping
>
> ping

_______________________________________________
ffmpeg-devel mailing list
[email protected]
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
[email protected] with subject "unsubscribe".
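The core idea of the patch (a small bounded queue of {arrival timestamp, retained sample buffer} records per stream, with the reader always draining the older of the two queue heads) can be reduced to a standalone sketch. The sketch below is illustrative only: it assumes the legacy AVFifoBuffer API that the patch itself targets, the names Record, queue_push and queue_peek are invented for this example, and a malloc'd string stands in for the retained CMSampleBufferRef.

/*
 * Standalone sketch of the bounded-FIFO pattern used in the patch.
 * Assumes the legacy AVFifoBuffer API (libavutil in FFmpeg 4.x/5.0);
 * hypothetical build line: cc demo.c -lavutil -o demo
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <libavutil/fifo.h>
#include <libavutil/time.h>

#define QUEUE_SIZE 4   /* same bound as FIFO_SIZE in the patch */

typedef struct Record {
    int64_t ts;        /* arrival time from av_gettime_relative() */
    void   *payload;   /* stands in for the retained CMSampleBufferRef */
} Record;

/* Append a record; if the queue is full, drop (and free) the oldest one,
 * mirroring what the capture delegates do when av_fifo_space() reaches 0. */
static void queue_push(AVFifoBuffer *f, int64_t ts, void *payload)
{
    Record rec = { .ts = ts, .payload = payload };

    if (av_fifo_space(f) < (int)sizeof(Record)) {
        Record old;
        av_fifo_generic_read(f, &old, sizeof(old), NULL);
        free(old.payload);              /* the patch calls CFRelease() here */
    }
    av_fifo_generic_write(f, &rec, sizeof(rec), NULL);
}

/* Peek at the head record without removing it; returns 0 if the queue is empty. */
static int queue_peek(AVFifoBuffer *f, Record *rec)
{
    if (av_fifo_size(f) < (int)sizeof(Record))
        return 0;
    av_fifo_generic_peek(f, rec, sizeof(*rec), NULL);
    return 1;
}

int main(void)
{
    AVFifoBuffer *video = av_fifo_alloc_array(QUEUE_SIZE, sizeof(Record));
    AVFifoBuffer *audio = av_fifo_alloc_array(QUEUE_SIZE, sizeof(Record));
    Record v, a;
    int have_v, have_a;

    if (!video || !audio)
        return 1;

    /* Simulate a video frame arriving just before an audio frame. */
    queue_push(video, av_gettime_relative(), strdup("video frame"));
    queue_push(audio, av_gettime_relative(), strdup("audio frame"));

    have_v = queue_peek(video, &v);
    have_a = queue_peek(audio, &a);

    /* When both streams have data, emit the older head first (ties go to
     * audio), matching the "audio.ts <= video.ts" check in avf_read_packet. */
    if (have_v && have_a) {
        if (a.ts <= v.ts)
            have_v = 0;
        else
            have_a = 0;
    }

    if (have_v) {
        printf("emit %s (ts=%" PRId64 ")\n", (char *)v.payload, v.ts);
        av_fifo_drain(video, sizeof(Record));
        free(v.payload);
    } else if (have_a) {
        printf("emit %s (ts=%" PRId64 ")\n", (char *)a.payload, a.ts);
        av_fifo_drain(audio, sizeof(Record));
        free(a.payload);
    }

    av_fifo_freep(&video);
    av_fifo_freep(&audio);
    return 0;
}

Because frames are no longer overwritten in place, a burst of video callbacks can no longer starve the audio stream: each stream keeps up to QUEUE_SIZE pending buffers and packets are emitted in arrival order.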
