Hi,
I am using libav in a C++ application that acts as a web server. The app provides a WebSocket endpoint to which the client, a normal browser accessing the user's webcam, streams WebM video. If I save the received data to a file, it plays back fine in a media player. What I want to do with the data, however, is to decode it into frames, into a std::vector<cv::Mat>, which is then handed off to another library for further processing. What I was expecting is that the frames making up the video would be extracted on the fly, as the data arrives.

So what I made is a class MediaDecoder with a single method: std::vector<cv::Mat> decode(const char* data, size_t length); Essentially, you send data to it and it returns whatever frames the decoder was able to pick up so far.
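
For context, this is roughly how it is meant to be driven from the server side (onWebsocketMessage and processFrames are placeholder names here, not my actual handler or library):

    // sketch of the intended usage; onWebsocketMessage/processFrames are
    // placeholders for the real websocket callback and the downstream library
    MediaDecoder mediaDecoder;

    void onWebsocketMessage(const char* payload, size_t length)
    {
        std::vector<cv::Mat> frames = mediaDecoder.decode(payload, length);
        if(!frames.empty())
        {
            processFrames(frames); // hand the decoded frames to the other library
        }
    }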
The custom IO buffer is:
    struct Buffer
    {
        std::vector<uint8_t> data; // everything received from the websocket so far
        int currentPos = 0;        // read position used by the custom IO callback
    };

In the decode method:
    std::vector<cv::Mat> frames;
    //add data to the buffer
    for(size_t i=0;i<length;i++) {
        buf.data.push_back(data[i]);
    }

    //do not invoke the decoders until we have 1MB of data
    if(((buf.data.size() - buf.currentPos) < 1*1024*1024) && !initializedCodecs) return frames;

And then, if this is the first invocation, I set up the entire chain: custom IO, format context, decoder, etc.:

    if(!initializedCodecs) //initialize ffmpeg objects. Custom I/O, format, decoder, etc.
    {
        avioCtxPtr = std::unique_ptr<AVIOContext,avio_context_deleter>(
                    avio_alloc_context((uint8_t*)av_malloc(4096), 4096, 0, &buf,
                                       &readAVBuffer, nullptr, &seekAVBuffer),
                    avio_context_deleter());
        if(!avioCtxPtr)
        {
            std::cerr << "Could not create IO buffer" << std::endl;
            return frames;
        }

        fmt_ctx = std::unique_ptr<AVFormatContext,avformat_context_deleter>(avformat_alloc_context(),
                    avformat_context_deleter());
        fmt_ctx->pb = avioCtxPtr.get();
        fmt_ctx->flags |= AVFMT_FLAG_CUSTOM_IO | AVFMT_FLAG_NOBUFFER | AVFMT_FLAG_GENPTS;
        fmt_ctx->max_analyze_duration = 2 * AV_TIME_BASE; // read 2 seconds of data
        {
            AVFormatContext *fmtCtxRaw = fmt_ctx.get();
            if (avformat_open_input(&fmtCtxRaw, "", nullptr, nullptr) < 0) {
                std::cerr << "Could not open movie" << std::endl;
                return frames;
            }
        }
        if (avformat_find_stream_info(fmt_ctx.get(), nullptr) < 0) {
            std::cerr << "Could not find stream information" << std::endl;
            return frames;
        }
        if((video_stream_idx = av_find_best_stream(fmt_ctx.get(), AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0)) < 0)
        {
            std::cerr << "Could not find video stream" << std::endl;
            return frames;
        }
        AVStream *video_stream = fmt_ctx->streams[video_stream_idx];
        AVCodec *dec = avcodec_find_decoder(video_stream->codecpar->codec_id);

        video_dec_ctx = std::unique_ptr<AVCodecContext,avcodec_context_deleter>(avcodec_alloc_context3(dec),
                    avcodec_context_deleter());
        if (!video_dec_ctx)
        {
            std::cerr << "Failed to allocate the video codec context" << std::endl;
            return frames;
        }
        avcodec_parameters_to_context(video_dec_ctx.get(), video_stream->codecpar);
        video_dec_ctx->thread_count = 1;
       /* video_dec_ctx->max_b_frames = 0;
        video_dec_ctx->frame_skip_threshold = 10;*/

        AVDictionary *opts = nullptr;
        av_dict_set(&opts, "refcounted_frames", "1", 0);
        av_dict_set(&opts, "deadline", "1", 0);
        av_dict_set(&opts, "auto-alt-ref", "0", 0);
        av_dict_set(&opts, "lag-in-frames", "1", 0);
        av_dict_set(&opts, "rc_lookahead", "1", 0);
        av_dict_set(&opts, "drop_frame", "1", 0);
        av_dict_set(&opts, "error-resilient", "1", 0);

        int width = video_dec_ctx->width;
        videoHeight = video_dec_ctx->height;

        if(avcodec_open2(video_dec_ctx.get(), dec, &opts) < 0)
        {
            std::cerr << "Failed to open the video codec context" << std::endl;
            return frames;
        }

        AVPixelFormat  pFormat = AV_PIX_FMT_BGR24;
        img_convert_ctx = std::unique_ptr<SwsContext,swscontext_deleter>(
                    sws_getContext(width, videoHeight, video_dec_ctx->pix_fmt,
                                   width, videoHeight, pFormat,
                                   SWS_BICUBIC, nullptr, nullptr, nullptr),
                    swscontext_deleter());

        frame = std::unique_ptr<AVFrame,avframe_deleter>(av_frame_alloc(), avframe_deleter());
        frameRGB = std::unique_ptr<AVFrame,avframe_deleter>(av_frame_alloc(), avframe_deleter());


        int numBytes = av_image_get_buffer_size(pFormat, width, videoHeight, 32 /*https://stackoverflow.com/questions/35678041/what-is-linesize-alignment-meaning*/);
        std::unique_ptr<uint8_t,avbuffer_deleter> imageBuffer((uint8_t *) av_malloc(numBytes*sizeof(uint8_t)), avbuffer_deleter());
        av_image_fill_arrays(frameRGB->data, frameRGB->linesize, imageBuffer.get(), pFormat, width, videoHeight, 32);
        frameRGB->width = width;
        frameRGB->height = videoHeight;

        initializedCodecs = true;
    }
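
The various *_deleter functors used with the unique_ptrs are just thin wrappers around the corresponding libav free functions, roughly like this (the exact free calls may differ slightly in my code):

    // sketch of the custom unique_ptr deleters; each forwards to a libav free call
    struct avio_context_deleter {
        void operator()(AVIOContext* ctx) const {
            if(ctx) av_freep(&ctx->buffer); // buffer handed to avio_alloc_context
            avio_context_free(&ctx);
        }
    };
    struct avformat_context_deleter {
        void operator()(AVFormatContext* ctx) const { avformat_close_input(&ctx); }
    };
    struct avcodec_context_deleter {
        void operator()(AVCodecContext* ctx) const { avcodec_free_context(&ctx); }
    };
    struct avframe_deleter {
        void operator()(AVFrame* f) const { av_frame_free(&f); }
    };
    struct swscontext_deleter {
        void operator()(SwsContext* c) const { sws_freeContext(c); }
    };
    struct avbuffer_deleter {
        void operator()(uint8_t* p) const { av_free(p); }
    };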


After that is done, on every call I run the reading loop:

    AVPacket pkt;
    av_init_packet(&pkt);
    pkt.data = nullptr;
    pkt.size = 0;

    int read_frame_return = 0;
    while ( (read_frame_return=av_read_frame(fmt_ctx.get(), &pkt)) >= 0)
    {
        readFrame(&frames, &pkt, video_dec_ctx.get(), frame.get(), img_convert_ctx.get(),
                  videoHeight, frameRGB.get());
    }
    avioCtxPtr->eof_reached = 0;
    avioCtxPtr->error = 0;
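
readFrame itself is essentially the standard avcodec_send_packet / avcodec_receive_frame pair followed by a sws_scale into a cv::Mat. Simplified, it is roughly this:

    // simplified sketch of readFrame: send the packet, drain all decoded frames,
    // convert each one to BGR with sws_scale and wrap it in a cv::Mat
    void MediaDecoder::readFrame(std::vector<cv::Mat>* frames, AVPacket* pkt,
                                 AVCodecContext* dec_ctx, AVFrame* frame,
                                 SwsContext* swsCtx, int height, AVFrame* frameRGB)
    {
        int ret = avcodec_send_packet(dec_ctx, pkt);
        if(ret < 0)
        {
            std::cerr << "Error sending packet for decoding " << ret << std::endl;
        }
        else
        {
            while(avcodec_receive_frame(dec_ctx, frame) == 0)
            {
                sws_scale(swsCtx, frame->data, frame->linesize, 0, height,
                          frameRGB->data, frameRGB->linesize);
                // deep-copy the converted BGR buffer into its own cv::Mat
                cv::Mat img(height, frameRGB->width, CV_8UC3,
                            frameRGB->data[0], frameRGB->linesize[0]);
                frames->push_back(img.clone());
            }
        }
        av_packet_unref(pkt);
    }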



The custom IO readAVBuffer method looks like this:

int MediaDecoder::readAVBuffer(void* opaque, uint8_t* buf, int buf_size)
{
    MediaDecoder::Buffer* mbuf = (MediaDecoder::Buffer*)opaque;
    int count = 0;
    for(int i=0;i<buf_size;i++)
    {
        int index = i + mbuf->currentPos;
        if(index >= (int)mbuf->data.size())
        {
            break;
        }
        count++;
        buf[i] = mbuf->data.at(index);
    }
    if(count > 0) mbuf->currentPos+=count;

    std::cout << "read : "<<count<<" "<<mbuf->currentPos<<", buff size:"<<mbuf->data.size() << std::endl;
    if(count <= 0) return AVERROR(EAGAIN);
    return count;
}
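
For completeness, the seekAVBuffer callback passed to avio_alloc_context works over the same Buffer; a minimal sketch of that kind of callback (not necessarily byte-for-byte what I have):

    // minimal sketch of a seek callback over the same in-memory Buffer:
    // AVSEEK_SIZE reports the total size, otherwise currentPos is moved
    int64_t MediaDecoder::seekAVBuffer(void* opaque, int64_t offset, int whence)
    {
        MediaDecoder::Buffer* mbuf = (MediaDecoder::Buffer*)opaque;
        if(whence & AVSEEK_SIZE)
        {
            return (int64_t)mbuf->data.size();
        }
        whence &= ~AVSEEK_FORCE;
        int64_t newPos;
        switch(whence)
        {
            case SEEK_SET: newPos = offset; break;
            case SEEK_CUR: newPos = mbuf->currentPos + offset; break;
            case SEEK_END: newPos = (int64_t)mbuf->data.size() + offset; break;
            default: return -1;
        }
        if(newPos < 0 || newPos > (int64_t)mbuf->data.size()) return -1;
        mbuf->currentPos = (int)newPos;
        return newPos;
    }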



As it is, I get the following output:

read : 4096 , buff size:1274923  //we read 4K

[matroska,webm @ 0x63ccef0] Found unknown-length element with ID 0x18538067 at pos. 0x104 for which no syntax for parsing is available.
[matroska,webm @ 0x63ccef0] Duplicate element
[matroska,webm @ 0x63ccef0] Duplicate element
//uh oh, apparently the decoder is not happy

//then we go on reading a bunch more from our buffer, until we start getting these errors (lots of them):

[vp8 @ 0x63d0590] Discarding interframe without a prior keyframe!
Error sending packet for decoding -1094995529
[vp8 @ 0x63d0590] Discarding interframe without a prior keyframe!
Error sending packet for decoding -1094995529


Then we keep reading until the buffer is completely drained (remaining size zero):
[matroska,webm @ 0x63ccef0] Read error at pos. 1274923 (0x13742b)


Now, various things can happen here. One is that any further calls to av_read_frame no longer read from the buffer at all; it is treated as EOF. Other times it is simply a segfault with a "corrupted size vs. prev_size" message. I have never been able to reproduce that crash while running under the debugger.

I am clearly using libav in some very wrong way, but I am completely clueless about what the correct way would be. Any help is appreciated.

Thank you,
Sergiu.




