[Libav-user] Syncing a seperate audio and video stream

Ludwig Ertl Tue, 20 Mar 2012 11:16:21 -0700

Hi,

I want to use ffmpeg to decode the audio and video streams from a
Trendnet TV-IP512P IP camera in order to feed them to ffserver for streaming.
I found a document on the Internet which is describing the video and
audio format container used by the IP camera:
http://www.paillassou.com/DCS-2121/CGI_2121.pdf
See Section 8.2 (Advanced ip-Camera Stream (ACS) Header).


Unfortunately, the audio and video streams are served separately under different
URLs (see Sections 4.1.5 and 4.1.6): /video/ACVS.cgi and /audio/ACAS.cgi

Now I wrote 2 decoding plugins for libavformat (which I'd like to
contribute to libavformat once they are working as expected), which I have
attached in this message.
They basically work fine, but as I'm completely new to libav, I don't have
an idea how to sync those 2 streams together.
Currently, what I'm doing for testing is:

ffmpeg -i http://admin:[email protected]/video/ACVS.cgi -i
http://admin:[email protected]/audio/ACAS.cgi test.mpg

So I have 2 separate streams which are of course out of sync.
There is a timestamp field in the frame header of each audio/video frame,
which is just a Unix timestamp with msec precision; both come from the same
clock source.
So this information could be used to sync the streams, but I have no clue
how this could possibly work, as the audio and video decoder plugins don't
know anything about each other, and even if they did (via an external
variable or some ugly hack like that), I don't have a clue how to sync them.
I suspect that it may have something to do with PTS and DTS timestamps, but
I don't know how they are used for audio and video sync in separate streams.

Can you give me some hints on how to solve this problem? I can provide
sample
data from the camera, if you want.
As you may see from the code, I'm also quite unsure how to correctly
calculate
PTS (and possibly DTS) values, but the current code at least somehow works,
if I don't use any seeking or offsets (i.e. ffmpeg -ss parameter).

I hope that there is a possibility for synchronisation.
What I have already tried was using the clock as PTS for both audio and
video:

av_set_pts_info(st, 64, 1, 1000000);  /* 64 bits pts in us */
pkt->pts = ac->hdr.ulTimeSec * 1000000LL + ac->hdr.ulTimeUSec;

But this just resulted in a totally garbled video stream.

Regards,
DI (FH) Ludwig Ertl
Software Engineering
Computer Software Production GmbH
Fockygasse 4
1120 Wien
Web: http://www.csp.at

/*
 * ACS (Advanced ip-Camera Stream) demuxer
 * Copyright (c) 2012 DI(FH) Ludwig Ertl / CSP GmbH
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "avformat.h"
#include "avio_internal.h"
#include "pcm.h"
#include "riff.h"

/*
 * Strategies for generating packet PTS values.  It is not yet clear which
 * one is best; some of them are probably nonsense.
 *
 * These have to stay preprocessor macros (not an enum) because they are
 * compared in #if directives further down.  The per-line remarks use
 * block comments so that a re-wrapped line cannot spill text into the code.
 */
#define TIMER_NON       0       /* No timing, let ffmpeg estimate... */
#define TIMER_SRT       1       /* Timing via sample rate, probably most compatible */
#define TIMER_SEQ       2       /* Timing via SequenceNumber in header */
#define TIMER_TIM       3       /* Timing via timestamp */

/* Strategy selected at compile time. */
#define TIMER   TIMER_SRT

/* The ffmpeg codecs we support, and the IDs they have in the file */
static const AVCodecTag codec_acs_tags[] = {
    { CODEC_ID_ADPCM_MS, 0 },
    { CODEC_ID_PCM_MULAW, 1 },
    { CODEC_ID_PCM_ALAW, 2 },
    { CODEC_ID_ADPCM_IMA_ISS, 4},       // Not quite sure if this maps 
correctly to OSS AFMT_IMA_ADPC
    { CODEC_ID_PCM_U8, 8 },
    { CODEC_ID_PCM_S16LE, 0x10 },
    { CODEC_ID_PCM_S16BE, 0x20 },
    { CODEC_ID_PCM_S8, 0x40 },
    { CODEC_ID_PCM_U16LE, 0x80 },
    { CODEC_ID_PCM_U16BE, 0x100 },
    { CODEC_ID_MP2, 0x200 },
    { CODEC_ID_AC3, 0x400 },
    { CODEC_ID_AMR_NB, 0x800 }          // Not quite sure if it's narrow or 
wide band (=AFMT_AMR)
};


/*
 * In-memory copy of one ACS audio block header.
 *
 * Members appear in the order in which acs_read_block_header() fills them
 * from the stream.
 *
 * NOTE(review): the "ul" members are populated with avio_rl32(), so they
 * only ever hold 32-bit values even where unsigned long is wider.
 */
typedef struct _ACS_AudioHeader
{
    /* identification and sizes */
    unsigned long  ulHdrID;           /* header ID; 0xF6010000 is expected for audio */
    unsigned long  ulHdrLength;       /* header size */
    unsigned long  ulDataLength;      /* data size, i.e. payload length of this block */

    /* ordering and timing */
    unsigned long  ulSequenceNumber;  /* used as PTS source when TIMER is TIMER_SEQ */
    unsigned long  ulTimeSec;         /* timestamp, seconds part */
    unsigned long  ulTimeUSec;        /* timestamp, sub-second part (scaled as microseconds by TIMER_TIM) */

    unsigned long  ulDataCheckSum;    /* stored but not verified by this demuxer */

    /* audio format description */
    unsigned short usFormat;          /* format code, looked up in codec_acs_tags */
    unsigned short usChannels;
    unsigned short usSampleRate;
    unsigned short usSampleBits;

    unsigned long  ulReserved;
} ACS_AudioHeader, *PACS_AudioHeader;

/*
 * Private state of the ACS audio demuxer (AVFormatContext.priv_data).
 * Which timing member exists depends on the TIMER strategy chosen at
 * compile time.
 */
typedef struct {
    /* Most recently parsed block header. */
    ACS_AudioHeader hdr;

#if TIMER == TIMER_SEQ
    /* First non-zero sequence number seen; subtracted to form the PTS. */
    unsigned long ulFirstFrame;
#elif TIMER == TIMER_SRT
    /* Running count that is handed out as the PTS of the next packet. */
    int64_t llFrame;
#endif

    /*
     * Set by acs_read_header() after it has consumed the first block
     * header, so that acs_read_packet() does not try to read it again.
     */
    int bReadHeader;
} ACSContext;


/*
 * Probe callback for the ACS audio demuxer.
 *
 * Only the first four bytes of the probe buffer are inspected.
 *
 * Returns AVPROBE_SCORE_MAX when the buffer starts with 00 00 01 F6,
 * otherwise 0.
 */
static int acs_probe(AVProbeData *p)
{
    static const unsigned char magic[4] = { 0x00, 0x00, 0x01, 0xF6 };
    int i;

    /* Compare byte by byte, giving up at the first mismatch. */
    for (i = 0; i < 4; i++) {
        if (p->buf[i] != magic[i])
            return 0;
    }

    return AVPROBE_SCORE_MAX;
}

/*
 * Read one ACS audio block header from pb into the demuxer context.
 *
 * All fields are read as little-endian values, in stream order, into
 * ((ACSContext *)ctx->priv_data)->hdr.
 *
 * ctx  demuxer context whose priv_data is an ACSContext
 * pb   I/O context positioned at the start of a block header
 *
 * Returns 0 on success, AVERROR_EOF when the input ended before a complete
 * header could be read, and AVERROR_INVALIDDATA when the header ID is not
 * the audio magic.  Every failure is negative, so callers that only test
 * for "< 0" keep working.
 */
static int acs_read_block_header(AVFormatContext *ctx, AVIOContext *pb)
{
    ACSContext *ac = ctx->priv_data;

    ac->hdr.ulHdrID = avio_rl32(pb);

    /* A plain end of stream is not a corrupt header; report it as EOF
     * instead of logging "Incorrect header: 00000000". */
    if (pb->eof_reached)
        return AVERROR_EOF;

    if (ac->hdr.ulHdrID != 0xF6010000) {
        av_log(ctx, AV_LOG_ERROR, "Incorrect header: %08lX\n", ac->hdr.ulHdrID);
        return AVERROR_INVALIDDATA;
    }

    ac->hdr.ulHdrLength      = avio_rl32(pb); /* header size */
    ac->hdr.ulDataLength     = avio_rl32(pb); /* data size */

    ac->hdr.ulSequenceNumber = avio_rl32(pb);
    ac->hdr.ulTimeSec        = avio_rl32(pb);
    ac->hdr.ulTimeUSec       = avio_rl32(pb);
    ac->hdr.ulDataCheckSum   = avio_rl32(pb);

    ac->hdr.usFormat         = avio_rl16(pb);
    ac->hdr.usChannels       = avio_rl16(pb);
    ac->hdr.usSampleRate     = avio_rl16(pb);
    ac->hdr.usSampleBits     = avio_rl16(pb);
    ac->hdr.ulReserved       = avio_rl32(pb);

    /* The input ran out in the middle of the header: the fields above are
     * partly zero-filled and must not be trusted. */
    if (pb->eof_reached)
        return AVERROR_EOF;

#if TIMER==TIMER_SEQ
    /* Remember the first non-zero sequence number as the PTS origin. */
    if (!ac->ulFirstFrame && ac->hdr.ulSequenceNumber)
        ac->ulFirstFrame = ac->hdr.ulSequenceNumber;
#endif
    return 0;
}

/*
 * read_header callback of the ACS audio demuxer.
 *
 * Parses the first block header and creates the single audio stream from
 * the format description it carries.  bReadHeader is set so that the first
 * acs_read_packet() call does not try to parse that header a second time.
 *
 * s   demuxer context
 * ap  unused
 *
 * Returns 0 on success, the (negative) error of acs_read_block_header() if
 * the header cannot be parsed, AVERROR_INVALIDDATA for a zero sample rate
 * when the time base is derived from it, or AVERROR(ENOMEM) if the stream
 * cannot be allocated.
 */
static int acs_read_header(AVFormatContext *s,
                          AVFormatParameters *ap)
{
    AVIOContext *pb = s->pb;
    ACSContext *ac = s->priv_data;
    enum CodecID codec;
    AVStream *st;
    int ret;

    /* Hand the helper's own error code on instead of a bare -1. */
    ret = acs_read_block_header(s, pb);
    if (ret < 0)
        return ret;

#if TIMER==TIMER_SRT || TIMER==TIMER_NON
    /* The sample rate becomes the time base denominator below. */
    if (!ac->hdr.usSampleRate) {
        av_log(s, AV_LOG_ERROR, "Invalid sample rate 0 in stream header\n");
        return AVERROR_INVALIDDATA;
    }
#endif

    codec = ff_codec_get_id(codec_acs_tags, ac->hdr.usFormat);

    /* now we are ready: build format streams */
    st = av_new_stream(s, 0);
    if (!st)
        return AVERROR(ENOMEM);

    st->codec->codec_type            = AVMEDIA_TYPE_AUDIO;
    st->codec->codec_tag             = ac->hdr.usFormat;
    st->codec->codec_id              = codec;
    st->codec->channels              = ac->hdr.usChannels;
    st->codec->sample_rate           = ac->hdr.usSampleRate;
    st->codec->bits_per_coded_sample = ac->hdr.usSampleBits;
    st->codec->bit_rate              = st->codec->sample_rate *
                                       st->codec->bits_per_coded_sample *
                                       st->codec->channels;
    ac->bReadHeader = 1;

    /* The time base has to match the unit acs_read_packet() uses for pts. */
#if TIMER==TIMER_SRT || TIMER==TIMER_NON
    av_set_pts_info(st, 64, 1, ac->hdr.usSampleRate);
#elif TIMER==TIMER_TIM
    av_set_pts_info(st, 64, 1, 1000000);  /* 64 bits pts in us */
#elif TIMER==TIMER_SEQ
    av_set_pts_info(st, 64, 1, 16);
#endif
    return 0;
}

/*
 * read_packet callback of the ACS audio demuxer.
 *
 * Normally reads one block header (unless acs_read_header() already
 * consumed it) followed by ulDataLength bytes of payload.  If the current
 * file position is not a multiple of the block size, a seek is assumed to
 * have happened and only the remainder of the current block is returned.
 *
 * s    demuxer context
 * pkt  packet to fill; always assigned to stream 0
 *
 * Returns 0 on success or a negative error code from header parsing or
 * av_get_packet().
 */
static int acs_read_packet(AVFormatContext *s,
                          AVPacket *pkt)
{
    ACSContext *ac = s->priv_data;
    /* 64-bit, so the sum of two 32-bit header fields cannot overflow. */
    int64_t chunklen = (int64_t)ac->hdr.ulHdrLength + ac->hdr.ulDataLength;
    int64_t remain   = chunklen ? avio_tell(s->pb) % chunklen : 0;
    int ret;

    if (remain) {
        /* Seek may have occurred, so we are not aligned properly.
         * So don't read header, just read remaining packet. */
        ret = av_get_packet(s->pb, pkt, chunklen - remain);
        ac->bReadHeader = 0;
    } else {
        if (ac->bReadHeader)
            ac->bReadHeader = 0;
        else if ((ret = acs_read_block_header(s, s->pb)) < 0)
            return ret;
        ret = av_get_packet(s->pb, pkt, ac->hdr.ulDataLength);
    }

    if (ret < 0)
        return ret;
    pkt->stream_index = 0;

#if TIMER==TIMER_SRT
    pkt->pts = ac->llFrame;
    {
        /*
         * Advance by the number of sample frames in this block.  Working
         * in bits rather than whole bytes avoids a division by zero for
         * formats with fewer than 8 bits per sample, and a zero channel
         * count or sample size simply leaves the counter unchanged.
         * For sample sizes that are a multiple of 8 the result is the same
         * as dividing by channels and then by bytes per sample.
         */
        int64_t bits_per_frame = (int64_t)ac->hdr.usChannels *
                                 ac->hdr.usSampleBits;

        if (bits_per_frame)
            ac->llFrame += (int64_t)ac->hdr.ulDataLength * 8 / bits_per_frame;
    }
#elif TIMER==TIMER_TIM
    pkt->pts = ac->hdr.ulTimeSec * 1000000LL + ac->hdr.ulTimeUSec;
#elif TIMER==TIMER_SEQ
    pkt->pts = ac->hdr.ulSequenceNumber - ac->ulFirstFrame;
#endif

    /* note: we need to modify the packet size here to handle the last
       packet */
    pkt->size = ret;
    return 0;
}

/*
 * Demuxer description for ACS audio streams.
 *
 * Every member is named explicitly so the table does not depend on the
 * order in which AVInputFormat declares its fields.
 */
AVInputFormat ff_acs_demuxer = {
    .name           = "acs",
    .long_name      = NULL_IF_CONFIG_SMALL("Advanced ip-Camera Stream(ACS) Audio"),
    .priv_data_size = sizeof(ACSContext),
    .read_probe     = acs_probe,
    .read_header    = acs_read_header,
    .read_packet    = acs_read_packet,
    .read_close     = NULL,
    .read_seek      = pcm_read_seek,
    .codec_tag      = (const AVCodecTag* const []){codec_acs_tags, 0},
};

/*
 * ACS (Advanced ip-Camera Stream) demuxer
 * Copyright (c) 2012 DI(FH) Ludwig Ertl / CSP GmbH
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "avformat.h"


/*
 * In-memory copy of one ACS video block header.
 *
 * Members appear in the order in which acsv_read_block_header() fills them
 * from the stream.
 *
 * NOTE(review): the "ul" members are populated with avio_rl32(), so they
 * only ever hold 32-bit values even where unsigned long is wider.
 */
typedef struct _ACS_VideoHeader
{
    /* identification and sizes */
    unsigned long  ulHdrID;           /* header ID; 0xF5010000 is expected for video */
    unsigned long  ulHdrLength;       /* header size */
    unsigned long  ulDataLength;      /* data size, i.e. payload length of this block */

    /* ordering and timing */
    unsigned long  ulSequenceNumber;  /* basis of the packet PTS */
    unsigned long  ulTimeSec;         /* timestamp, seconds part */
    unsigned long  ulTimeUSec;        /* timestamp, sub-second part */

    unsigned long  ulDataCheckSum;    /* stored but not verified by this demuxer */

    /* picture description */
    unsigned short usCodingType;      /* 5 selects MJPEG, anything else MPEG-4 */
    unsigned short usFrameRate;       /* used as the time base denominator */
    unsigned short usWidth;
    unsigned short usHeight;

    /* Final four header bytes; presumably motion-detection data
     * (going by the "MD" prefix) - TODO confirm. */
    unsigned char  ucMDBitmap;
    unsigned char  ucMDPowers[3];
} ACS_VideoHeader, *PACS_VideoHeader;

/* Private state of the ACS video demuxer (AVFormatContext.priv_data). */
typedef struct {
    /* Most recently parsed block header. */
    ACS_VideoHeader hdr;

    /* First non-zero sequence number seen; subtracted to form the PTS. */
    unsigned long ulFirstFrame;

    /*
     * Set by acsv_read_header() after it has consumed the first block
     * header, so that acsv_read_packet() does not try to read it again.
     */
    int bReadHeader;
} ACSVContext;

/*
 * Probe callback for the ACS video demuxer.
 *
 * Returns AVPROBE_SCORE_MAX when the probe buffer starts with the bytes
 * 00 00 01 F5, otherwise 0.
 */
static int acsv_probe(AVProbeData *p)
{
    /* check file header */
    const int has_magic = p->buf[0] == 0x00 && p->buf[1] == 0x00 &&
                          p->buf[2] == 0x01 && p->buf[3] == 0xF5;

    return has_magic ? AVPROBE_SCORE_MAX : 0;
}

/*
 * Read one ACS video block header from pb into the demuxer context.
 *
 * All multi-byte fields are read as little-endian values, in stream order,
 * into ((ACSVContext *)ctx->priv_data)->hdr.  The first non-zero sequence
 * number seen is remembered in ulFirstFrame.
 *
 * If the data starts with the text "HTTP" instead of a block header, up to
 * sizeof(szBuf) - 1 further bytes are read and logged, to show the user
 * what the server sent.
 *
 * ctx  demuxer context whose priv_data is an ACSVContext
 * pb   I/O context positioned at the start of a block header
 *
 * Returns 0 on success, AVERROR_EOF when the input ended before a complete
 * header could be read, and AVERROR_INVALIDDATA when the header ID is not
 * the video magic.  Every failure is negative, so callers that only test
 * for "< 0" keep working.
 */
static int acsv_read_block_header(AVFormatContext *ctx, AVIOContext *pb)
{
    ACSVContext *ac = ctx->priv_data;
    int i;

    ac->hdr.ulHdrID = avio_rl32(pb);

    /* A plain end of stream is not a corrupt header. */
    if (pb->eof_reached)
        return AVERROR_EOF;

    if (ac->hdr.ulHdrID != 0xF5010000) {
        /* May be HTTP/1.1 error, inform user about it, but now we are
         * basically doomed...  0x50545448 is "HTTP" read as a
         * little-endian 32-bit word. */
        if (ac->hdr.ulHdrID == 0x50545448) {
            char szBuf[1024] = {0};
            size_t len = 0;

            /* Collect what follows until the buffer is full or the input
             * ends; the zero-initialised buffer keeps it terminated. */
            while (len < sizeof(szBuf) - 1) {
                int c = avio_r8(pb);

                if (pb->eof_reached)
                    break;
                szBuf[len++] = (char)c;
            }
            av_log(ctx, AV_LOG_ERROR, "HTTP header in between transaction:\nHTTP%s\n", szBuf);
            return AVERROR_INVALIDDATA;
        }
        av_log(ctx, AV_LOG_ERROR, "Incorrect header: %08lX\n", ac->hdr.ulHdrID);
        return AVERROR_INVALIDDATA;
    }

    ac->hdr.ulHdrLength      = avio_rl32(pb); /* header size */
    ac->hdr.ulDataLength     = avio_rl32(pb); /* data size */

    ac->hdr.ulSequenceNumber = avio_rl32(pb);
    ac->hdr.ulTimeSec        = avio_rl32(pb);
    ac->hdr.ulTimeUSec       = avio_rl32(pb);
    ac->hdr.ulDataCheckSum   = avio_rl32(pb);

    ac->hdr.usCodingType     = avio_rl16(pb);
    ac->hdr.usFrameRate      = avio_rl16(pb);
    ac->hdr.usWidth          = avio_rl16(pb);
    ac->hdr.usHeight         = avio_rl16(pb);

    /*
     * The last four header bytes go into ucMDBitmap and ucMDPowers[0..2]
     * in stream order.  Reading them one at a time is independent of host
     * byte order and never writes more than those four bytes.
     */
    ac->hdr.ucMDBitmap = avio_r8(pb);
    for (i = 0; i < (int)sizeof(ac->hdr.ucMDPowers); i++)
        ac->hdr.ucMDPowers[i] = avio_r8(pb);

    /* The input ran out in the middle of the header: the fields above are
     * partly zero-filled and must not be trusted. */
    if (pb->eof_reached)
        return AVERROR_EOF;

    /* Remember the first non-zero sequence number as the PTS origin. */
    if (!ac->ulFirstFrame && ac->hdr.ulSequenceNumber)
        ac->ulFirstFrame = ac->hdr.ulSequenceNumber;
    return 0;
}

/*
 * read_header callback of the ACS video demuxer.
 *
 * Parses the first block header and creates the single video stream from
 * it.  bReadHeader is set so that the first acsv_read_packet() call does
 * not try to parse that header a second time.
 *
 * s   demuxer context
 * ap  unused
 *
 * Returns 0 on success, the (negative) error of acsv_read_block_header()
 * if the header cannot be parsed, AVERROR_INVALIDDATA if the header
 * announces a frame rate of 0, or AVERROR(ENOMEM) if the stream cannot be
 * allocated.
 */
static int acsv_read_header(AVFormatContext *s, AVFormatParameters *ap)
{
    AVIOContext *pb = s->pb;
    ACSVContext *ac = s->priv_data;
    AVStream *st;
    int ret;

    /* Hand the helper's own error code on instead of a bare -1. */
    ret = acsv_read_block_header(s, pb);
    if (ret < 0)
        return ret;

    /* The frame rate becomes the time base denominator below. */
    if (!ac->hdr.usFrameRate) {
        av_log(s, AV_LOG_ERROR, "Invalid frame rate 0 in stream header\n");
        return AVERROR_INVALIDDATA;
    }

    st = av_new_stream(s, 0);
    if (!st)
        return AVERROR(ENOMEM);

    st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
    st->codec->codec_tag  = ac->hdr.usCodingType;
    /* Coding type 5 is treated as MJPEG, everything else as MPEG-4. */
    st->codec->codec_id   = ac->hdr.usCodingType == 5 ? CODEC_ID_MJPEG
                                                      : CODEC_ID_MPEG4;
    st->codec->width      = ac->hdr.usWidth;
    st->codec->height     = ac->hdr.usHeight;
    st->need_parsing      = AVSTREAM_PARSE_FULL;

    /* Time base of 1/usFrameRate: acsv_read_packet() derives pts from the
     * block sequence numbers. */
    av_set_pts_info(st, 32, 1, ac->hdr.usFrameRate);
    ac->bReadHeader = 1;
    return 0;
}

/*
 * read_packet callback of the ACS video demuxer.
 *
 * Reads one block header (unless acsv_read_header() already consumed it)
 * followed by ulDataLength bytes of payload.
 *
 * s    demuxer context
 * pkt  packet to fill; always assigned to stream 0
 *
 * Returns 0 on success, like the audio demuxer does, rather than the
 * number of bytes read; the byte count is available in pkt->size.  A
 * negative error code from header parsing or av_get_packet() is returned
 * on failure.
 */
static int acsv_read_packet(AVFormatContext *s, AVPacket *pkt)
{
    ACSVContext *ac = s->priv_data;
    int ret;

    if (ac->bReadHeader) {
        /* Header of the first block was parsed by acsv_read_header(). */
        ac->bReadHeader = 0;
    } else {
        ret = acsv_read_block_header(s, s->pb);
        if (ret < 0)
            return ret;
    }

    ret = av_get_packet(s->pb, pkt, ac->hdr.ulDataLength);
    if (ret < 0)
        return ret;
    pkt->stream_index = 0;

    /* pts counts frames since the first one seen; blocks with a zero
     * timestamp are left without an explicit pts. */
    if (ac->hdr.ulTimeSec)
        pkt->pts = ac->hdr.ulSequenceNumber - ac->ulFirstFrame;

    return 0;
}

/*
 * Demuxer description for ACS video streams.
 *
 * Every member is named explicitly so the table does not depend on the
 * order in which AVInputFormat declares its fields.
 */
AVInputFormat ff_acsv_demuxer = {
    .name           = "acsv",
    .long_name      = NULL_IF_CONFIG_SMALL("Advanced ip-Camera Stream(ACS) Video"),
    .priv_data_size = sizeof(ACSVContext),
    .read_probe     = acsv_probe,
    .read_header    = acsv_read_header,
    .read_packet    = acsv_read_packet,
    .flags          = AVFMT_GENERIC_INDEX,
    .value          = CODEC_ID_MPEG4,
};

_______________________________________________
Libav-user mailing list
[email protected]
http://ffmpeg.org/mailman/listinfo/libav-user

[Libav-user] Syncing a seperate audio and video stream

Reply via email to