Hello,
From what I see, the doc/examples files can still be improved. More precisely,
their main() is often split into functions with short names that hide
important (sometimes essential) details of what they are doing, and this
worsens readability by forcing the reader to jump from chunk to chunk of the
code. See for example muxing.c (and its open_audio/video(),
write_audio/video_frame(), ...). In addition, these examples are hard to adapt
to practical cases without major modifications.

Please consider reviewing/pushing the example I provide with this email (as a
patch for the dev branch, and as a separate file which can be compiled against
the 5.0 release): convert_encode_mux_audio.c. It is deliberately written in a
more "procedural" style, should be much easier to read, and covers all the
basic steps (reading from a file, converting, encoding, muxing) of a common
and practical pipeline built with FFmpeg.
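
For reference, a typical session looks like this (the first command is the one
from the example's header comment; "adts" is just one container format that
accepts the AAC stream produced with the default settings):

  ffmpeg -i some_audio_file -f f32le -acodec pcm_f32le -ac 2 -ar 48000 raw_audio_file.raw
  ./convert_encode_mux_audio raw_audio_file.raw adts

The encoded, muxed stream is written to a file named "outfile".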
Hope it helps.
P
From 86db1c6dcd6f69028c9c25d03e6e19ef9fc2b331 Mon Sep 17 00:00:00 2001
From: paolo <[email protected]>
Date: Mon, 6 Jun 2022 15:34:05 +0200
Subject: [PATCH] Add a complete example - with clear, readable and sequential
 code - for doing basic operations on a raw audio file. It improves the
 current state of the examples directory
---
configure | 2 +
doc/examples/Makefile | 47 ++--
doc/examples/Makefile.example | 1 +
doc/examples/convert_encode_mux_audio.c | 351 ++++++++++++++++++++++++
4 files changed, 378 insertions(+), 23 deletions(-)
create mode 100644 doc/examples/convert_encode_mux_audio.c
diff --git a/configure b/configure
index 5a167613a4..775750aff0 100755
--- a/configure
+++ b/configure
@@ -1725,6 +1725,7 @@ COMPONENT_LIST="
EXAMPLE_LIST="
avio_list_dir_example
avio_reading_example
+ convert_encode_mux_audio_example
decode_audio_example
decode_video_example
demuxing_decoding_example
@@ -3760,6 +3761,7 @@ yadif_videotoolbox_filter_deps="metal corevideo videotoolbox"
# examples
avio_list_dir_deps="avformat avutil"
avio_reading_deps="avformat avcodec avutil"
+convert_encode_mux_audio_example_deps="avcodec avformat avutil swresample"
decode_audio_example_deps="avcodec avutil"
decode_video_example_deps="avcodec avutil"
demuxing_decoding_example_deps="avcodec avformat avutil"
diff --git a/doc/examples/Makefile b/doc/examples/Makefile
index 81bfd34d5d..aed73e811c 100644
--- a/doc/examples/Makefile
+++ b/doc/examples/Makefile
@@ -1,26 +1,27 @@
-EXAMPLES-$(CONFIG_AVIO_LIST_DIR_EXAMPLE) += avio_list_dir
-EXAMPLES-$(CONFIG_AVIO_READING_EXAMPLE) += avio_reading
-EXAMPLES-$(CONFIG_DECODE_AUDIO_EXAMPLE) += decode_audio
-EXAMPLES-$(CONFIG_DECODE_VIDEO_EXAMPLE) += decode_video
-EXAMPLES-$(CONFIG_DEMUXING_DECODING_EXAMPLE) += demuxing_decoding
-EXAMPLES-$(CONFIG_ENCODE_AUDIO_EXAMPLE) += encode_audio
-EXAMPLES-$(CONFIG_ENCODE_VIDEO_EXAMPLE) += encode_video
-EXAMPLES-$(CONFIG_EXTRACT_MVS_EXAMPLE) += extract_mvs
-EXAMPLES-$(CONFIG_FILTER_AUDIO_EXAMPLE) += filter_audio
-EXAMPLES-$(CONFIG_FILTERING_AUDIO_EXAMPLE) += filtering_audio
-EXAMPLES-$(CONFIG_FILTERING_VIDEO_EXAMPLE) += filtering_video
-EXAMPLES-$(CONFIG_HTTP_MULTICLIENT_EXAMPLE) += http_multiclient
-EXAMPLES-$(CONFIG_HW_DECODE_EXAMPLE) += hw_decode
-EXAMPLES-$(CONFIG_METADATA_EXAMPLE) += metadata
-EXAMPLES-$(CONFIG_MUXING_EXAMPLE) += muxing
-EXAMPLES-$(CONFIG_QSVDEC_EXAMPLE) += qsvdec
-EXAMPLES-$(CONFIG_REMUXING_EXAMPLE) += remuxing
-EXAMPLES-$(CONFIG_RESAMPLING_AUDIO_EXAMPLE) += resampling_audio
-EXAMPLES-$(CONFIG_SCALING_VIDEO_EXAMPLE) += scaling_video
-EXAMPLES-$(CONFIG_TRANSCODE_AAC_EXAMPLE) += transcode_aac
-EXAMPLES-$(CONFIG_TRANSCODING_EXAMPLE) += transcoding
-EXAMPLES-$(CONFIG_VAAPI_ENCODE_EXAMPLE) += vaapi_encode
-EXAMPLES-$(CONFIG_VAAPI_TRANSCODE_EXAMPLE) += vaapi_transcode
+EXAMPLES-$(CONFIG_AVIO_LIST_DIR_EXAMPLE) += avio_list_dir
+EXAMPLES-$(CONFIG_AVIO_READING_EXAMPLE) += avio_reading
+EXAMPLES-$(CONFIG_CONVERT_ENCODE_MUX_AUDIO_EXAMPLE) += convert_encode_mux_audio
+EXAMPLES-$(CONFIG_DECODE_AUDIO_EXAMPLE) += decode_audio
+EXAMPLES-$(CONFIG_DECODE_VIDEO_EXAMPLE) += decode_video
+EXAMPLES-$(CONFIG_DEMUXING_DECODING_EXAMPLE) += demuxing_decoding
+EXAMPLES-$(CONFIG_ENCODE_AUDIO_EXAMPLE) += encode_audio
+EXAMPLES-$(CONFIG_ENCODE_VIDEO_EXAMPLE) += encode_video
+EXAMPLES-$(CONFIG_EXTRACT_MVS_EXAMPLE) += extract_mvs
+EXAMPLES-$(CONFIG_FILTER_AUDIO_EXAMPLE) += filter_audio
+EXAMPLES-$(CONFIG_FILTERING_AUDIO_EXAMPLE) += filtering_audio
+EXAMPLES-$(CONFIG_FILTERING_VIDEO_EXAMPLE) += filtering_video
+EXAMPLES-$(CONFIG_HTTP_MULTICLIENT_EXAMPLE) += http_multiclient
+EXAMPLES-$(CONFIG_HW_DECODE_EXAMPLE) += hw_decode
+EXAMPLES-$(CONFIG_METADATA_EXAMPLE) += metadata
+EXAMPLES-$(CONFIG_MUXING_EXAMPLE) += muxing
+EXAMPLES-$(CONFIG_QSVDEC_EXAMPLE) += qsvdec
+EXAMPLES-$(CONFIG_REMUXING_EXAMPLE) += remuxing
+EXAMPLES-$(CONFIG_RESAMPLING_AUDIO_EXAMPLE) += resampling_audio
+EXAMPLES-$(CONFIG_SCALING_VIDEO_EXAMPLE) += scaling_video
+EXAMPLES-$(CONFIG_TRANSCODE_AAC_EXAMPLE) += transcode_aac
+EXAMPLES-$(CONFIG_TRANSCODING_EXAMPLE) += transcoding
+EXAMPLES-$(CONFIG_VAAPI_ENCODE_EXAMPLE) += vaapi_encode
+EXAMPLES-$(CONFIG_VAAPI_TRANSCODE_EXAMPLE) += vaapi_transcode
EXAMPLES := $(EXAMPLES-yes:%=doc/examples/%$(PROGSSUF)$(EXESUF))
EXAMPLES_G := $(EXAMPLES-yes:%=doc/examples/%$(PROGSSUF)_g$(EXESUF))
diff --git a/doc/examples/Makefile.example b/doc/examples/Makefile.example
index a232d97f98..f2438caeb8 100644
--- a/doc/examples/Makefile.example
+++ b/doc/examples/Makefile.example
@@ -13,6 +13,7 @@ LDLIBS := $(shell pkg-config --libs $(FFMPEG_LIBS)) $(LDLIBS)
EXAMPLES= avio_list_dir \
avio_reading \
+ convert_encode_mux_audio \
decode_audio \
decode_video \
demuxing_decoding \
diff --git a/doc/examples/convert_encode_mux_audio.c b/doc/examples/convert_encode_mux_audio.c
new file mode 100644
index 0000000000..abde611bf6
--- /dev/null
+++ b/doc/examples/convert_encode_mux_audio.c
@@ -0,0 +1,351 @@
+/*
+ * Copyright (c) 2019 Paolo Prete (paolopr976 at gmail.com)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/**
+ * @file
+ * API example for converting, encoding and muxing raw audio files.
+ * This example reads a raw audio input file, converts it to a sample format accepted by
+ * a given encoder, performs encoding and puts the encoded frames into a muxing container.
+ * The encoded stream is written to a file named "outfile".
+ * It can be adapted, with few changes, to a custom raw audio source (e.g. a live one).
+ * It uses a custom I/O write callback (write_muxed_data()) to show how muxed packets
+ * can be accessed in memory before they are written to the output file.
+ *
+ * The raw input audio file can be created with:
+ *
+ * ffmpeg -i some_audio_file -f f32le -acodec pcm_f32le -ac 2 -ar 48000 raw_audio_file.raw
+ *
+ * @example convert_encode_mux_audio.c
+ */
+
+#include <libavcodec/avcodec.h>
+#include <libavformat/avformat.h>
+#include <libavutil/timestamp.h>
+#include <libswresample/swresample.h>
+
+// Customizable parameters
+#define SAMPLE_RATE 48000
+#define ENCODER_ID AV_CODEC_ID_AAC
+#define ENCODER_BITRATE 64000
+#define INPUT_SAMPLE_FMT AV_SAMPLE_FMT_FLT
+#define OUTPUT_SAMPLE_FMT AV_SAMPLE_FMT_FLTP
+#define INPUT_CHANNELS 2
+#define OUTPUT_CHANNELS 2
+
+static int encoded_pkt_counter = 1;
+
+static int write_muxed_data(void *opaque, uint8_t *data, int size)
+{
+ FILE *encoded_audio_file = (FILE *)opaque;
+    size_t written = fwrite(data, 1, size, encoded_audio_file); //(h)
+    return written == (size_t)size ? size : AVERROR(EIO);
+}
+
+static int mux_encoded_packet(AVPacket *encoded_audio_packet, AVFormatContext *out_container_ctx)
+{
+ int ret_val;
+ if ((ret_val = av_write_frame(out_container_ctx, encoded_audio_packet)) < 0) {
+ av_log(NULL, AV_LOG_ERROR, "Error calling av_write_frame() (error '%s')\n", av_err2str(ret_val));
+ } else {
+ av_log(NULL, AV_LOG_INFO, "Encoded packet %d, size=%d, pts_time=%s\n",
+ encoded_pkt_counter, encoded_audio_packet->size,
+ av_ts2timestr(encoded_audio_packet->pts, &out_container_ctx->streams[0]->time_base));
+ }
+ return ret_val;
+}
+
+static int check_if_samplerate_is_supported(const AVCodec *audio_codec, int samplerate)
+{
+    const int *samplerates_list = audio_codec->supported_samplerates;
+    if (!samplerates_list) return 0; // a NULL list means any sample rate is supported
+    while (*samplerates_list) {
+        if (*samplerates_list == samplerate) return 0;
+        samplerates_list++;
+    }
+    return 1;
+}
+
+int main(int argc, char **argv)
+{
+ FILE *input_audio_file = NULL, *encoded_audio_file = NULL;
+ AVChannelLayout in_ch_layout;
+ AVChannelLayout out_ch_layout;
+ const AVCodec *audio_codec = NULL;
+ AVCodecContext *audio_encoder_ctx = NULL;
+ AVFrame *input_audio_frame = NULL, *converted_audio_frame = NULL;
+ SwrContext *audio_convert_context = NULL;
+ const AVOutputFormat *out_container = NULL;
+ AVFormatContext *out_container_ctx = NULL;
+ uint8_t *out_container_buffer = NULL;
+ size_t out_container_buffer_size = 4096;
+ AVIOContext *avio_ctx = NULL;
+ AVStream *out_stream = NULL;
+ AVPacket *encoded_audio_packet = NULL;
+ int ret_val = 0;
+ int audio_bytes_to_encode;
+
+ if (argc != 3) {
+        printf("Usage: %s <raw audio input file (INPUT_CHANNELS, INPUT_SAMPLE_FMT, SAMPLE_RATE)> <container format>\n", argv[0]);
+ return 1;
+ }
+
+ input_audio_file = fopen(argv[1], "rb");
+ if (!input_audio_file) {
+ av_log(NULL, AV_LOG_ERROR, "Could not open input audio file\n");
+ return AVERROR_EXIT;
+ }
+
+ encoded_audio_file = fopen("outfile", "wb");
+ if (!encoded_audio_file) {
+ av_log(NULL, AV_LOG_ERROR, "Could not open output audio file\n");
+ fclose(input_audio_file);
+ return AVERROR_EXIT;
+ }
+
+ /**
+ * Allocate the encoder's context and open the encoder
+ */
+ audio_codec = avcodec_find_encoder(ENCODER_ID);
+ if (!audio_codec) {
+ av_log(NULL, AV_LOG_ERROR, "Could not find encoder's codec\n");
+ ret_val = AVERROR_EXIT;
+ goto end;
+ }
+ if ((ret_val = check_if_samplerate_is_supported(audio_codec, SAMPLE_RATE)) != 0) {
+ av_log(NULL, AV_LOG_ERROR, "Audio codec doesn't support input samplerate %d\n", SAMPLE_RATE);
+ goto end;
+ }
+ audio_encoder_ctx = avcodec_alloc_context3(audio_codec);
+    if (!audio_encoder_ctx) {
+ av_log(NULL, AV_LOG_ERROR, "Could not allocate the encoding context\n");
+ ret_val = AVERROR_EXIT;
+ goto end;
+ }
+ av_channel_layout_default(&out_ch_layout, OUTPUT_CHANNELS);
+ audio_encoder_ctx->sample_fmt = OUTPUT_SAMPLE_FMT;
+ audio_encoder_ctx->bit_rate = ENCODER_BITRATE;
+ audio_encoder_ctx->sample_rate = SAMPLE_RATE;
+ audio_encoder_ctx->ch_layout = out_ch_layout;
+ audio_encoder_ctx->time_base = (AVRational){1, SAMPLE_RATE};
+    audio_encoder_ctx->codec_type = AVMEDIA_TYPE_AUDIO;
+ if ((ret_val = avcodec_open2(audio_encoder_ctx, audio_codec, NULL)) < 0) {
+        av_log(NULL, AV_LOG_ERROR, "Could not open encoder (error '%s')\n", av_err2str(ret_val));
+ goto end;
+ }
+
+ /**
+ * Allocate an AVFrame which will be filled with the input file's data.
+ */
+ if (!(input_audio_frame = av_frame_alloc())) {
+ av_log(NULL, AV_LOG_ERROR, "Could not allocate input frame\n");
+ ret_val = AVERROR(ENOMEM);
+ goto end;
+ }
+ av_channel_layout_default(&in_ch_layout, INPUT_CHANNELS);
+ input_audio_frame->nb_samples = audio_encoder_ctx->frame_size;
+ input_audio_frame->format = INPUT_SAMPLE_FMT;
+ input_audio_frame->sample_rate = SAMPLE_RATE;
+ input_audio_frame->ch_layout = in_ch_layout;
+ // Allocate the frame's data buffer
+    if ((ret_val = av_frame_get_buffer(input_audio_frame, 0)) < 0) {
+        av_log(NULL, AV_LOG_ERROR,
+               "Could not allocate a buffer for input frame samples (error '%s')\n", av_err2str(ret_val));
+        // keep the error code returned by av_frame_get_buffer() in ret_val
+        goto end;
+    }
+
+ /**
+     * Input data must be converted to the sample format required by the encoder.
+ * We allocate a SwrContext and an AVFrame (which will contain the converted samples) for this task.
+ * The AVFrame will feed the encoding function (avcodec_send_frame())
+ */
+ swr_alloc_set_opts2(&audio_convert_context,
+ &out_ch_layout,
+ OUTPUT_SAMPLE_FMT,
+ SAMPLE_RATE,
+ &in_ch_layout,
+ INPUT_SAMPLE_FMT,
+ SAMPLE_RATE,
+ 0,
+ NULL);
+ if (!audio_convert_context) {
+ av_log(NULL, AV_LOG_ERROR, "Could not allocate resample context\n");
+ ret_val = AVERROR(ENOMEM);
+ goto end;
+ }
+ if (!(converted_audio_frame = av_frame_alloc())) {
+ av_log(NULL, AV_LOG_ERROR, "Could not allocate resampled frame\n");
+ ret_val = AVERROR(ENOMEM);
+ goto end;
+ }
+ converted_audio_frame->nb_samples = audio_encoder_ctx->frame_size;
+ converted_audio_frame->format = audio_encoder_ctx->sample_fmt;
+ converted_audio_frame->ch_layout = audio_encoder_ctx->ch_layout;
+ converted_audio_frame->sample_rate = SAMPLE_RATE;
+ if ((ret_val = av_frame_get_buffer(converted_audio_frame, 0)) < 0) {
+ av_log(NULL, AV_LOG_ERROR,
+ "Could not allocate a buffer for resampled frame samples (error '%s')\n", av_err2str(ret_val));
+ goto end;
+ }
+
+ /**
+ * Create the output container for the encoded frames
+ */
+ out_container = av_guess_format(argv[2], NULL, NULL);
+ if (!out_container) {
+ av_log(NULL, AV_LOG_ERROR, "Could not find output format\n");
+ ret_val = AVERROR_EXIT;
+ goto end;
+ }
+ if ((ret_val = avformat_alloc_output_context2(&out_container_ctx, out_container, "", NULL)) < 0) {
+ av_log(NULL, AV_LOG_ERROR, "Could not create output context (error '%s')\n", av_err2str(ret_val));
+ goto end;
+ }
+ if (!(out_container_buffer = av_malloc(out_container_buffer_size))) {
+ av_log(NULL, AV_LOG_ERROR, "Could not allocate a buffer for the I/O output context\n");
+ ret_val = AVERROR(ENOMEM);
+ goto end;
+ }
+
+ /**
+ * Create an I/O context for the muxer's container with a write callback (write_muxed_data()),
+ * so that muxed data will be accessed through this function and can be managed by the user.
+ */
+ if (!(avio_ctx = avio_alloc_context(out_container_buffer, out_container_buffer_size,
+ 1, encoded_audio_file, NULL,
+ &write_muxed_data, NULL))) {
+ av_log(NULL, AV_LOG_ERROR, "Could not create I/O output context\n");
+ ret_val = AVERROR_EXIT;
+ goto end;
+ }
+
+ /**
+ * Link the container's context to the previous I/O context
+ */
+ out_container_ctx->pb = avio_ctx;
+ if (!(out_stream = avformat_new_stream(out_container_ctx, NULL))) {
+ av_log(NULL, AV_LOG_ERROR, "Could not create new stream\n");
+ ret_val = AVERROR(ENOMEM);
+ goto end;
+ }
+ out_stream->id = out_container_ctx->nb_streams-1;
+
+ // Copy the encoder's parameters
+ avcodec_parameters_from_context(out_stream->codecpar, audio_encoder_ctx);
+ // Allocate the stream private data and write the stream header
+ if (avformat_write_header(out_container_ctx, NULL) < 0) {
+ av_log(NULL, AV_LOG_ERROR, "avformat_write_header() error\n");
+ ret_val = AVERROR_EXIT;
+ goto end;
+ }
+
+ /**
+ * (a) Fill the input frame's data buffer with input file data
+ * (b) Convert the input frame to output sample format
+ *     (c) Assign a pts to the converted frame based on the total number of samples read
+ * (d) Send the converted frame to the encoder
+ * (e) Get the encoded packet
+ * (f) Rescale the timestamps of the encoded packet to the muxer's timebase
+ * (g) Send the encoded packet, with rescaled timestamps, to the muxer
+ *     (h) Muxed data is caught by the write_muxed_data() callback (see above)
+ *         and written to the output audio file
+ */
+ encoded_audio_packet = av_packet_alloc();
+ while (1) {
+
+ audio_bytes_to_encode = fread(input_audio_frame->data[0], 1,
+ input_audio_frame->linesize[0], input_audio_file); //(a)
+ if (audio_bytes_to_encode != input_audio_frame->linesize[0]) {
+ break;
+ } else {
+ if (av_frame_make_writable(converted_audio_frame) < 0) {
+ av_log(NULL, AV_LOG_ERROR, "av_frame_make_writable() error\n");
+ ret_val = AVERROR_EXIT;
+ goto end;
+ }
+
+ if ((ret_val = swr_convert_frame(audio_convert_context,
+ converted_audio_frame,
+ (const AVFrame *)input_audio_frame)) != 0) { //(b)
+ av_log(NULL, AV_LOG_ERROR,
+ "Error resampling input audio frame (error '%s')\n", av_err2str(ret_val));
+ goto end;
+ }
+
+ converted_audio_frame->pts = converted_audio_frame->nb_samples*(encoded_pkt_counter-1); //(c)
+
+ if ((ret_val = avcodec_send_frame(audio_encoder_ctx, converted_audio_frame)) == 0) //(d)
+ ret_val = avcodec_receive_packet(audio_encoder_ctx, encoded_audio_packet); //(e)
+ else {
+ av_log(NULL, AV_LOG_ERROR,
+ "Error encoding frame (error '%s')\n", av_err2str(ret_val));
+ goto end;
+ }
+
+ if (ret_val == 0) {
+ encoded_audio_packet->dts = encoded_audio_packet->pts = converted_audio_frame->pts;
+ av_packet_rescale_ts(encoded_audio_packet, audio_encoder_ctx->time_base,
+ out_stream->time_base); //(f)
+ if ((ret_val = mux_encoded_packet(encoded_audio_packet, out_container_ctx)) < 0) //(g)
+ goto end;
+ encoded_pkt_counter++;
+ } else if (ret_val != AVERROR(EAGAIN)) {
+ av_log(NULL, AV_LOG_ERROR,
+ "Error receiving encoded packet (error '%s')\n", av_err2str(ret_val));
+ goto end;
+ }
+ }
+ }
+
+    // Flush delayed packets buffered inside the encoder
+    if ((ret_val = avcodec_send_frame(audio_encoder_ctx, NULL)) == 0)
+        do {
+            ret_val = avcodec_receive_packet(audio_encoder_ctx, encoded_audio_packet);
+            if (ret_val == 0) {
+                encoded_audio_packet->pts = encoded_audio_packet->dts = converted_audio_frame->nb_samples*(encoded_pkt_counter-1);
+                av_packet_rescale_ts(encoded_audio_packet, audio_encoder_ctx->time_base,
+                                     out_stream->time_base);
+                if ((ret_val = mux_encoded_packet(encoded_audio_packet, out_container_ctx)) < 0)
+                    goto end;
+                encoded_pkt_counter++;
+            }
+        } while (ret_val == 0);
+    if (ret_val == AVERROR_EOF) ret_val = 0; // AVERROR_EOF here just means the encoder is fully flushed
+
+ av_write_trailer(out_container_ctx);
+
+end:
+
+ fclose(input_audio_file);
+ fclose(encoded_audio_file);
+ avcodec_free_context(&audio_encoder_ctx);
+ av_frame_free(&input_audio_frame);
+ swr_free(&audio_convert_context);
+ av_frame_free(&converted_audio_frame);
+ avformat_free_context(out_container_ctx);
+    avio_context_free(&avio_ctx); // use the dedicated free function for AVIOContext
+    av_freep(&out_container_buffer);
+ av_packet_free(&encoded_audio_packet);
+
+ return ret_val;
+
+}
--
2.32.0
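
P.S. On adapting the example to a live raw audio source: only step (a) needs
to change, since the fread() call is the single point where raw samples enter
the pipeline. Below is a minimal sketch, assuming a hypothetical capture
function get_live_samples() (not part of the patch) that fills a buffer with
interleaved float samples and returns the number of bytes written:

    /* Replace step (a): pull one frame's worth of raw audio from a live
     * source instead of reading the input file. get_live_samples() is a
     * placeholder for whatever capture API is actually available. */
    audio_bytes_to_encode = get_live_samples(input_audio_frame->data[0],
                                             input_audio_frame->linesize[0]);
    if (audio_bytes_to_encode != input_audio_frame->linesize[0])
        break; /* capture ended or failed: fall through to the flush step */

Everything downstream (conversion, encoding, muxing) stays the same. In the
same spirit, write_muxed_data() is the single place where muxed bytes leave
the pipeline, so redirecting the output (e.g. to a socket) only requires
changing that callback.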