Infer Demo

This demo shows how to perform video inference using a selected camera device.
Users can configure multiple supported AI models to run in parallel on Qualcomm platforms and record the output video to an .mp4 file.
The demo also provides optional FPS calculation for either the video source or the inference result.

Code Example

#include "AvtCore.h"
#include "Graph/AvtGraph.h"
#include "EncoderHelper.h"
#include "DeviceHelper.h"
#include "Source/AvtVideoSourceProperty.h"
#include "Source/Controller/AvtVideoSourceController.h"
#include "Source/AvtVideoSourceProperty.h"
#include "Sink/AvtRecordParam.h"
#include "AI/AvtNVDeepStreamVideoInference.h"
#include "AI/AvtQCOMVideoInference.h"

#include <unistd.h>

#include <codecvt>
#include <csignal>
#include <cstdio>
#include <iostream>
#include <limits>
#include <locale>
#include <string>
#include <vector>

using namespace AVTSDK;
using namespace AVTSDK::Log;
using namespace AVTSDK::Graph;
using namespace AVTSDK::Source;
using namespace AVTSDK::Encoder;
using namespace AVTSDK::Sink;
using namespace AVTSDK::Source::Controller;
using namespace AVTSDK::AI;

using namespace std;

/**
 * Run flag polled by main(): non-zero while the demo should keep running.
 *
 * It is written from a signal handler, so it uses volatile std::sig_atomic_t,
 * the object type the C++ standard allows a handler to modify.
 */
volatile std::sig_atomic_t running = 1;

/**
 * Signal handler that requests shutdown by clearing the run flag.
 *
 * @param signum Number of the delivered signal (unused).
 */
void signalHandler(int signum)
{
    (void)signum;
    running = 0;
}

/**
 * Lists the entries of a directory by running `ls -1` and collecting its output.
 *
 * The directory is passed to the shell as a single-quoted argument so that
 * paths containing spaces or shell metacharacters are listed literally.
 *
 * @param directory Path of the directory to list.
 * @return One element per output line of `ls -1`, in the order printed.
 *         Empty when the pipe cannot be opened or nothing is printed.
 */
std::vector<std::string> listFiles(const std::string &directory) {
    std::vector<std::string> files;

    // Wrap the path in single quotes; an embedded ' becomes '\'' (close the
    // quote, emit an escaped quote, reopen the quote).
    std::string quoted = "'";
    for (char c : directory) {
        if (c == '\'')
            quoted += "'\\''";
        else
            quoted += c;
    }
    quoted += "'";

    // "--" stops option parsing so a path starting with '-' is not read as a flag.
    const std::string command = "ls -1 -- " + quoted;
    FILE *pipe = popen(command.c_str(), "r");
    if (!pipe) {
        std::cerr << "Failed to open pipe" << std::endl;
        return files;
    }

    char buffer[256];
    std::string line;
    while (fgets(buffer, sizeof(buffer), pipe) != nullptr) {
        line += buffer;
        // fgets returns a partial line when it does not fit in the buffer;
        // keep accumulating until the terminating newline has been read.
        if (line.back() != '\n')
            continue;

        line.erase(line.find_last_not_of("\n\r") + 1);
        if (!line.empty())
            files.push_back(line);
        line.clear();
    }

    // Output that ended without a trailing newline.
    line.erase(line.find_last_not_of("\n\r") + 1);
    if (!line.empty())
        files.push_back(line);

    pclose(pipe);
    return files;
}

int main()
{
    AvtCore::init(AvtLogLevel::LOG_LEVEL_WARNING, AvtLogType::LOG_TYPE_DELAYED);

    AvtGraph *graph = nullptr;
    do {
        EncoderHelper encoderHelper;
        auto vEncoderCount = encoderHelper.getVideoEncoderCount();
        if (vEncoderCount == 0) {
            cout << "No Video Encoder" << endl;
            break;
        }

        DeviceHelper devHelper;
        auto vDevCount = devHelper.getVideoDeviceCount();
        if (vDevCount == 0) {
            cout << "No Device" << endl;
            break;
        }

        AvtRecordParam recordParam;
        recordParam.mFormat = AvtRecordFormat::RECORD_FORMAT_MP4;
        recordParam.mOutputPath.setString(L"infer.mp4");

        AvtGraphFeature ft;
        ft.mVideo.mEnable = true;
        auto &vFt = ft.mVideo.mProperty;
        // Displaying results on a monitor
        // vFt.mPreview.mEnable = true;
        // vFt.mPreview.mProperty.mWindowID = 0;
#if defined(__QCOM__)
        vFt.mGraphicsAPI = AvtGraphicsAPI::GRAPHICS_API_QCOM;
#else
        vFt.mGraphicsAPI = AvtGraphicsAPI::GRAPHICS_API_NV;
#endif
        graph = new AvtGraph(ft);

        AvtVideoSourceProperty prop;
        prop.mLockDevice = true;

        if (!devHelper.selectVideoDevice(prop)) {
            cout << "Failed to select the correct device or device property" << endl;
            break;
        }

        AvtEncoderParam encodeParam;
        if (!encoderHelper.selectVideoEncoder(prop.mResolution, prop.mFrameRate, encodeParam,
                                              true)) {
            cout << "Failed to get the encoder parameters" << endl;
            break;
        }

        AvtResult result;
        graph->createGraph();
        int sourceID;
        result = graph->addSource(prop, sourceID);
        if (result != AvtResult::AVT_RESULT_OK) {
            cout << "Failed to add video source" << endl;
            break;
        }

        int encoderID;
        result = graph->createEncoder(encodeParam, encoderID);
        if (result != AvtResult::AVT_RESULT_OK) {
            cout << "Failed to create encoder" << endl;
            break;
        }

        int recordID;
        result = graph->addRecord(recordParam, encoderID, recordID);
        if (result != AvtResult::AVT_RESULT_OK) {
            cout << "Failed to add record" << endl;
            break;
        }

        auto controller = static_cast<const AvtVideoSourceController *>(graph->getSourceController(sourceID));
#if defined(__QCOM__)
        AvtQCOMVideoInference inference;
        do {
            cout << "===== Enter the number of inferences:(1~4) =====" << endl;
            cin >> inference.mInferenceNumber;
        } while (inference.mInferenceNumber < 1 || inference.mInferenceNumber > 4);

        string directory = "/opt/demo/config";
        vector<string> configFiles = listFiles(directory);

        if (configFiles.empty()) {
            cout << "No config files found in " << directory << endl;
            return 1;
        }

        cout << "===== Available config files =====" << endl;
        for (size_t i = 0; i < configFiles.size(); ++i) {
            cout << "[" << i << "] " << configFiles[i] << endl;
        }

        wstring_convert<std::codecvt_utf8<wchar_t>> converter;
        for(int i=0; i< inference.mInferenceNumber; ++i)
        {
            while(true) {
                int fileIndex;
                cout << "Enter the index of config file for inference " << (i + 1) << ": ";
                cin >> fileIndex;

                if (fileIndex < 0 || fileIndex >= static_cast<int>(configFiles.size())) {
                    cout << "Invalid index. Please choose a valid option." << endl;
                    continue;
                }
                cout << "Model selected: " + configFiles[fileIndex] << endl;

                string selectedFilePath = directory + "/" + configFiles[fileIndex];
                wstring s = converter.from_bytes(selectedFilePath);
                inference.mConfigFilePath[i].setString(s.c_str());
                break;
            }
        }

#else
        AvtNVDeepStreamVideoInference inference;

        cout << "Enter the config file path" << endl;
        string configFilePath;
        cin >> configFilePath;

        wstring_convert<std::codecvt_utf8<wchar_t>> converter;
        wstring s = converter.from_bytes(configFilePath);
        inference.mConfigFilePath.setString(s.c_str());
#endif
        result = controller->enableInference(inference);

        if (result != AvtResult::AVT_RESULT_OK) {
            cout << "Failed to enable inference!" << endl;
            break;
        }

        int fpsIndex;
        cout << "===== FPS calculation activation =====" << endl;
        cout << "[0] " << "No" << endl;
        cout << "[1] " << "Yes" << endl;
        cin >> fpsIndex;
        if (fpsIndex < 0 || fpsIndex > 1) {
            cout << "Input index out of range!" << endl;
            break;
        } else if (fpsIndex == 1) {
            AvtFeature<Source::AvtSourceFPS> FPSInfo; /**< Feature for frame per second (FPS). */
            int optionIndex = 0;
            cout << "===== Which FPS to check =====" << endl;
            cout << "[0] " << "Video source" << endl;
            cout << "[1] " << "Inference" << endl;
            cin >> optionIndex;

            if (optionIndex == 0) {
                AvtFeature<Source::AvtSourceFPS> FPSInfo;
                FPSInfo.mEnable = true;
                FPSInfo.mProperty.mCB = [] (const int &sourceID, const int &trackIndex,
                                                const double &fps, void *data) {
                    cout << "FPS: " << fps << endl;
                };
                result = controller->setFPSInfo(FPSInfo);
            } else if (optionIndex == 1) {
                AvtFeature<Source::AvtSourceFPS> inferFPSInfo;
                inferFPSInfo.mEnable = true;
                inferFPSInfo.mProperty.mCB = [] (const int &sourceID, const int &trackIndex,
                                                const double &fps, void *data) {
                    cout << "Infer FPS: " << fps << endl;
                };
                result = controller->setInferenceFPSInfo(inferFPSInfo);
            } else {
                cout << "Input index out of range!" << endl;
                break;
            }
        }

        result = graph->runGraph();
        if (result != AvtResult::AVT_RESULT_OK) {
            cout << "Failed to run graph";
            break;
        }

        signal(SIGINT, signalHandler);
        while (running)
            sleep(1);

        graph->removeRecord(recordID);

    } while (false);


    if (graph)
        delete graph;

    AvtCore::uninit();
    return 0;
}

Explanation

Helper Classes

EncoderHelper and DeviceHelper are helper classes written for the demos and are not part of the library. For details about how to handle encoders and devices, please check the source code in EncoderHelper.cpp and DeviceHelper.cpp.

Initialization

The program begins by initializing the AVT core with a warning-level log configuration.
Encoder and Device Availability Check

It checks whether both video encoders and input devices are available. If either is missing, the program exits early.
Graph and Feature Setup

An AvtGraphFeature structure is configured with video support enabled.
The appropriate graphics API is selected based on the platform.
Recording parameters are set to save the output as infer.mp4.
Note

There are two commented lines in the sample code:
```
// vFt.mPreview.mEnable = true;
// vFt.mPreview.mProperty.mWindowID = 0;
```
If you prefer to display the output on a monitor rather than saving it as an .mp4 file, uncomment the lines above and make sure to comment out the recording-related lines to avoid unintended video recording.
Graph Creation

A new AvtGraph instance is created using the configured features.

Note

If you are not familiar with AvtGraph, please refer to AVT SDK Multimedia Framework.
Device and Encoder Selection

The user is prompted to select a video input device using DeviceHelper::selectVideoDevice(). Then, an encoder is selected using EncoderHelper::selectVideoEncoder() based on the chosen resolution and frame rate.
Source and Record Node Addition

The selected video source is added to the graph using addSource().
An encoder node is created using createEncoder(), and a record node is added using addRecord() to save the output stream.
Inference Configuration

On Qualcomm platforms, AvtQCOMVideoInference is the configuration structure for the AI inference component. In this demo, users are prompted to:
1. Enter the number of AI inferences (1–4) to run in parallel.
2. Choose a config file for each inference from the files found in /opt/demo/config.
On other platforms, the demo is built with AvtNVDeepStreamVideoInference instead, and the user is prompted for a single config file path.
Because the AI inference component is part of the source component, it is enabled and initialized with AvtVideoSourceController::enableInference().

Note

For details about the AI inference component, including the configuration file and the internal pipeline structure, please refer to AI Inference on Qualcomm Platforms.
FPS Monitoring (Optional)

Users can optionally enable FPS calculation.
If enabled, they must specify whether to measure FPS at the video source or inference output stage.
A callback is registered to display the FPS in real time via the CLI.
Graph Execution

The graph is executed using runGraph(), which starts the video processing pipeline with inference.
Runtime Loop and Termination

A signal handler is registered to listen for SIGINT (Ctrl+C).
The program enters a loop to keep the pipeline running until interrupted.
Upon termination, the record node is removed, the graph is deleted, and the AVT core is uninitialized.