TensorRT学习笔记--常用卷积、激活、池化和FC层算子API

news/2024/7/7 7:53:08

目录

1--Tensor算子API

1-1--卷积算子

1-2--激活算子

1-3--池化算子

1-4--FC层算子

2--代码实例

3--编译运行


1--Tensor算子API

TensorRT提供了卷积层、激活函数和池化层三种最常用算子的API:

// Create an empty network definition (createNetworkV2 is called with 0U)
nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0U); 

// Add a convolution layer: 64 output maps, 3x3 kernel, weights and bias looked up in weightMap
nvinfer1::IConvolutionLayer* conv1 = network->addConvolutionNd(*data, 64, nvinfer1::DimsHW{3, 3}, weightMap["features.0.weight"], weightMap["features.0.bias"]);

// Add a ReLU activation layer fed by the first output of the convolution
nvinfer1::IActivationLayer* relu1 = network->addActivation(*conv1->getOutput(0), nvinfer1::ActivationType::kRELU);

// Add a max-pooling layer with a 2x2 window fed by the first output of the activation
nvinfer1::IPoolingLayer* pool1 = network->addPoolingNd(*relu1->getOutput(0), nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{2, 2});

1-1--卷积算子

// Signature excerpt: adds a convolution layer to the network and returns it.
IConvolutionLayer* addConvolutionNd(
    ITensor& input,         // input tensor
    int32_t nbOutputMaps,   // number of output feature maps (output channels)
    Dims kernelSize,        // size of the convolution kernel
    Weights kernelWeights,  // kernel weights to load
    Weights biasWeights     // bias weights to load
)

第一个参数表示输入的Tensor数据;

第二个参数表示卷积层输出的特征图数,即通道数channel;

第三个参数表示使用的卷积核大小;

第四个参数和第五个参数分别表示加载的卷积核权重(kernelWeights)和偏置权重(biasWeights);

1-2--激活算子

// Signature excerpt: adds an activation layer to the network and returns it.
IActivationLayer* addActivation(
    ITensor& input,      // input tensor
    ActivationType type  // which activation function to apply
)

第一个参数表示输入的Tensor数据;

第二个参数表示使用的激活函数类型,包括以下激活函数:

// Activation function kinds selectable through the `type` argument of addActivation.
// Each enumerator's trailing comment gives the formula it applies to an input x.
enum class ActivationType : int32_t
{
    kRELU = 0,             //!< Rectified linear activation.
    kSIGMOID = 1,          //!< Sigmoid activation.
    kTANH = 2,             //!< TanH activation.
    kLEAKY_RELU = 3,       //!< LeakyRelu activation: x>=0 ? x : alpha * x.
    kELU = 4,              //!< Elu activation: x>=0 ? x : alpha * (exp(x) - 1).
    kSELU = 5,             //!< Selu activation: x>0 ? beta * x : beta * (alpha*exp(x) - alpha)
    kSOFTSIGN = 6,         //!< Softsign activation: x / (1+|x|)
    kSOFTPLUS = 7,         //!< Parametric softplus activation: alpha*log(exp(beta*x)+1)
    kCLIP = 8,             //!< Clip activation: max(alpha, min(beta, x))
    kHARD_SIGMOID = 9,     //!< Hard sigmoid activation: max(0, min(1, alpha*x+beta))
    kSCALED_TANH = 10,     //!< Scaled tanh activation: alpha*tanh(beta*x)
    kTHRESHOLDED_RELU = 11 //!< Thresholded ReLU activation: x>alpha ? x : 0
};

1-3--池化算子

// Signature excerpt: adds a pooling layer to the network and returns it.
IPoolingLayer* addPoolingNd(
    ITensor& input,    // input tensor
    PoolingType type,  // which pooling operation to apply
    Dims windowSize    // size of the pooling window
)

第一个参数表示输入的Tensor数据;

第二个参数表示使用的池化类型;

第三个参数表示池化窗口的大小;

提供的池化类型包括:

// Pooling kinds selectable through the `type` argument of addPoolingNd.
enum class PoolingType : int32_t
{
    kMAX = 0,              // Maximum over elements
    kAVERAGE = 1,          // Average over elements. If the tensor is padded, the count includes the padding
    kMAX_AVERAGE_BLEND = 2 // Blending between max and average pooling: (1-blendFactor)*maxPool + blendFactor*avgPool
};

1-4--FC层算子

// Signature excerpt: adds a fully connected layer to the network and returns it.
IFullyConnectedLayer* addFullyConnected(
    ITensor& input,         // input tensor
    int32_t nbOutputs,      // number of outputs (output channels)
    Weights kernelWeights,  // kernel weights to load
    Weights biasWeights     // bias weights to load
)

第一个参数表示输入的Tensor数据;

第二个参数表示输出的通道数;

第三个参数和第四个参数分别表示加载的权重(kernelWeights)和偏置(biasWeights);

2--代码实例

基于算子 API 搭建 VGG11:(完整可运行的代码参考:liujf69/TensorRT-Demo)

核心程序代码:

// Create the builder and its config
nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(gLogger);
nvinfer1::IBuilderConfig* config = builder->createBuilderConfig();

// Create the network from the builder
nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0U); // starts out empty

// Build the network through the layer API
// Declare the network input (3 x INPUT_H x INPUT_W)
nvinfer1::ITensor* data = network->addInput(this->INPUT_BLOB_NAME, dt, nvinfer1::Dims3{3, this->INPUT_H, this->INPUT_W});
// Convolution layer: 64 output maps, 3x3 kernel, padding 1
nvinfer1::IConvolutionLayer* conv1 = network->addConvolutionNd(*data, 64, nvinfer1::DimsHW{3, 3}, weightMap["features.0.weight"], weightMap["features.0.bias"]);
conv1->setPaddingNd(nvinfer1::DimsHW{1, 1});
// Activation layer (ReLU)
nvinfer1::IActivationLayer* relu1 = network->addActivation(*conv1->getOutput(0), nvinfer1::ActivationType::kRELU);
// Pooling layer: 2x2 max pooling with stride 2
nvinfer1::IPoolingLayer* pool1 = network->addPoolingNd(*relu1->getOutput(0), nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{2, 2});
pool1->setStrideNd(nvinfer1::DimsHW{2, 2});
...
// Fully connected layer with 4096 outputs
nvinfer1::IFullyConnectedLayer* fc1 = network->addFullyConnected(*pool1->getOutput(0), 4096, weightMap["classifier.0.weight"], weightMap["classifier.0.bias"]);
...

// Build the engine from the config and the network
builder->setMaxBatchSize(maxBatchSize);
config->setMaxWorkspaceSize(1 << 20);
nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
...

主程序代码:

#include "NvInfer.h"
#include "cuda_runtime_api.h"
#include <cassert>
#include <chrono>
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>
#include "logging.h"

// Evaluates `status` exactly once; when the result is non-zero, prints it to
// std::cerr prefixed with "Cuda failure: " and aborts the process.
// Wrapped in do/while(0) so that it behaves like a single statement.
#define CHECK(status) \
    do { \
        const auto checkedStatus_ = (status); \
        if (checkedStatus_ == 0) \
            break; \
        std::cerr << "Cuda failure: " << checkedStatus_ << std::endl; \
        abort(); \
    } while (0)

static Logger gLogger; // file-local logger passed to createInferBuilder / createInferRuntime

// Demo that builds a VGG network with the TensorRT layer API, serializes the
// resulting engine to a file, and later deserializes it to run inference.
//
// The object owns the host-side output buffer `prob` (OUTPUT_SIZE floats).
class VGG_Demo{
public:
    VGG_Demo(){
        // Value-initialize so the buffer holds zeros until an inference fills it.
        this->prob = new float[OUTPUT_SIZE]();
    }
    ~VGG_Demo(){
        delete[] prob;
    }
    // The class owns raw buffers; an implicit copy would make two objects
    // delete the same `prob` array, so copying is disabled.
    VGG_Demo(const VGG_Demo&) = delete;
    VGG_Demo& operator=(const VGG_Demo&) = delete;

    // Builds the engine and writes it to the engine file; returns 1 on success, -1 on failure.
    int serialize();
    // Builds the engine for `maxBatchSize` and stores its serialized form in *modelStream.
    void APIToModel(unsigned int maxBatchSize, nvinfer1::IHostMemory** modelStream);
    // Assembles the network layer by layer and builds an engine from it.
    nvinfer1::ICudaEngine* createEngine(unsigned int maxBatchSize, 
                                            nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt);
    // Parses a weight file into a name -> Weights map.
    std::map<std::string, nvinfer1::Weights> loadWeights(const std::string file);
    // Runs one inference: copies `input` to the device, executes, copies the result to `output`.
    void doInference(nvinfer1::IExecutionContext& context, float* input, float* output, int batchSize);

    // Loads the engine file, runs inference on `data` and prints timings and results.
    void deserialize(float* data);
    // Reads the engine file into trtModelStream / size.
    void load_engine();
    
    const char* INPUT_BLOB_NAME = "data"; // name of the input tensor
    const char* OUTPUT_BLOB_NAME = "prob"; // name of the output tensor
    const int INPUT_H = 224; // input height
    const int INPUT_W = 224; // input width
    const int OUTPUT_SIZE = 1000; // number of output values

    std::string engine_file = "./vgg.engine"; // path of the serialized engine
    char* trtModelStream = nullptr; // serialized engine bytes read by load_engine()
    float* prob = nullptr; // host output buffer, OUTPUT_SIZE floats
    size_t size = 0; // number of bytes in trtModelStream
};

// Builds the network through the layer API, serializes the engine and writes
// it to `engine_file`.
//
// Returns 1 on success and -1 when the engine could not be built or the file
// could not be written. The serialized buffer is released on every path.
int VGG_Demo::serialize(){
    nvinfer1::IHostMemory* modelStream = nullptr;
    this->APIToModel(1, &modelStream); // build the network with batch size 1
    assert(modelStream != nullptr);
    if (modelStream == nullptr) { // still guarded when asserts are compiled out
        std::cerr << "could not build serialized engine" << std::endl;
        return -1;
    }

    // Write to the same path that load_engine() reads from.
    std::ofstream p(this->engine_file, std::ios::binary);
    if (!p) {
        std::cerr << "could not open plan output file" << std::endl;
        modelStream->destroy(); // do not leak the serialized engine on failure
        return -1;
    }
    p.write(reinterpret_cast<const char*>(modelStream->data()), modelStream->size());
    p.flush(); // surface buffered write errors before reporting success
    const bool written = p.good();
    modelStream->destroy();
    if (!written) {
        std::cerr << "could not write plan output file" << std::endl;
        return -1;
    }
    return 1;
}

// Creates a builder and config, builds the engine through createEngine() and
// stores its serialized form in *modelStream.
//
// maxBatchSize: forwarded to createEngine().
// modelStream:  out-parameter; receives the serialized engine, or nullptr
//               when the builder, config or engine could not be created.
//               The caller is responsible for destroying it.
void VGG_Demo::APIToModel(unsigned int maxBatchSize, nvinfer1::IHostMemory** modelStream){
    assert(modelStream != nullptr);
    *modelStream = nullptr;

    // Create the builder and its config
    nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(gLogger);
    assert(builder != nullptr);
    if (builder == nullptr) return;
    nvinfer1::IBuilderConfig* config = builder->createBuilderConfig();
    assert(config != nullptr);
    if (config == nullptr) {
        builder->destroy();
        return;
    }

    nvinfer1::ICudaEngine* engine = this->createEngine(maxBatchSize, builder, config, nvinfer1::DataType::kFLOAT);
    assert(engine != nullptr);

    if (engine != nullptr) {
        // Serialize, then release the engine
        *modelStream = engine->serialize();
        engine->destroy();
    }
    // Release in reverse order of creation: the config came from the builder.
    config->destroy();
    builder->destroy();
}

// Assembles the VGG network layer by layer and builds an engine from it.
//
// maxBatchSize: maximum batch size set on the builder.
// builder:      builder used to create the network and the engine.
// config:       builder config; its workspace size is set here.
// dt:           data type of the network input.
//
// Returns the built engine (may be nullptr if the build failed). The network
// definition and the host-side weight buffers are released before returning.
nvinfer1::ICudaEngine* VGG_Demo::createEngine(unsigned int maxBatchSize, nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt){
    // Load the weights
    std::map<std::string, nvinfer1::Weights> weightMap = loadWeights("../weights/vgg.wts");

    nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0U); // starts out empty
    assert(network);
    nvinfer1::ITensor* data = network->addInput(this->INPUT_BLOB_NAME, dt, nvinfer1::Dims3{3, this->INPUT_H, this->INPUT_W});
    assert(data);

    // ReLU activation on `in`.
    auto relu = [&](nvinfer1::ITensor* in) -> nvinfer1::ITensor* {
        assert(in);
        nvinfer1::IActivationLayer* act = network->addActivation(*in, nvinfer1::ActivationType::kRELU);
        assert(act);
        return act->getOutput(0);
    };
    // 3x3 convolution with padding 1 followed by ReLU; weights are "<prefix>.weight" / "<prefix>.bias".
    auto convRelu = [&](nvinfer1::ITensor* in, int outMaps, const std::string& prefix) -> nvinfer1::ITensor* {
        assert(in);
        nvinfer1::IConvolutionLayer* conv = network->addConvolutionNd(*in, outMaps, nvinfer1::DimsHW{3, 3}, weightMap[prefix + ".weight"], weightMap[prefix + ".bias"]);
        assert(conv);
        conv->setPaddingNd(nvinfer1::DimsHW{1, 1});
        return relu(conv->getOutput(0));
    };
    // 2x2 max pooling with stride 2.
    auto maxPool = [&](nvinfer1::ITensor* in) -> nvinfer1::ITensor* {
        assert(in);
        nvinfer1::IPoolingLayer* pool = network->addPoolingNd(*in, nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{2, 2});
        assert(pool);
        pool->setStrideNd(nvinfer1::DimsHW{2, 2});
        return pool->getOutput(0);
    };
    // Fully connected layer; weights are "<prefix>.weight" / "<prefix>.bias".
    auto fullyConnected = [&](nvinfer1::ITensor* in, int nbOutputs, const std::string& prefix) -> nvinfer1::ITensor* {
        assert(in);
        nvinfer1::IFullyConnectedLayer* fc = network->addFullyConnected(*in, nbOutputs, weightMap[prefix + ".weight"], weightMap[prefix + ".bias"]);
        assert(fc);
        return fc->getOutput(0);
    };

    // Feature extractor: convolution/ReLU stages, each group closed by a max pool
    nvinfer1::ITensor* x = data;
    x = maxPool(convRelu(x, 64, "features.0"));

    x = maxPool(convRelu(x, 128, "features.3"));

    x = convRelu(x, 256, "features.6");
    x = maxPool(convRelu(x, 256, "features.8"));

    x = convRelu(x, 512, "features.11");
    x = maxPool(convRelu(x, 512, "features.13"));

    x = convRelu(x, 512, "features.16");
    x = maxPool(convRelu(x, 512, "features.18"));

    // Classifier: three fully connected layers, ReLU after the first two
    x = relu(fullyConnected(x, 4096, "classifier.0"));
    x = relu(fullyConnected(x, 4096, "classifier.3"));
    // The last layer must produce exactly as many values as the `prob` buffer holds.
    x = fullyConnected(x, this->OUTPUT_SIZE, "classifier.6");

    x->setName(OUTPUT_BLOB_NAME); // name the output tensor
    network->markOutput(*x); // mark it as a network output

    // Build the engine
    builder->setMaxBatchSize(maxBatchSize);
    config->setMaxWorkspaceSize(1 << 20); // 1 MiB of workspace
    nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
    std::cout << "build out" << std::endl;

    // The network definition is no longer needed once the engine is built
    network->destroy();
    // Release the weight buffers allocated by loadWeights()
    for (auto& mem : weightMap) free(const_cast<void*>(mem.second.values));

    return engine;
}

// Parses a text weight file into a name -> Weights map.
//
// Layout read by this function: a decimal blob count, then for every blob its
// name, a decimal element count, and that many hexadecimal 32-bit words.
//
// file: path of the weight file.
// Returns the map; every Weights::values buffer is allocated with malloc and
// must be released by the caller with free.
std::map<std::string, nvinfer1::Weights> VGG_Demo::loadWeights(const std::string file){
    std::cout << "Loading weights: " << file << std::endl;
    std::map<std::string, nvinfer1::Weights> weightMap; // weight name -> weight blob
    std::ifstream input(file);
    assert(input.is_open() && "Unable to load weight file.");

    // The file starts with the number of weight blobs
    int32_t count = 0; // stays 0 if the read fails, so the loop below is skipped
    input >> count;
    assert(count > 0 && "Invalid weight map file.");

    // `> 0` keeps a negative count from looping until wrap-around
    while (count-- > 0){
        nvinfer1::Weights wt{nvinfer1::DataType::kFLOAT, nullptr, 0};
        uint32_t size = 0;

        // Read the blob name and its element count (decimal)
        std::string name;
        input >> name >> std::dec >> size;
        assert(!input.fail() && "Malformed weight blob header.");

        // One 32-bit word per element: sizeof the element, not of the pointer
        uint32_t* val = static_cast<uint32_t*>(std::malloc(sizeof(uint32_t) * size));
        assert((val != nullptr || size == 0) && "Out of memory while loading weights.");
        for (uint32_t x = 0; x < size; ++x){
            input >> std::hex >> val[x]; // values are stored as hexadecimal words
        }
        assert(!input.fail() && "Truncated weight blob.");

        // Register the blob under its name
        wt.values = val;
        wt.count = size;
        weightMap[name] = wt;
    }

    return weightMap;
}

// Loads the serialized engine from `engine_file`, runs inference on `data`
// ten times while printing the elapsed milliseconds of each run, then prints
// the first ten values of the output buffer `prob`.
//
// data: host input buffer handed to doInference() with batch size 1.
void VGG_Demo::deserialize(float* data){
    load_engine(); // read the engine file into trtModelStream / size
    if (this->trtModelStream == nullptr || this->size == 0){
        std::cerr << "could not load engine file: " << this->engine_file << std::endl;
        delete[] this->trtModelStream; // releases a zero-length buffer, no-op for nullptr
        this->trtModelStream = nullptr;
        this->size = 0;
        return;
    }

    nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(gLogger);
    assert(runtime != nullptr);
    nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(this->trtModelStream, this->size);
    assert(engine != nullptr);
    nvinfer1::IExecutionContext* context = engine->createExecutionContext();
    assert(context != nullptr);

    // Release the serialized bytes and clear the members so that no dangling
    // pointer is left behind for a later call.
    delete[] this->trtModelStream;
    this->trtModelStream = nullptr;
    this->size = 0;

    // Run inference ten times and report the duration of each run
    for (int i = 0; i < 10; i++){
        // steady_clock is monotonic, so the interval cannot be skewed by clock adjustments
        const auto start = std::chrono::steady_clock::now();
        doInference(*context, data, this->prob, 1);
        const auto end = std::chrono::steady_clock::now();
        std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;
    }

    // Tear down in reverse order of creation
    context->destroy();
    engine->destroy();
    runtime->destroy();

    // Print the first ten output values
    std::cout << "\nOutput:\n\n";
    for (unsigned int i = 0; i < 10; i++){
        std::cout << this->prob[i] << ", ";
        if (i % 10 == 0) std::cout << i / 10 << std::endl;
    }
    std::cout << std::endl;
}

// Reads the whole file `engine_file` into a freshly allocated buffer.
//
// On success `trtModelStream` points to the bytes and `size` holds their
// count; the caller releases the buffer with delete[].
// On failure an error is printed and `trtModelStream` / `size` are reset to
// nullptr / 0 so callers never see a stale pointer. The previous pointer is
// deliberately not deleted here because it may already have been freed by
// the caller.
void VGG_Demo::load_engine(){
    std::ifstream file(this->engine_file, std::ios::binary);
    if (!file.good()){
        std::cerr << "could not open engine file: " << this->engine_file << std::endl;
        this->trtModelStream = nullptr;
        this->size = 0;
        return;
    }

    // Determine the file length by seeking to the end
    file.seekg(0, file.end);
    const std::streamoff length = file.tellg(); // -1 when the position is unavailable
    file.seekg(0, file.beg);
    if (length <= 0){
        std::cerr << "engine file is empty or unreadable: " << this->engine_file << std::endl;
        this->trtModelStream = nullptr;
        this->size = 0;
        return;
    }

    this->size = static_cast<size_t>(length);
    this->trtModelStream = new char[this->size];
    file.read(this->trtModelStream, length);
    if (!file){ // short read: do not hand out a partially filled buffer
        std::cerr << "could not read engine file: " << this->engine_file << std::endl;
        delete[] this->trtModelStream;
        this->trtModelStream = nullptr;
        this->size = 0;
    }
}

// Runs one inference pass.
//
// context:   execution context whose engine must expose exactly two bindings,
//            named INPUT_BLOB_NAME and OUTPUT_BLOB_NAME.
// input:     host buffer with batchSize * 3 * INPUT_H * INPUT_W floats.
// output:    host buffer receiving batchSize * OUTPUT_SIZE floats.
// batchSize: number of samples in `input`.
//
// Device buffers and the CUDA stream are created and released on every call.
// CUDA errors abort through CHECK; a failed enqueue is reported on std::cerr
// and leaves `output` untouched.
void VGG_Demo::doInference(nvinfer1::IExecutionContext& context, float* input, float* output, int batchSize){
    const nvinfer1::ICudaEngine& engine = context.getEngine();
    assert(engine.getNbBindings() == 2);
    void* buffers[2] = {nullptr, nullptr};
    const int inputIndex = engine.getBindingIndex(this->INPUT_BLOB_NAME);
    const int outputIndex = engine.getBindingIndex(this->OUTPUT_BLOB_NAME);
    // The indices are used as subscripts of buffers[2]; reject anything else.
    const bool indicesValid = inputIndex >= 0 && inputIndex < 2
                           && outputIndex >= 0 && outputIndex < 2
                           && inputIndex != outputIndex;
    if (!indicesValid){
        std::cerr << "unexpected binding indices for input/output tensors" << std::endl;
        return;
    }

    // Compute byte sizes in size_t to avoid int overflow for large batches
    const size_t inputBytes = static_cast<size_t>(batchSize) * 3 * this->INPUT_H * this->INPUT_W * sizeof(float);
    const size_t outputBytes = static_cast<size_t>(batchSize) * this->OUTPUT_SIZE * sizeof(float);

    CHECK(cudaMalloc(&buffers[inputIndex], inputBytes));
    CHECK(cudaMalloc(&buffers[outputIndex], outputBytes));

    // Create the stream all asynchronous work is queued on
    cudaStream_t stream;
    CHECK(cudaStreamCreate(&stream));

    // Host to device
    CHECK(cudaMemcpyAsync(buffers[inputIndex], input, inputBytes, cudaMemcpyHostToDevice, stream));
    const bool enqueued = context.enqueue(batchSize, buffers, stream, nullptr);
    if (enqueued){
        // Device to host
        CHECK(cudaMemcpyAsync(output, buffers[outputIndex], outputBytes, cudaMemcpyDeviceToHost, stream));
    }
    else{
        std::cerr << "failed to enqueue inference" << std::endl;
    }
    // Wait for the queued copies and kernels before the host reads `output`
    CHECK(cudaStreamSynchronize(stream));

    // Release the stream and the device buffers
    CHECK(cudaStreamDestroy(stream));
    CHECK(cudaFree(buffers[inputIndex]));
    CHECK(cudaFree(buffers[outputIndex]));
}

// Entry point.
//   ./vgg_demo -s   build the network, serialize the engine to the plan file
//   ./vgg_demo -d   deserialize the plan file and run inference on dummy data
// Returns 0 on success and -1 on wrong arguments or when serialization fails.
int main(int argc, char** argv){
    // Exactly one option is expected
    if(argc != 2){
        std::cerr << "arguments not right!" << std::endl;
        std::cerr << "./vgg_demo -s   // serialize model to plan file" << std::endl;
        std::cerr << "./vgg_demo -d   // deserialize plan file and run inference" << std::endl;
        return -1;
    }

    const std::string mode(argv[1]);
    VGG_Demo vgg_demo1;

    if(mode == "-s"){ // serialize
        // serialize() returns 1 on success; propagate failure through the exit code
        if(vgg_demo1.serialize() != 1){
            return -1;
        }
    }
    else if(mode == "-d"){ // deserialize and run inference
        // Dummy input of all ones; kept on the heap because the buffer is
        // close to 600 KB, which is too large to place on the stack safely.
        std::vector<float> data(static_cast<size_t>(3) * vgg_demo1.INPUT_H * vgg_demo1.INPUT_W, 1.0f);
        vgg_demo1.deserialize(data.data());
    }
    else{
        std::cerr << "wrong arguments!" << std::endl;
        return -1;
    }
    return 0;
}

3--编译运行

# Configure and build out of tree in ./build
mkdir build && cd build
cmake ..
make 

# Serialize the engine to the plan file, then deserialize it and run inference
./vgg_demo -s
./vgg_demo -d


http://lihuaxi.xjx100.cn/news/1703940.html

相关文章

【C++11】列表初始化

在C++98中,标准允许使用花括号“{}”对数组元素进行统一的集合(列表)初始值设定,比如: int arr[5] = {0}; int arr[] = {1,2,3}; 这些都是合法的表达式。不过一些自定义类型,却无法享受这样便利的初始化。通常…

java泛型场景补充注意事项

前言 本文不是对泛型的系统介绍,也不打算对其进行入门讲解,只是对遇到的一些泛型场景的补充。看过宋红康和韩顺平的javase课程可以花几分钟看看。 1.&符号在泛型中的使用,用来描述有边界的受约束泛型 class A{} interface B{} public …

OpenStack云计算平台实战-----创建空白虚拟机

1、创建空白虚拟机 需要注意的步骤会截图一下,其它的基本都是下一步,默认的即可 建议将虚拟机命名为自己的名字加后缀 将处理器数量和每个处理器的内核量都修改为2 将虚拟机的内存设置为8G,不然不够用 将指定磁盘大小设置为200G,…

『C语言进阶』字符函数和内存函数(2)

🔥博客主页: 小羊失眠啦. 🔖系列专栏: C语言、Linux、Cpolar ❤️感谢大家点赞👍收藏⭐评论✍️ 文章目录 一、strtok函数1.1 函数认识1.2 注意事项 二、strerror函数2.1 函数认识2.2 注意事项 三、memcpy函数3.1 函数…

Linux查看日志命令

首先cd 进入服务容器里文件所在目录 1. cat 命令 适合查看简短的文件,如配置文件 application.properties,当然也可以看日志 #看配置文件 cat application.properties#看运行日志文件 cat xxx.out#配合检索命令 cat application.properties | grep xxx 2. tail -f 命令 #-1…

HttpClient / Http客户端

HttpClient / Http客户端: HttpClient的定义HttpClient的作用HttpClient的常用核心APIHttpClient要导入的依赖HttpClient发送请求的步骤HttpClient入门案例:HttpClient(Http客户端)发送get请求HttpClient(Http客户端)发送post请求 HttpClient工具类 (代码) HttpCli…

【ES实战】ES创建Transports客户端时间过长分析

ES创建Transports客户端时间过长分析 2023年10月19日 文章目录 ES创建Transports客户端时间过长分析问题描述问题重现问题分析是否可以配置链接超时时间节点建立连接超时逻辑为啥超时间会出现翻倍 优化方案 在创建ES Transport客户端时,当出现以下场景时…

新兴网络安全威胁:数字防御新格局

根据Check Point Research (CPR)的数据,今年上半年犯罪活动大幅增加,第二季度全球每周网络攻击激增 8%,这创下了两年来的最高成交量。 勒索软件和黑客行为等传统威胁已经演变,犯罪团伙不断调整其方法和工具来渗透和影响世界各地的…