//
// Copyright © 2020 STMicroelectronics and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include <algorithm>
#include <getopt.h>
#include <numeric>
#include <signal.h>
#include <string>
#include <sys/time.h>
#include <vector>

#include <armnn/BackendId.hpp>
#include <armnn/BackendRegistry.hpp>
#include <armnn/IRuntime.hpp>
#include <armnn/utility/NumericCast.hpp>
#include <armnnTfLiteParser/ITfLiteParser.hpp>

// Application parameters
std::vector<armnn::BackendId> default_preferred_backends_order = {armnn::Compute::CpuAcc, armnn::Compute::CpuRef};
std::vector<armnn::BackendId> preferred_backends_order;
std::string model_file_str;
std::string preferred_backend_str;
int nb_loops = 1;

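// Helpers converting a struct timeval into microseconds (get_us) and milliseconds (get_ms)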
double get_us(struct timeval t)
{
    return (armnn::numeric_cast<double>(t.tv_sec) *
            armnn::numeric_cast<double>(1000000) +
            armnn::numeric_cast<double>(t.tv_usec));
}

double get_ms(struct timeval t)
{
    return (armnn::numeric_cast<double>(t.tv_sec) *
            armnn::numeric_cast<double>(1000) +
            armnn::numeric_cast<double>(t.tv_usec) / 1000);
}

static void print_help(char** argv)
{
    std::cout <<
        "Usage: " << argv[0] << " -m <model .tflite>\n"
        "\n"
        "-m --model_file <.tflite file path>: .tflite model to be executed\n"
        "-b --backend <device>: preferred backend device to run layers on by default. Possible choices: "
        << armnn::BackendRegistryInstance().GetBackendIdsAsString() << "\n"
        "   (by default CpuAcc, CpuRef)\n"
        "-l --loops <int>: provide the number of times the inference will be executed\n"
        "   (by default nb_loops=1)\n"
        "--help: show this help\n";
    exit(1);
}

void process_args(int argc, char** argv)
{
    const char* const short_opts = "m:b:l:h";
    const option long_opts[] = {
        {"model_file", required_argument, nullptr, 'm'},
        {"backend",    required_argument, nullptr, 'b'},
        {"loops",      required_argument, nullptr, 'l'},
        {"help",       no_argument,       nullptr, 'h'},
        {nullptr,      no_argument,       nullptr, 0}
    };

    while (true)
    {
        const auto opt = getopt_long(argc, argv, short_opts, long_opts, nullptr);

        if (-1 == opt)
        {
            break;
        }

        switch (opt)
        {
        case 'm':
            model_file_str = std::string(optarg);
            std::cout << "model file set to: " << model_file_str << std::endl;
            break;
        case 'b':
            preferred_backend_str = std::string(optarg);
            // Override the default backend order with the user choice
            preferred_backends_order.push_back(preferred_backend_str);

            std::cout << "backend device set to: " << preferred_backend_str << std::endl;
            break;
        case 'l':
            nb_loops = std::stoi(optarg);
            std::cout << "benchmark will execute " << nb_loops << " inference(s)" << std::endl;
            break;
        case 'h': // -h or --help
        case '?': // Unrecognized option
        default:
            print_help(argv);
            break;
        }
    }

    if (model_file_str.empty())
    {
        print_help(argv);
    }
}

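// main: parse the command line, build and optimize the network from the
// .tflite model, bind the input/output tensors, then run nb_loops timed
// inferences and report the min/max/average latency in microseconds.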
int main(int argc, char* argv[])
{
    std::vector<double> inferenceTimes;

    // Get options
    process_args(argc, argv);

    // Create the runtime
    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // Create Parser
    armnnTfLiteParser::ITfLiteParserPtr armnnparser(armnnTfLiteParser::ITfLiteParser::Create());

    // Create a network
    armnn::INetworkPtr network = armnnparser->CreateNetworkFromBinaryFile(model_file_str.c_str());
    if (!network)
    {
        throw armnn::Exception("Failed to create an ArmNN network");
    }

    // Optimize the network
    if (preferred_backends_order.size() == 0)
    {
        preferred_backends_order = default_preferred_backends_order;
    }
    armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(*network,
                                                               preferred_backends_order,
                                                               runtime->GetDeviceSpec());
    armnn::NetworkId networkId;

    // Load the network into the runtime
    runtime->LoadNetwork(networkId, std::move(optimizedNet));

    // Check the number of subgraphs
    if (armnnparser->GetSubgraphCount() != 1)
    {
        std::cout << "Model with more than 1 subgraph is not supported by this benchmark application.\n";
        exit(0);
    }
    size_t subgraphId = 0;

    // Set up the network inputs
    std::cout << "\nModel information:" << std::endl;
    std::vector<armnnTfLiteParser::BindingPointInfo> inputBindings;
    std::vector<armnn::TensorInfo> inputTensorInfos;
    std::vector<std::string> inputTensorNames = armnnparser->GetSubgraphInputTensorNames(subgraphId);
    for (unsigned int i = 0; i < inputTensorNames.size(); i++)
    {
        std::cout << "inputTensorNames[" << i << "] = " << inputTensorNames[i] << std::endl;
        armnnTfLiteParser::BindingPointInfo inputBinding = armnnparser->GetNetworkInputBindingInfo(
                                                               subgraphId,
                                                               inputTensorNames[i]);
        armnn::TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(networkId, inputBinding.first);
        inputBindings.push_back(inputBinding);
        inputTensorInfos.push_back(inputTensorInfo);
    }

    // Set up the network outputs
    std::vector<armnnTfLiteParser::BindingPointInfo> outputBindings;
    std::vector<armnn::TensorInfo> outputTensorInfos;
    std::vector<std::string> outputTensorNames = armnnparser->GetSubgraphOutputTensorNames(subgraphId);
    for (unsigned int i = 0; i < outputTensorNames.size(); i++)
    {
        std::cout << "outputTensorNames[" << i << "] = " << outputTensorNames[i] << std::endl;
        armnnTfLiteParser::BindingPointInfo outputBinding = armnnparser->GetNetworkOutputBindingInfo(
                                                                subgraphId,
                                                                outputTensorNames[i]);
        armnn::TensorInfo outputTensorInfo = runtime->GetOutputTensorInfo(networkId, outputBinding.first);
        outputBindings.push_back(outputBinding);
        outputTensorInfos.push_back(outputTensorInfo);
    }

    // Allocate input tensors
    unsigned int nb_inputs = armnn::numeric_cast<unsigned int>(inputTensorInfos.size());
    armnn::InputTensors inputTensors;
    std::vector<std::vector<float>> in;
    // Reserve up front so the data pointers stored in inputTensors stay valid
    in.reserve(nb_inputs);
    for (unsigned int i = 0; i < nb_inputs; i++)
    {
        // Input buffers are left zero-initialized: only execution time is measured
        std::vector<float> in_data(inputTensorInfos.at(i).GetNumElements());
        in.push_back(in_data);
        inputTensors.push_back({ inputBindings[i].first, armnn::ConstTensor(inputBindings[i].second, in[i].data()) });
    }

    // Allocate output tensors
    unsigned int nb_outputs = armnn::numeric_cast<unsigned int>(outputTensorInfos.size());
    armnn::OutputTensors outputTensors;
    std::vector<std::vector<float>> out;
    // Reserve up front so the data pointers stored in outputTensors stay valid
    out.reserve(nb_outputs);
    for (unsigned int i = 0; i < nb_outputs; i++)
    {
        std::vector<float> out_data(outputTensorInfos.at(i).GetNumElements());
        out.push_back(out_data);
        outputTensors.push_back({ outputBindings[i].first, armnn::Tensor(outputBindings[i].second, out[i].data()) });
    }

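    // Each loop iteration brackets a single EnqueueWorkload call with gettimeofday
    // and records the elapsed time in microseconds.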
    // Run the inferences
    std::cout << "\ninferences are running: " << std::flush;
    for (int i = 0; i < nb_loops; i++)
    {
        struct timeval start_time, stop_time;
        gettimeofday(&start_time, nullptr);

        runtime->EnqueueWorkload(networkId, inputTensors, outputTensors);

        gettimeofday(&stop_time, nullptr);
        inferenceTimes.push_back((get_us(stop_time) - get_us(start_time)));
        std::cout << "# " << std::flush;
    }

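    // Report latency statistics over all recorded inferences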
    auto maxInfTime = *std::max_element(inferenceTimes.begin(), inferenceTimes.end());
    auto minInfTime = *std::min_element(inferenceTimes.begin(), inferenceTimes.end());
    auto avgInfTime = std::accumulate(inferenceTimes.begin(), inferenceTimes.end(), 0.0) /
                      armnn::numeric_cast<double>(inferenceTimes.size());
    std::cout << "\n\ninference time: ";
    std::cout << "min=" << minInfTime << "us ";
    std::cout << "max=" << maxInfTime << "us ";
    std::cout << "avg=" << avgInfTime << "us" << std::endl;

    return 0;
}