mirror of https://github.com/Qortal/Brooklyn.git
synced 2025-02-20 22:25:54 +00:00
342 lines · 15 KiB · C++
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
|
|
|
|
#include "BackendOptions.hpp"
|
|
#include "INetwork.hpp"
|
|
#include "IProfiler.hpp"
|
|
#include "IWorkingMemHandle.hpp"
|
|
#include "IAsyncExecutionCallback.hpp"
|
|
#include "Tensor.hpp"
|
|
#include "Types.hpp"
|
|
#include "TypesUtils.hpp"
|
|
#include "profiling/ILocalPacketHandler.hpp"
|
|
|
|
#include <armnn/backends/ICustomAllocator.hpp>
|
|
#include <armnn/backends/IMemoryOptimizerStrategy.hpp>
|
|
#include <memory>
|
|
#include <map>
|
|
|
|
namespace armnn
|
|
{
|
|
|
|
/// Unique identifier for a network loaded into an IRuntime.
/// Produced by IRuntime::LoadNetwork and consumed by the per-network API
/// (EnqueueWorkload, UnloadNetwork, GetProfiler, ...).
using NetworkId = int;

// Forward declaration; the full definition appears at the bottom of this header.
class IGpuAccTunedParameters;

// Opaque implementation type (pimpl) owned by IRuntime.
struct RuntimeImpl;
class IRuntime;
/// Owning pointer to an IRuntime with a custom deleter, so destruction is
/// routed back into the library that created the object.
/// NOTE(review): the deleter is presumably IRuntime::Destroy (bound in
/// IRuntime::Create) — confirm in the implementation file.
using IRuntimePtr = std::unique_ptr<IRuntime, void(*)(IRuntime* runtime)>;
|
|
|
|
struct INetworkProperties
|
|
{
|
|
INetworkProperties(bool asyncEnabled,
|
|
MemorySource inputSource,
|
|
MemorySource outputSource,
|
|
bool profilingEnabled = false,
|
|
ProfilingDetailsMethod detailsMethod = ProfilingDetailsMethod::Undefined,
|
|
bool externalMemoryManagementEnabled = false)
|
|
: m_ImportEnabled(inputSource != MemorySource::Undefined),
|
|
m_ExportEnabled(outputSource != MemorySource::Undefined),
|
|
m_AsyncEnabled(asyncEnabled),
|
|
m_ProfilingEnabled(profilingEnabled),
|
|
m_OutputNetworkDetailsMethod(detailsMethod),
|
|
m_InputSource(inputSource),
|
|
m_OutputSource(outputSource),
|
|
m_ExternalMemoryManagementEnabled(externalMemoryManagementEnabled)
|
|
{}
|
|
|
|
/// Deprecated and will be removed in future release.
|
|
const bool m_ImportEnabled;
|
|
/// Deprecated and will be removed in future release.
|
|
const bool m_ExportEnabled;
|
|
|
|
const bool m_AsyncEnabled;
|
|
|
|
const bool m_ProfilingEnabled;
|
|
|
|
const ProfilingDetailsMethod m_OutputNetworkDetailsMethod;
|
|
|
|
const MemorySource m_InputSource;
|
|
const MemorySource m_OutputSource;
|
|
|
|
const bool m_ExternalMemoryManagementEnabled;
|
|
|
|
virtual ~INetworkProperties() {}
|
|
};
|
|
|
|
using namespace armnn::experimental;
|
|
|
|
class IRuntime
|
|
{
|
|
public:
|
|
struct CreationOptions
|
|
{
|
|
CreationOptions()
|
|
: m_GpuAccTunedParameters(nullptr)
|
|
, m_EnableGpuProfiling(false)
|
|
, m_DynamicBackendsPath("")
|
|
, m_ProtectedMode(false)
|
|
, m_CustomAllocatorMap()
|
|
, m_MemoryOptimizerStrategyMap()
|
|
{}
|
|
|
|
/// If set, uses the GpuAcc tuned parameters from the given object when executing GPU workloads.
|
|
/// It will also be updated with new tuned parameters if it is configured to do so.
|
|
std::shared_ptr<IGpuAccTunedParameters> m_GpuAccTunedParameters;
|
|
|
|
/// Setting this flag will allow the user to obtain GPU profiling information from the runtime.
|
|
bool m_EnableGpuProfiling;
|
|
|
|
/// Setting this value will override the paths set by the DYNAMIC_BACKEND_PATHS compiler directive
|
|
/// Only a single path is allowed for the override
|
|
/// It defines the path to search for any [dynamic backend libraries](src/dynamic/README.md).
|
|
std::string m_DynamicBackendsPath;
|
|
|
|
/// Setting this flag will allow the user to create the Runtime in protected mode.
|
|
/// It will run all the inferences on protected memory and will make sure that
|
|
/// INetworkProperties::m_ImportEnabled set to true with MemorySource::DmaBufProtected option
|
|
/// This requires that the backend supports Protected Memory and has an allocator capable of
|
|
/// allocating Protected Memory associated with it.
|
|
bool m_ProtectedMode;
|
|
|
|
/// @brief A map to define a custom memory allocator for specific backend Ids.
|
|
///
|
|
/// @details A Custom Allocator is used for allocation of working memory in the backends.
|
|
/// Set this if you need to take control of how memory is allocated on a backend. Required for
|
|
/// Protected Mode in order to correctly allocate Protected Memory
|
|
///
|
|
/// @note Only supported for GpuAcc
|
|
std::map<BackendId, std::shared_ptr<ICustomAllocator>> m_CustomAllocatorMap;
|
|
|
|
/// @brief A map to define a custom memory optimizer strategy for specific backend Ids.
|
|
///
|
|
/// @details A Memory Optimizer Strategy provides a solution to an abstract representation of
|
|
/// a network's memory requirements. This can also be used to return a pre-computed solution
|
|
/// for a specific network. Set this if you want to implement a Custom Memory Optimizer Strategy
|
|
/// for a given backend.
|
|
std::map<BackendId, std::shared_ptr<IMemoryOptimizerStrategy>> m_MemoryOptimizerStrategyMap;
|
|
|
|
struct ExternalProfilingOptions
|
|
{
|
|
ExternalProfilingOptions()
|
|
: m_EnableProfiling(false)
|
|
, m_TimelineEnabled(false)
|
|
, m_OutgoingCaptureFile("")
|
|
, m_IncomingCaptureFile("")
|
|
, m_FileOnly(false)
|
|
, m_CapturePeriod(LOWEST_CAPTURE_PERIOD)
|
|
, m_FileFormat("binary")
|
|
, m_LocalPacketHandlers()
|
|
{}
|
|
|
|
/// Indicates whether external profiling is enabled or not.
|
|
bool m_EnableProfiling;
|
|
/// Indicates whether external timeline profiling is enabled or not.
|
|
bool m_TimelineEnabled;
|
|
/// Path to a file in which outgoing timeline profiling messages will be stored.
|
|
std::string m_OutgoingCaptureFile;
|
|
/// Path to a file in which incoming timeline profiling messages will be stored.
|
|
std::string m_IncomingCaptureFile;
|
|
/// Enable profiling output to file only.
|
|
bool m_FileOnly;
|
|
/// The duration at which captured profiling messages will be flushed.
|
|
uint32_t m_CapturePeriod;
|
|
/// The format of the file used for outputting profiling data.
|
|
std::string m_FileFormat;
|
|
std::vector<armnn::profiling::ILocalPacketHandlerSharedPtr> m_LocalPacketHandlers;
|
|
};
|
|
ExternalProfilingOptions m_ProfilingOptions;
|
|
|
|
/// Pass backend specific options.
|
|
///
|
|
/// For example, to enable GpuAcc tuning add the following
|
|
/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
|
|
/// m_BackendOption.emplace_back(
|
|
/// BackendOptions{"GpuAcc",
|
|
/// {
|
|
/// {"TuningLevel", 2},
|
|
/// {"TuningFile", filename}
|
|
/// {"MemoryOptimizerStrategy", strategyname}
|
|
/// }
|
|
/// });
|
|
/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
/// Execute representative workloads through the runtime to generate tuning data.
|
|
/// The tuning file is written once the runtime is destroyed
|
|
|
|
/// To execute with the tuning data, start up with just the tuning file specified.
|
|
/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~.cpp
|
|
/// m_BackendOption.emplace_back(
|
|
/// BackendOptions{"GpuAcc",
|
|
/// {
|
|
/// {"TuningFile", filename}
|
|
/// }
|
|
/// });
|
|
/// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
|
|
/// The following backend options are available:
|
|
/// AllBackends:
|
|
/// "MemoryOptimizerStrategy" : string [stategynameString]
|
|
/// (Existing Memory Optimizer Strategies: ConstantMemoryStrategy)
|
|
/// GpuAcc:
|
|
/// "TuningLevel" : int [0..3] (0=UseOnly(default) | 1=RapidTuning | 2=NormalTuning | 3=ExhaustiveTuning)
|
|
/// "TuningFile" : string [filenameString]
|
|
/// "KernelProfilingEnabled" : bool [true | false]
|
|
std::vector<BackendOptions> m_BackendOptions;
|
|
};
|
|
|
|
static IRuntime* CreateRaw(const CreationOptions& options);
|
|
static IRuntimePtr Create(const CreationOptions& options);
|
|
static void Destroy(IRuntime* runtime);
|
|
|
|
/// Loads a complete network into the IRuntime.
|
|
/// @param [out] networkIdOut - Unique identifier for the network is returned in this reference.
|
|
/// @param [in] network - Complete network to load into the IRuntime.
|
|
/// The runtime takes ownership of the network once passed in.
|
|
/// @return armnn::Status
|
|
Status LoadNetwork(NetworkId& networkIdOut, IOptimizedNetworkPtr network);
|
|
|
|
/// Load a complete network into the IRuntime.
|
|
/// @param [out] networkIdOut Unique identifier for the network is returned in this reference.
|
|
/// @param [in] network Complete network to load into the IRuntime.
|
|
/// @param [out] errorMessage Error message if there were any errors.
|
|
/// The runtime takes ownership of the network once passed in.
|
|
/// @return armnn::Status
|
|
Status LoadNetwork(NetworkId& networkIdOut,
|
|
IOptimizedNetworkPtr network,
|
|
std::string& errorMessage);
|
|
|
|
Status LoadNetwork(NetworkId& networkIdOut,
|
|
IOptimizedNetworkPtr network,
|
|
std::string& errorMessage,
|
|
const INetworkProperties& networkProperties);
|
|
|
|
TensorInfo GetInputTensorInfo(NetworkId networkId, LayerBindingId layerId) const;
|
|
TensorInfo GetOutputTensorInfo(NetworkId networkId, LayerBindingId layerId) const;
|
|
|
|
/// ImportInputs separates the importing and mapping of InputTensors from network execution.
|
|
/// Allowing for a set of InputTensors to be imported and mapped once, but used in execution many times.
|
|
/// This function is not thread safe and must not be used while other threads are calling Execute().
|
|
/// Only compatible with AsyncEnabled networks and aligned memory import
|
|
std::vector<ImportedInputId> ImportInputs(NetworkId networkId, const InputTensors& inputTensors,
|
|
MemorySource forceImportMemorySource = MemorySource::Undefined);
|
|
|
|
/// ImportOutputs separates the importing and mapping of OutputTensors from network execution.
|
|
/// Allowing for a set of OutputTensors to be imported and mapped once, but used in execution many times.
|
|
/// This function is not thread safe and must not be used while other threads are calling Execute().
|
|
/// Only compatible with AsyncEnabled networks and aligned memory import
|
|
std::vector<ImportedOutputId> ImportOutputs(NetworkId networkId, const OutputTensors& outputTensors,
|
|
MemorySource forceImportMemorySource = MemorySource::Undefined);
|
|
|
|
/// Un-import and delete the imported InputTensor/s
|
|
/// This function is not thread safe and must not be used while other threads are calling Execute().
|
|
/// Only compatible with AsyncEnabled networks
|
|
void ClearImportedInputs(NetworkId networkId, const std::vector<ImportedInputId> inputIds);
|
|
|
|
/// Un-import and delete the imported OutputTensor/s
|
|
/// This function is not thread safe and must not be used while other threads are calling Execute().
|
|
/// Only compatible with AsyncEnabled networks
|
|
void ClearImportedOutputs(NetworkId networkId, const std::vector<ImportedOutputId> outputIds);
|
|
|
|
/// Evaluates a network using input in inputTensors and outputs filled into outputTensors
|
|
Status EnqueueWorkload(NetworkId networkId,
|
|
const InputTensors& inputTensors,
|
|
const OutputTensors& outputTensors,
|
|
std::vector<ImportedInputId> preImportedInputIds = {},
|
|
std::vector<ImportedOutputId> preImportedOutputIds = {});
|
|
|
|
/// This is an experimental function.
|
|
/// Evaluates a network using input in inputTensors and outputs filled into outputTensors.
|
|
/// This function performs a thread safe execution of the network. Returns once execution is complete.
|
|
/// Will block until this and any other thread using the same workingMem object completes.
|
|
Status Execute(IWorkingMemHandle& workingMemHandle,
|
|
const InputTensors& inputTensors,
|
|
const OutputTensors& outputTensors,
|
|
std::vector<ImportedInputId> preImportedInputs = {},
|
|
std::vector<ImportedOutputId> preImportedOutputs = {});
|
|
|
|
/// Unloads a network from the IRuntime.
|
|
/// At the moment this only removes the network from the m_Impl->m_Network.
|
|
/// This might need more work in the future to be AndroidNN compliant.
|
|
/// @param [in] networkId - Unique identifier for the network to be unloaded. Generated in LoadNetwork().
|
|
/// @return armnn::Status
|
|
Status UnloadNetwork(NetworkId networkId);
|
|
|
|
const IDeviceSpec& GetDeviceSpec() const;
|
|
|
|
/// Create a new unique WorkingMemHandle object. Create multiple handles if you wish to have
|
|
/// overlapped Execution by calling this function from different threads.
|
|
std::unique_ptr<IWorkingMemHandle> CreateWorkingMemHandle(NetworkId networkId);
|
|
|
|
/// Gets the profiler corresponding to the given network id.
|
|
/// @param networkId The id of the network for which to get the profile.
|
|
/// @return A pointer to the requested profiler, or nullptr if not found.
|
|
const std::shared_ptr<IProfiler> GetProfiler(NetworkId networkId) const;
|
|
|
|
/// Registers a callback function to debug layers performing custom computations on intermediate tensors.
|
|
/// @param networkId The id of the network to register the callback.
|
|
/// @param func callback function to pass to the debug layer.
|
|
void RegisterDebugCallback(NetworkId networkId, const DebugCallbackFunction& func);
|
|
|
|
protected:
|
|
IRuntime();
|
|
IRuntime(const IRuntime::CreationOptions& options);
|
|
~IRuntime();
|
|
|
|
std::unique_ptr<RuntimeImpl> pRuntimeImpl;
|
|
};
|
|
|
|
|
|
/// The following API is replaced by the backend options API.
class IGpuAccTunedParameters;   // redundant with the forward declaration above; kept so this section is self-contained
using IGpuAccTunedParametersPtr = std::shared_ptr<IGpuAccTunedParameters>;

/// Manages a set of GpuAcc parameters which have been tuned for maximum performance.
/// Passes an instance of this object to the IRuntime::Create() method (via IRuntime::CreationOptions) to use it
/// for all GPU workload execution.
///
/// Can be created in two modes:
///  - In UseTunedParameters mode, the parameters stored in this object are used to execute GPU workloads.
///  - In UpdateTunedParameters mode, additionally, whenever a GPU workload is executed for the first time, the
///    optimum parameters will be found and stored in this object. WARNING - This tuning can be slow.
///
/// The parameters can be loaded from and saved to a file so that you can first run a slow initial read-write
/// execution, save the parameters for later and then run fast read-only executions using the optimised parameters.
class IGpuAccTunedParameters
{
public:
    /// Whether stored parameters are read-only or updated during execution.
    enum class Mode
    {
        UseTunedParameters,
        UpdateTunedParameters
    };

    /// How exhaustively the tuner searches for optimal parameters.
    enum class TuningLevel
    {
        Rapid = 1,
        Normal = 2,
        Exhaustive = 3
    };

    /// Creates an IClTunedParameters with the given mode.
    /// @{
    static IGpuAccTunedParameters* CreateRaw(Mode mode, TuningLevel tunerMode);
    static IGpuAccTunedParametersPtr Create(Mode mode, TuningLevel tunerMode);
    /// @}
    static void Destroy(IGpuAccTunedParameters* params);

    /// Loads an existing set of tuned parameters from the given file.
    /// If there is an error loading the file, an armnn::Exception is thrown.
    virtual void Load(const char* filename) = 0;

    /// Saves the current set of tuned parameters to the given file.
    /// If there is an error saving to the file, an armnn::Exception is thrown.
    virtual void Save(const char* filename) const = 0;

protected:
    // Protected + virtual: deletion happens inside the library via Destroy();
    // defaulted instead of an empty body (also drops a stray trailing semicolon).
    virtual ~IGpuAccTunedParameters() = default;
};
|
|
|
|
} // namespace armnn
|