/Armednn

cross-platform modular neural network inference library, small and efficient

Primary Language: C++ | License: MIT

Introduction

a small and efficient modular neural network inference engine

features

  • stateless/stateful nodes
  • memory reuse
  • cross-platform

operators implemented

  • Concat
  • Split
  • Dense (fully connected)
  • Conv1D
  • LSTM

INSTALL

git submodule init 
git submodule update

compile Eigen without MKL

make 

compile Eigen with MKL

make MKLROOT=MKL/path/in/your/PC 

The directories MKLROOT/include and MKLROOT/lib/intel64_lin must exist.

1. Usage

1.1 simple dense

    int L=2000;
    int C=256;

    auto input_node=make_input("input");


    ConfigMap config;
    config.insert({"activation",{(std::string)"tanh"}});
    config.insert({"dim0",{(uint32_t)C}});
    config.insert({"dim1",{(uint32_t)C}});
    ParamMap param;
    param.insert({"weight",{Matrix::Identity(C,C)}});
    param.insert({"bias",{Matrix::Ones(1,C)}});

    Arm arm(config,param);

    auto dense_0=make_node("Dense",arm,input_node->output(),"dense-0");
    auto dense_1=make_node("Dense",arm,dense_0->output(),"dense-1");
    auto dense_2=make_node("Dense",arm,dense_1->output(),"dense-2");
    auto dense_3=make_node("Dense",arm,dense_2->output(),"dense-3");


    Matrix temp=Matrix::Identity(L,C);
    input_node->feed(temp);

    dense_0->run();
    dense_1->run();
    dense_2->run();
    dense_3->run();

1.2 stateful

    int L=2000;
    int C=256;

    auto input_node=make_input("input");


    ConfigMap config;
    config.insert({"activation",{(std::string)"tanh"}});
    config.insert({"dim0",{(uint32_t)C}});
    config.insert({"dim1",{(uint32_t)C}});
    ParamMap param;
    param.insert({"weight",{Matrix::Identity(C,C)}});
    param.insert({"bias",{Matrix::Ones(1,C)}});

    Arm arm(config,param);

    auto dense_0=make_node("Dense",arm,input_node->output(),"dense-0");
   
    
    ConfigMap config_split_0;
    config_split_0.insert({"num_split",{(uint32_t)2}});
    Arm arm_split_0(config_split_0);
  
    auto split_0=make_node("Split",arm_split_0,dense_0->output(),"split-0");


    
    ConfigMap config_fpool_0;
    config_fpool_0.insert({"output_channels",{(uint32_t)(C/2)}});
    Arm arm_fpool_0(config_fpool_0);
    auto init_state_node=make_state("init_state",Matrix::Zero(1,C/2));
    auto fpool_0=make_node("Fpooling",arm_fpool_0, {split_0->output(0),split_0->output(1),init_state_node->output(0)},"fpool-0");
    
    Matrix temp=Matrix::Identity(L,C);
    input_node->feed(temp);

    dense_0->run();
    split_0->run();
    init_state_node->run(); 
    fpool_0->run();
    fpool_0->run();
    fpool_0->run();
    init_state_node->run(); // reset state
    fpool_0->run();

1.3 share node memory

    int L=2000;
    int C=256;

    auto input_node=make_input("input");


    ConfigMap config;
    config.insert({"activation",{(std::string)"tanh"}});
    config.insert({"dim0",{(uint32_t)C}});
    config.insert({"dim1",{(uint32_t)C}});
    ParamMap param;
    param.insert({"weight",{Matrix::Identity(C,C)}});
    param.insert({"bias",{Matrix::Ones(1,C)}});

    Arm arm(config,param);

    auto dense_0=make_node("Dense",arm,input_node->output(),"dense-0");
    auto dense_1=make_node("Dense",arm,dense_0->output(),"dense-1");
    auto dense_2=make_node("Dense",arm,dense_1->output(),"dense-2",dense_0->output()); //share memory with dense_0
    auto dense_3=make_node("Dense",arm,dense_2->output(),"dense-3",dense_1->output()); //share memory with dense_1 
    
    /*
    can also be
    auto dense_0=make_node("Dense",arm,input_node->output(),"dense-0");
    auto dense_1=make_node("Dense",arm,dense_0->output(),"dense-1",input_node->output()); //share
    auto dense_2=make_node("Dense",arm,dense_1->output(),"dense-2",dense_0->output()); //share 
    auto dense_3=make_node("Dense",arm,dense_2->output(),"dense-3",input_node->output()); //share 
    */

    Matrix temp=Matrix::Identity(L,C);
    input_node->feed(temp);

    dense_0->run();
    dense_1->run();
    dense_2->run();
    dense_3->run();

    INFO<<dense_3->output(0)->get();

2. Data

Data d; 

d.allocate(10,10); // allocate 10x10

d.allocate(5,5); // no new allocation happens here

d.allocate(100,1000); // reallocates

auto mat=d.get(); // returns an Eigen::Map<Matrix>; it does not own any data and is only a mapping of the "Real Matrix" held in Data

Matrix a(100,100);

d.get()=a; // assign through the map returned by get()
mat=a; // same effect, using the local map


// make sure the Data is large enough before assigning
Matrix b(1000,1000);
d.allocate(1000,1000); // mat is no longer usable after this call
auto mat2=d.get(); // call get() again after every allocate()
mat2=b;

  • remember to call allocate() before any assignment
  • if you keep a local reference (the result of get()), call get() again after every allocate()

3. operation registry

REGISTER_OP(Dense).add_config<uint32_t>("dim0","dimension 0")
                     .add_config<uint32_t>("dim1","dimension 1")
                     .add_config<std::string>("activation","activation function type")
                     .add_param("weight",{"dim0","dim1"})
                     .add_param("bias",{"1","dim1"})
                     .set_num_input("1")
                     .set_num_output("1");

config

add_config<T>(std::string name, std::string desc)
  • T:type

  • name: config name

  • desc: config description

param

add_param(std::string name, std::vector<std::string> shape_mapping)
add_param(std::string name, std::function<std::vector<uint32_t>(ConfigMap&)> shape_func)
  • name: parameter name

  • shape_mapping: shape mapping to config names

  • shape_func: a function to get shape from Config

num_input & num_output

REGISTER_OP(Split).add_config<uint32_t>("num_split","number of split outputs, along last dim")
                     .set_num_input("1")
                     .set_num_output("num_split");
set_num_input(std::string num_input)
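  • num_input: if a config entry with this name exists, the number is taken from that config entry; otherwise the string itself is converted with (uint32_t)num_input
  • num_output: passed to set_num_output(std::string num_output) in the same way, e.g. "1" or "num_split"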