Signed-off-by: Mingyu Yin <mingyu.yin@intel.com>tags/n4.4
@@ -3,6 +3,7 @@ OBJS-$(CONFIG_DNN) += dnn/dnn_io_proc.o | |||||
OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native.o | OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native.o | ||||
OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native_layers.o | OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native_layers.o | ||||
OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native_layer_avgpool.o | OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native_layer_avgpool.o | ||||
OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native_layer_dense.o | |||||
OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native_layer_pad.o | OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native_layer_pad.o | ||||
OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native_layer_conv2d.o | OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native_layer_conv2d.o | ||||
OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native_layer_depth2space.o | OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native_layer_depth2space.o | ||||
@@ -45,11 +45,13 @@ typedef enum { | |||||
DLT_MATH_BINARY = 5, | DLT_MATH_BINARY = 5, | ||||
DLT_MATH_UNARY = 6, | DLT_MATH_UNARY = 6, | ||||
DLT_AVG_POOL = 7, | DLT_AVG_POOL = 7, | ||||
DLT_DENSE = 8, | |||||
DLT_COUNT | DLT_COUNT | ||||
} DNNLayerType; | } DNNLayerType; | ||||
typedef enum {DOT_INPUT = 1, DOT_OUTPUT = 2, DOT_INTERMEDIATE = DOT_INPUT | DOT_OUTPUT} DNNOperandType; | typedef enum {DOT_INPUT = 1, DOT_OUTPUT = 2, DOT_INTERMEDIATE = DOT_INPUT | DOT_OUTPUT} DNNOperandType; | ||||
typedef enum {VALID, SAME, SAME_CLAMP_TO_EDGE} DNNPaddingParam; | typedef enum {VALID, SAME, SAME_CLAMP_TO_EDGE} DNNPaddingParam; | ||||
typedef enum {RELU, TANH, SIGMOID, NONE, LEAKY_RELU} DNNActivationFunc; | |||||
typedef struct Layer{ | typedef struct Layer{ | ||||
DNNLayerType type; | DNNLayerType type; | ||||
@@ -23,7 +23,6 @@ | |||||
#include "dnn_backend_native.h" | #include "dnn_backend_native.h" | ||||
typedef enum {RELU, TANH, SIGMOID, NONE, LEAKY_RELU} DNNActivationFunc; | |||||
typedef struct ConvolutionalParams{ | typedef struct ConvolutionalParams{ | ||||
int32_t input_num, output_num, kernel_size; | int32_t input_num, output_num, kernel_size; | ||||
@@ -0,0 +1,151 @@ | |||||
/* | |||||
* Copyright (c) 2020 | |||||
* | |||||
* This file is part of FFmpeg. | |||||
* | |||||
* FFmpeg is free software; you can redistribute it and/or | |||||
* modify it under the terms of the GNU Lesser General Public | |||||
* License as published by the Free Software Foundation; either | |||||
* version 2.1 of the License, or (at your option) any later version. | |||||
* | |||||
* FFmpeg is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
* Lesser General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU Lesser General Public | |||||
* License along with FFmpeg; if not, write to the Free Software | |||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
*/ | |||||
#include "libavutil/avassert.h" | |||||
#include "dnn_backend_native_layer_dense.h" | |||||
/**
 * Load one dense (fully connected) layer from a native model file.
 *
 * Values are read in this order, each as a 32-bit little-endian word:
 * activation, input_num, output_num, has_bias, then
 * input_num * output_num kernel floats, then output_num bias floats
 * (only when has_bias is non-zero), then the input operand index and the
 * output operand index.
 *
 * @param layer              layer to fill in; layer->params receives the
 *                           newly allocated DenseParams
 * @param model_file_context I/O context the layer is read from
 * @param file_size          upper bound, in bytes, the serialized layer is
 *                           checked against
 * @param operands_num       number of operands; both operand indexes must
 *                           lie in [0, operands_num)
 * @return number of bytes consumed, or 0 on failure
 */
int dnn_load_layer_dense(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num)
{
    DenseParams *dense_params;
    int64_t kernel_elems, bias_elems;
    int kernel_size;

    dense_params = av_malloc(sizeof(*dense_params));
    if (!dense_params)
        return 0;

    dense_params->activation = (int32_t)avio_rl32(model_file_context);
    dense_params->input_num  = (int32_t)avio_rl32(model_file_context);
    dense_params->output_num = (int32_t)avio_rl32(model_file_context);
    dense_params->has_bias   = (int32_t)avio_rl32(model_file_context);

    /* The counts come straight from the file: reject nonsense before any
     * arithmetic is done with them. */
    if (dense_params->input_num <= 0 || dense_params->output_num <= 0) {
        av_freep(&dense_params);
        return 0;
    }

    /* Both factors are positive 32-bit values, so the 64-bit product cannot
     * overflow. The byte size is never formed directly; the element count is
     * compared against the number of floats that fit next to the 16 header
     * bytes and the 8 bytes of operand indexes. */
    kernel_elems = (int64_t)dense_params->input_num * dense_params->output_num;
    bias_elems   = dense_params->has_bias ? dense_params->output_num : 0;
    if (kernel_elems + bias_elems > ((int64_t)file_size - 24) / 4) {
        av_freep(&dense_params);
        return 0;
    }
    kernel_size = (int)kernel_elems;

    dense_params->kernel = av_malloc(kernel_size * sizeof(float));
    if (!dense_params->kernel) {
        av_freep(&dense_params);
        return 0;
    }
    for (int i = 0; i < kernel_size; ++i)
        dense_params->kernel[i] = av_int2float(avio_rl32(model_file_context));

    dense_params->biases = NULL;
    if (dense_params->has_bias) {
        dense_params->biases = av_malloc(dense_params->output_num * sizeof(float));
        if (!dense_params->biases) {
            av_freep(&dense_params->kernel);
            av_freep(&dense_params);
            return 0;
        }
        for (int i = 0; i < dense_params->output_num; ++i)
            dense_params->biases[i] = av_int2float(avio_rl32(model_file_context));
    }

    layer->params = dense_params;

    layer->input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
    layer->output_operand_index     = (int32_t)avio_rl32(model_file_context);

    /* layer->params stays set on this failure path, exactly as before;
     * NOTE(review): presumably the caller's model teardown releases it. */
    if (layer->input_operand_indexes[0] < 0 ||
        layer->input_operand_indexes[0] >= operands_num ||
        layer->output_operand_index < 0 ||
        layer->output_operand_index >= operands_num) {
        return 0;
    }

    /* Bounded by file_size above, so the narrowing is safe. */
    return (int)(24 + 4 * (kernel_elems + bias_elems));
}
/**
 * Run a dense (fully connected) layer.
 *
 * The input operand is read as dims[0] x dims[1] x dims[2] x dims[3]
 * floats; its last dimension must equal DenseParams.input_num. For every
 * position of the first three dimensions the layer computes
 *   out[f] = activation(bias[f] + sum_ch in[ch] * kernel[f * input_num + ch])
 * for f in [0, output_num), where bias[f] is taken as 0 when has_bias is 0.
 *
 * @param operands              operand array holding both input and output
 * @param input_operand_indexes only element 0 is used: the input operand
 * @param output_operand_index  operand that receives the result; its dims,
 *                              data_type, length and data are overwritten
 * @param parameters            pointer to the layer's DenseParams
 * @param ctx                   logging context
 * @return 0 on success, DNN_ERROR on failure
 */
int dnn_execute_layer_dense(DnnOperand *operands, const int32_t *input_operand_indexes,
                            int32_t output_operand_index, const void *parameters, NativeContext *ctx)
{
    const DenseParams *dense_params = parameters;
    const int32_t input_operand_index = input_operand_indexes[0];
    const int number  = operands[input_operand_index].dims[0];
    const int height  = operands[input_operand_index].dims[1];
    const int width   = operands[input_operand_index].dims[2];
    const int channel = operands[input_operand_index].dims[3];
    const float *in_vec = operands[input_operand_index].data;
    DnnOperand *output_operand = &operands[output_operand_index];
    float *output;

    av_assert0(channel == dense_params->input_num);

    output_operand->dims[0] = number;
    output_operand->dims[1] = height;
    output_operand->dims[2] = width;
    output_operand->dims[3] = dense_params->output_num;
    output_operand->data_type = operands[input_operand_index].data_type;
    output_operand->length = calculate_operand_data_length(output_operand);
    if (output_operand->length <= 0) {
        av_log(ctx, AV_LOG_ERROR, "The output data length overflow\n");
        return DNN_ERROR;
    }

    /* av_reallocp() releases the old buffer and clears the pointer when it
     * fails, so the previous allocation is not leaked. */
    if (av_reallocp(&output_operand->data, output_operand->length) < 0) {
        av_log(ctx, AV_LOG_ERROR, "Failed to reallocate memory for output\n");
        return DNN_ERROR;
    }
    output = output_operand->data;

    /* Input and output are walked front to back, one feature vector per
     * position; the batch dimension is covered too, because the output
     * buffer is sized for it. */
    for (int n = 0; n < number; ++n) {
        for (int y = 0; y < height; ++y) {
            for (int x = 0; x < width; ++x) {
                for (int n_filter = 0; n_filter < dense_params->output_num; ++n_filter) {
                    const float *weights = dense_params->kernel + n_filter * dense_params->input_num;
                    float acc = dense_params->has_bias ? dense_params->biases[n_filter] : 0.f;

                    for (int ch = 0; ch < dense_params->input_num; ++ch)
                        acc += in_vec[ch] * weights[ch];

                    switch (dense_params->activation) {
                    case RELU:
                        acc = FFMAX(acc, 0.0);
                        break;
                    case TANH:
                        acc = 2.0f / (1.0f + exp(-2.0f * acc)) - 1.0f;
                        break;
                    case SIGMOID:
                        acc = 1.0f / (1.0f + exp(-acc));
                        break;
                    case NONE:
                        break;
                    case LEAKY_RELU:
                        acc = FFMAX(acc, 0.0) + 0.2 * FFMIN(acc, 0.0);
                        break;
                    default:
                        /* unknown activation code: value is left as-is */
                        break;
                    }
                    output[n_filter] = acc;
                }
                in_vec += dense_params->input_num;
                output += dense_params->output_num;
            }
        }
    }
    return 0;
}
@@ -0,0 +1,37 @@ | |||||
/* | |||||
* Copyright (c) 2020 | |||||
* | |||||
* This file is part of FFmpeg. | |||||
* | |||||
* FFmpeg is free software; you can redistribute it and/or | |||||
* modify it under the terms of the GNU Lesser General Public | |||||
* License as published by the Free Software Foundation; either | |||||
* version 2.1 of the License, or (at your option) any later version. | |||||
* | |||||
* FFmpeg is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
* Lesser General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU Lesser General Public | |||||
* License along with FFmpeg; if not, write to the Free Software | |||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
*/ | |||||
#ifndef AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_DENSE_H | |||||
#define AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_DENSE_H | |||||
#include "dnn_backend_native.h" | |||||
typedef struct DenseParams{ | |||||
int32_t input_num, output_num; | |||||
DNNActivationFunc activation; | |||||
int32_t has_bias; | |||||
float *kernel; | |||||
float *biases; | |||||
} DenseParams; | |||||
int dnn_load_layer_dense(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num); | |||||
int dnn_execute_layer_dense(DnnOperand *operands, const int32_t *input_operand_indexes, | |||||
int32_t output_operand_index, const void *parameters, NativeContext *ctx); | |||||
#endif |
@@ -27,6 +27,7 @@ | |||||
#include "dnn_backend_native_layer_mathbinary.h" | #include "dnn_backend_native_layer_mathbinary.h" | ||||
#include "dnn_backend_native_layer_mathunary.h" | #include "dnn_backend_native_layer_mathunary.h" | ||||
#include "dnn_backend_native_layer_avgpool.h" | #include "dnn_backend_native_layer_avgpool.h" | ||||
#include "dnn_backend_native_layer_dense.h" | |||||
LayerFunc layer_funcs[DLT_COUNT] = { | LayerFunc layer_funcs[DLT_COUNT] = { | ||||
{NULL, NULL}, | {NULL, NULL}, | ||||
@@ -37,4 +38,5 @@ LayerFunc layer_funcs[DLT_COUNT] = { | |||||
{dnn_execute_layer_math_binary, dnn_load_layer_math_binary}, | {dnn_execute_layer_math_binary, dnn_load_layer_math_binary}, | ||||
{dnn_execute_layer_math_unary, dnn_load_layer_math_unary}, | {dnn_execute_layer_math_unary, dnn_load_layer_math_unary}, | ||||
{dnn_execute_layer_avg_pool, dnn_load_layer_avg_pool}, | {dnn_execute_layer_avg_pool, dnn_load_layer_avg_pool}, | ||||
{dnn_execute_layer_dense, dnn_load_layer_dense}, | |||||
}; | }; |
@@ -5,3 +5,4 @@ | |||||
/dnn-layer-mathbinary-test | /dnn-layer-mathbinary-test | ||||
/dnn-layer-mathunary-test | /dnn-layer-mathunary-test | ||||
/dnn-layer-avgpool-test | /dnn-layer-avgpool-test | ||||
/dnn-layer-dense-test |
@@ -0,0 +1,131 @@ | |||||
/* | |||||
* Copyright (c) 2020 | |||||
* | |||||
* This file is part of FFmpeg. | |||||
* | |||||
* FFmpeg is free software; you can redistribute it and/or | |||||
* modify it under the terms of the GNU Lesser General Public | |||||
* License as published by the Free Software Foundation; either | |||||
* version 2.1 of the License, or (at your option) any later version. | |||||
* | |||||
* FFmpeg is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
* Lesser General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU Lesser General Public | |||||
* License along with FFmpeg; if not, write to the Free Software | |||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
*/ | |||||
#include <stdio.h> | |||||
#include <string.h> | |||||
#include <math.h> | |||||
#include "libavfilter/dnn/dnn_backend_native_layer_dense.h" | |||||
#define EPSON 0.00001 | |||||
static int test(void) | |||||
{ | |||||
// the input data and expected data are generated with below python code. | |||||
/* | |||||
x = tf.placeholder(tf.float32, shape=[1, None, None, 3]) | |||||
y = tf.layers.dense(input_x, 3, activation=tf.nn.sigmoid, bias_initializer=tf.keras.initializers.he_normal()) | |||||
data = np.random.rand(1, 5, 6, 3); | |||||
sess=tf.Session() | |||||
sess.run(tf.global_variables_initializer()) | |||||
weights = dict([(var.name, sess.run(var)) for var in tf.trainable_variables()]) | |||||
kernel = weights['dense/kernel:0'] | |||||
kernel = np.transpose(kernel, [1, 0]) | |||||
print("kernel:") | |||||
print(kernel.shape) | |||||
print(list(kernel.flatten())) | |||||
bias = weights['dense/bias:0'] | |||||
print("bias:") | |||||
print(bias.shape) | |||||
print(list(bias.flatten())) | |||||
output = sess.run(y, feed_dict={x: data}) | |||||
print("input:") | |||||
print(data.shape) | |||||
print(list(data.flatten())) | |||||
print("output:") | |||||
print(output.shape) | |||||
print(list(output.flatten())) | |||||
*/ | |||||
ConvolutionalParams params; | |||||
DnnOperand operands[2]; | |||||
int32_t input_indexes[1]; | |||||
float input[1*5*6*3] = { | |||||
0.5552418686576308, 0.20653189262022464, 0.31115120939398877, 0.5897014433221428, 0.37340078861060655, 0.6470921693941893, 0.8039950367872679, 0.8762700891949274, | |||||
0.6556655583829558, 0.5911096107039339, 0.18640250865290997, 0.2803248779238966, 0.31586613136402053, 0.9447300740056483, 0.9443980824873418, 0.8158851991115941, | |||||
0.5631010340387631, 0.9407402251929046, 0.6485434876551682, 0.5631376966470001, 0.17581924875609634, 0.7033802439103178, 0.04802402495561675, 0.9183681450194972, | |||||
0.46059317944364, 0.07964160481596883, 0.871787076270302, 0.973743142324361, 0.15923146943258415, 0.8212946080584571, 0.5415954459227064, 0.9552813822803975, | |||||
0.4908552668172057, 0.33723691635292274, 0.46588057864910026, 0.8994239961321776, 0.09845220457674186, 0.1713400292123486, 0.39570294912818826, 0.08018956486392803, | |||||
0.5290478278169032, 0.7141906125920976, 0.0320878067840098, 0.6412406575332606, 0.0075712007102423096, 0.7150828462386156, 0.1311989216968138, 0.4706847944253756, | |||||
0.5447610794883336, 0.3430923933318001, 0.536082357943209, 0.4371629342483694, 0.40227962985019927, 0.3553806249465469, 0.031806622424259245, 0.7053916426174, | |||||
0.3261570237309813, 0.419500213292063, 0.3155691223480851, 0.05664028113178088, 0.3636491555914486, 0.8502419746667123, 0.9836596530684955, 0.1628681802975801, | |||||
0.09410832912479894, 0.28407218939480294, 0.7983417928813697, 0.24132158596506748, 0.8154729498062224, 0.29173768373895637, 0.13407102008052096, 0.18705786678800385, | |||||
0.7167943621295573, 0.09222004247174376, 0.2319220738766018, 0.17708964382285064, 0.1391440370249517, 0.3254088083499256, 0.4013916894718289, 0.4819742663322323, | |||||
0.15080103744648077, 0.9302407847555013, 0.9397597961319524, 0.5719200825550793, 0.9538938024682824, 0.9583882089203861, 0.5168861091262276, 0.1926396841842669, | |||||
0.6781176744337578, 0.719366447288566 | |||||
}; | |||||
float expected_output[1*5*6*3] = { | |||||
-0.3921688, -0.9243112, -0.29659146, -0.64000785, -0.9466343, -0.62125254, -0.71759033, -0.9171336, -0.735589, -0.34365994, | |||||
-0.92100817, -0.23903961, -0.8962277, -0.9521279, -0.90962386, -0.7488303, -0.9563761, -0.7701762, -0.40800542, -0.87684774, | |||||
-0.3339763, -0.6354543, -0.97068924, -0.6246325, -0.6992075, -0.9706726, -0.6818918, -0.51864433, -0.9592881, -0.51187396, | |||||
-0.7423632, -0.89911884, -0.7457824, -0.82009757, -0.96402895, -0.8235518, -0.61980766, -0.94494647, -0.5410502, -0.8281218, | |||||
-0.95508635, -0.8201453, -0.5937325, -0.8679507, -0.500767, -0.39430764, -0.93967676, -0.32183182, -0.58913624, -0.939717, | |||||
-0.55179894, -0.55004454, -0.9214453, -0.4889004, -0.75294703, -0.9118363, -0.7200309, -0.3248641, -0.8878874, -0.18977344, | |||||
-0.8873837, -0.9571257, -0.90145934, -0.50521654, -0.93739635, -0.39051685, -0.61143184, -0.9591179, -0.605999, -0.40008977, | |||||
-0.92219675, -0.26732883, -0.19607787, -0.9172511, -0.07068595, -0.5409857, -0.9387041, -0.44181606, -0.4705004, -0.8899935, | |||||
-0.37997037, -0.66105115, -0.89754754, -0.68141997, -0.6324047, -0.886776, -0.65066385, -0.8334821, -0.94801456, -0.83297 | |||||
}; | |||||
float *output; | |||||
float kernel[3*3] = { | |||||
0.56611896, -0.5144603, -0.82600045, 0.19219112, 0.3835776, -0.7475352, 0.5209291, -0.6301091, -0.99442935}; | |||||
float bias[3] = {-0.3654299, -1.5711838, -0.15546428}; | |||||
params.activation = TANH; | |||||
params.has_bias = 1; | |||||
params.biases = bias; | |||||
params.input_num = 3; | |||||
params.kernel = kernel; | |||||
params.output_num = 3; | |||||
operands[0].data = input; | |||||
operands[0].dims[0] = 1; | |||||
operands[0].dims[1] = 5; | |||||
operands[0].dims[2] = 6; | |||||
operands[0].dims[3] = 3; | |||||
operands[1].data = NULL; | |||||
input_indexes[0] = 0; | |||||
dnn_execute_layer_dense(operands, input_indexes, 1, ¶ms, NULL); | |||||
output = operands[1].data; | |||||
for (int i = 0; i < sizeof(expected_output) / sizeof(float); i++) { | |||||
if (fabs(output[i] - expected_output[i]) > EPSON) { | |||||
printf("at index %d, output: %f, expected_output: %f\n", i, output[i], expected_output[i]); | |||||
av_freep(&output); | |||||
return 1; | |||||
} | |||||
} | |||||
av_freep(&output); | |||||
return 0; | |||||
} | |||||
/* Entry point: exit status is 1 when the dense layer test fails, 0 otherwise. */
int main(int argc, char **argv)
{
    return test() ? 1 : 0;
}
@@ -48,9 +48,9 @@ class Operand(object): | |||||
self.used_count = self.used_count + 1 | self.used_count = self.used_count + 1 | ||||
def __str__(self): | def __str__(self): | ||||
return "{}: (name: {}, iotype: {}, dtype: {}, dims: ({},{},{},{}) used_count: {})".format(self.index, | |||||
return "{}: (name: {}, iotype: {}, dtype: {}, dims: {}, used_count: {})".format(self.index, | |||||
self.name, self.iotype2str[self.iotype], self.dtype2str[self.dtype], | self.name, self.iotype2str[self.iotype], self.dtype2str[self.dtype], | ||||
self.dims[0], self.dims[1], self.dims[2], self.dims[3], self.used_count) | |||||
self.dims, self.used_count) | |||||
def __lt__(self, other): | def __lt__(self, other): | ||||
return self.index < other.index | return self.index < other.index | ||||
@@ -71,8 +71,10 @@ class TFConverter: | |||||
self.converted_nodes = set() | self.converted_nodes = set() | ||||
self.conv2d_scope_names = set() | self.conv2d_scope_names = set() | ||||
self.conv2d_scopename_inputname_dict = {} | self.conv2d_scopename_inputname_dict = {} | ||||
self.dense_scope_names = set() | |||||
self.dense_scopename_inputname_dict = {} | |||||
self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4, | self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4, | ||||
'MathBinary':5, 'MathUnary':6, 'AvgPool':7} | |||||
'MathBinary':5, 'MathUnary':6, 'AvgPool':7, 'MatMul':8} | |||||
self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 'Minimum':4, 'FloorMod':5} | self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 'Minimum':4, 'FloorMod':5} | ||||
self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, | self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, | ||||
'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8, 'Tanh':9, 'Asinh':10, | 'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8, 'Tanh':9, 'Asinh':10, | ||||
@@ -126,6 +128,22 @@ class TFConverter: | |||||
return knode, bnode, dnode, anode | return knode, bnode, dnode, anode | ||||
def get_dense_params(self, dense_scope_name):
    """Look up the nodes that make up one dense sub block.

    Returns a tuple (knode, bnode, anode):
      knode -- the '<scope>/kernel' node (KeyError if it does not exist)
      bnode -- the '<scope>/bias' node, or None when there is no bias
      anode -- the first consumer of '<scope>/BiasAdd' when its op is one of
               self.conv_activations, otherwise None
    """
    knode = self.name_node_dict[dense_scope_name + '/kernel']
    bnode = self.name_node_dict.get(dense_scope_name + '/bias')
    if bnode is None:
        return knode, bnode, None

    # The BiasAdd node may have been renamed to the output name (when the
    # activation is None and the op after BiasAdd is the final Identity), so
    # it is not guaranteed to be present in the edge table.
    consumers = self.edges.get(dense_scope_name + '/BiasAdd')
    anode = consumers[0] if consumers is not None else None
    if anode is not None and anode.op not in self.conv_activations:
        anode = None
    return knode, bnode, anode
def dump_complex_conv2d_to_file(self, node, f): | def dump_complex_conv2d_to_file(self, node, f): | ||||
assert(node.op == 'Conv2D') | assert(node.op == 'Conv2D') | ||||
self.layer_number = self.layer_number + 1 | self.layer_number = self.layer_number + 1 | ||||
@@ -181,6 +199,57 @@ class TFConverter: | |||||
output_operand_index = self.add_operand(self.edges[bnode.name][0].name, Operand.IOTYPE_OUTPUT) | output_operand_index = self.add_operand(self.edges[bnode.name][0].name, Operand.IOTYPE_OUTPUT) | ||||
np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f) | np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f) | ||||
def dump_dense_to_file(self, node, f):
    """Serialize the dense sub block that contains the MatMul `node` to `f`.

    Written in order: five uint32 words (layer code, activation code,
    in_channels, out_channels, has_bias), the kernel transposed to
    (out_channels, in_channels), the raw bias bytes when a bias node exists,
    and two uint32 words with the input and output operand indexes.
    """
    assert(node.op == 'MatMul')
    self.layer_number += 1
    self.converted_nodes.add(node.name)

    scope_name = TFConverter.get_scope_name(node.name)
    # NOTE(review): only the first path component is used as the dense scope,
    # so a dense layer nested inside another name scope would not resolve --
    # confirm against generate_sub_block_op_scope_info.
    dense_scope = scope_name.split('/')[0]
    kernel_node, bias_node, act_node = self.get_dense_params(dense_scope)

    has_bias = 0 if bias_node is None else 1
    if has_bias:
        bias_tensor = bias_node.attr['value'].tensor
        if bias_tensor.tensor_shape.dim[0].size == 1:
            # a single value is carried in float_val rather than tensor_content
            bias_bytes = struct.pack("f", bias_tensor.float_val[0])
        else:
            bias_bytes = bias_tensor.tensor_content

    activation = 'None' if act_node is None else act_node.op

    kernel_tensor = kernel_node.attr['value'].tensor
    kernel_shape = kernel_tensor.tensor_shape
    in_channels = kernel_shape.dim[0].size
    out_channels = kernel_shape.dim[1].size
    if in_channels * out_channels == 1:
        weights = np.float32(kernel_tensor.float_val[0])
    else:
        weights = np.frombuffer(kernel_tensor.tensor_content, dtype=np.float32)
    weights = np.transpose(weights.reshape(in_channels, out_channels), [1, 0])

    header = [self.op2code[node.op], self.conv_activations[activation],
              in_channels, out_channels, has_bias]
    np.array(header, dtype=np.uint32).tofile(f)
    weights.tofile(f)
    if has_bias:
        f.write(bias_bytes)

    input_name = self.dense_scopename_inputname_dict[dense_scope]
    input_operand_index = self.add_operand(input_name, Operand.IOTYPE_INPUT)

    # The layer output is named after the last node folded into it.
    if act_node is not None:
        output_name = act_node.name
    elif bias_node is not None:
        output_name = self.edges[bias_node.name][0].name
    else:
        output_name = self.edges[scope_name+'/concat_1'][0].name
    output_operand_index = self.add_operand(output_name, Operand.IOTYPE_OUTPUT)

    np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
def dump_simple_conv2d_to_file(self, node, f): | def dump_simple_conv2d_to_file(self, node, f): | ||||
assert(node.op == 'Conv2D') | assert(node.op == 'Conv2D') | ||||
@@ -343,9 +412,19 @@ class TFConverter: | |||||
if node.op == 'Conv2D': | if node.op == 'Conv2D': | ||||
self.dump_complex_conv2d_to_file(node, f) | self.dump_complex_conv2d_to_file(node, f) | ||||
continue | continue | ||||
if self.in_dense_scope(node.name): | |||||
if node.op == 'MatMul': | |||||
self.dump_dense_to_file(node, f) | |||||
continue | |||||
if node.op == 'Conv2D': | if node.op == 'Conv2D': | ||||
self.dump_simple_conv2d_to_file(node, f) | self.dump_simple_conv2d_to_file(node, f) | ||||
continue | |||||
if node.name in self.output_names: | |||||
input_name = self.id_different_scope_dict[node.name] | |||||
if TFConverter.get_scope_name(input_name)!=TFConverter.get_scope_name(node.name): | |||||
continue | |||||
if node.op == 'AvgPool': | if node.op == 'AvgPool': | ||||
self.dump_avg_pool_to_file(node, f) | self.dump_avg_pool_to_file(node, f) | ||||
elif node.op == 'DepthToSpace': | elif node.op == 'DepthToSpace': | ||||
@@ -367,7 +446,7 @@ class TFConverter: | |||||
np.array([operand.index, len(operand.name)], dtype=np.uint32).tofile(f) | np.array([operand.index, len(operand.name)], dtype=np.uint32).tofile(f) | ||||
f.write(operand.name.encode('utf-8')) | f.write(operand.name.encode('utf-8')) | ||||
np.array([operand.iotype, operand.dtype], dtype=np.uint32).tofile(f) | np.array([operand.iotype, operand.dtype], dtype=np.uint32).tofile(f) | ||||
np.array([operand.dims[0], operand.dims[1], operand.dims[2], operand.dims[3]], dtype=np.uint32).tofile(f) | |||||
np.array(operand.dims, dtype=np.uint32).tofile(f) | |||||
def dump_to_file(self): | def dump_to_file(self): | ||||
@@ -396,6 +475,7 @@ class TFConverter: | |||||
def remove_identity(self): | def remove_identity(self): | ||||
self.id_different_scope_dict = {} | |||||
id_nodes = [] | id_nodes = [] | ||||
id_dict = {} | id_dict = {} | ||||
for node in self.nodes: | for node in self.nodes: | ||||
@@ -408,6 +488,7 @@ class TFConverter: | |||||
self.name_node_dict[input].name = name | self.name_node_dict[input].name = name | ||||
self.name_node_dict[name] = self.name_node_dict[input] | self.name_node_dict[name] = self.name_node_dict[input] | ||||
del self.name_node_dict[input] | del self.name_node_dict[input] | ||||
self.id_different_scope_dict[name] = input | |||||
else: | else: | ||||
id_dict[name] = input | id_dict[name] = input | ||||
@@ -449,8 +530,18 @@ class TFConverter: | |||||
return False | return False | ||||
def generate_conv2d_scope_info(self): | |||||
# mostly, conv2d is a sub block in graph, get the scope name | |||||
def in_dense_scope(self, name):
    """Return True when the node `name` lives inside a registered dense scope.

    The node's own scope must either equal one of self.dense_scope_names or
    be nested below it; a mere textual prefix ('dense' vs 'dense_extra') does
    not count. Names without a scope are never inside a dense scope.
    """
    inner_scope = TFConverter.get_scope_name(name)
    if inner_scope == "":
        return False
    return any(inner_scope == scope or inner_scope.startswith(scope + '/')
               for scope in self.dense_scope_names)
def generate_sub_block_op_scope_info(self): | |||||
# mostly, conv2d/dense is a sub block in graph, get the scope name | |||||
for node in self.nodes: | for node in self.nodes: | ||||
if node.op == 'Conv2D': | if node.op == 'Conv2D': | ||||
scope = TFConverter.get_scope_name(node.name) | scope = TFConverter.get_scope_name(node.name) | ||||
@@ -461,8 +552,17 @@ class TFConverter: | |||||
if scope + '/kernel' not in self.name_node_dict: | if scope + '/kernel' not in self.name_node_dict: | ||||
continue | continue | ||||
self.conv2d_scope_names.add(scope) | self.conv2d_scope_names.add(scope) | ||||
elif node.op == 'MatMul': | |||||
scope = TFConverter.get_scope_name(node.name) | |||||
# for the case tf.nn.dense is called directly | |||||
if scope == '': | |||||
continue | |||||
# for the case tf.nn.dense is called within a scope | |||||
if scope + '/kernel' not in self.name_node_dict and scope.split('/Tensordot')[0] + '/kernel' not in self.name_node_dict: | |||||
continue | |||||
self.dense_scope_names.add(scope.split('/Tensordot')[0]) | |||||
# get the input name to the conv2d sub block | |||||
# get the input name to the conv2d/dense sub block | |||||
for node in self.nodes: | for node in self.nodes: | ||||
scope = TFConverter.get_scope_name(node.name) | scope = TFConverter.get_scope_name(node.name) | ||||
if scope in self.conv2d_scope_names: | if scope in self.conv2d_scope_names: | ||||
@@ -470,6 +570,16 @@ class TFConverter: | |||||
for inp in node.input: | for inp in node.input: | ||||
if TFConverter.get_scope_name(inp) != scope: | if TFConverter.get_scope_name(inp) != scope: | ||||
self.conv2d_scopename_inputname_dict[scope] = inp | self.conv2d_scopename_inputname_dict[scope] = inp | ||||
elif scope in self.dense_scope_names: | |||||
if node.op == 'MatMul' or node.op == 'Shape': | |||||
for inp in node.input: | |||||
if TFConverter.get_scope_name(inp) != scope: | |||||
self.dense_scopename_inputname_dict[scope] = inp | |||||
elif scope.split('/Tensordot')[0] in self.dense_scope_names: | |||||
if node.op == 'Transpose': | |||||
for inp in node.input: | |||||
if TFConverter.get_scope_name(inp).find(scope)<0 and TFConverter.get_scope_name(inp).find(scope.split('/')[0])<0: | |||||
self.dense_scopename_inputname_dict[scope.split('/Tensordot')[0]] = inp | |||||
def run(self): | def run(self): | ||||
@@ -477,7 +587,7 @@ class TFConverter: | |||||
self.generate_output_names() | self.generate_output_names() | ||||
self.remove_identity() | self.remove_identity() | ||||
self.generate_edges() | self.generate_edges() | ||||
self.generate_conv2d_scope_info() | |||||
self.generate_sub_block_op_scope_info() | |||||
if self.dump4tb: | if self.dump4tb: | ||||
self.dump_for_tensorboard() | self.dump_for_tensorboard() | ||||