Signed-off-by: Pedro Arthur <bygrandao@gmail.com>
tags/n4.1
@@ -15593,30 +15593,24 @@ option may cause flicker since the B-Frames have often larger QP. Default is | |||||
@section sr | @section sr | ||||
Scale the input by applying one of the super-resolution methods based on | Scale the input by applying one of the super-resolution methods based on | ||||
convolutional neural networks. | |||||
convolutional neural networks. Supported models: | |||||
Training scripts as well as scripts for model generation are provided in | |||||
the repository at @url{https://github.com/HighVoltageRocknRoll/sr.git}. | |||||
The filter accepts the following options: | |||||
@table @option | |||||
@item model | |||||
Specify which super-resolution model to use. This option accepts the following values: | |||||
@table @samp | |||||
@item srcnn | |||||
Super-Resolution Convolutional Neural Network model. | |||||
@itemize | |||||
@item | |||||
Super-Resolution Convolutional Neural Network model (SRCNN). | |||||
See @url{https://arxiv.org/abs/1501.00092}. | See @url{https://arxiv.org/abs/1501.00092}. | ||||
@item espcn | |||||
Efficient Sub-Pixel Convolutional Neural Network model. | |||||
@item | |||||
Efficient Sub-Pixel Convolutional Neural Network model (ESPCN). | |||||
See @url{https://arxiv.org/abs/1609.05158}. | See @url{https://arxiv.org/abs/1609.05158}. | ||||
@end itemize | |||||
@end table | |||||
Training scripts as well as scripts for model generation are provided in | |||||
the repository at @url{https://github.com/HighVoltageRocknRoll/sr.git}. | |||||
Default value is @samp{srcnn}. | |||||
The filter accepts the following options: | |||||
@table @option | |||||
@item dnn_backend | @item dnn_backend | ||||
Specify which DNN backend to use for model loading and execution. This option accepts | Specify which DNN backend to use for model loading and execution. This option accepts | ||||
the following values: | the following values: | ||||
@@ -15630,23 +15624,20 @@ TensorFlow backend. To enable this backend you | |||||
need to install the TensorFlow for C library (see | need to install the TensorFlow for C library (see | ||||
@url{https://www.tensorflow.org/install/install_c}) and configure FFmpeg with | @url{https://www.tensorflow.org/install/install_c}) and configure FFmpeg with | ||||
@code{--enable-libtensorflow} | @code{--enable-libtensorflow} | ||||
@end table | @end table | ||||
Default value is @samp{native}. | Default value is @samp{native}. | ||||
@item scale_factor | |||||
Set scale factor for SRCNN model, for which custom model file was provided. | |||||
Allowed values are @code{2}, @code{3} and @code{4}. Default value is @code{2}. | |||||
Scale factor is necessary for SRCNN model, because it accepts input upscaled | |||||
using bicubic upscaling with proper scale factor. | |||||
@item model_filename | |||||
@item model | |||||
Set path to model file specifying network architecture and its parameters. | Set path to model file specifying network architecture and its parameters. | ||||
Note that different backends use different file formats. TensorFlow backend | Note that different backends use different file formats. TensorFlow backend | ||||
can load files for both formats, while native backend can load files for only | can load files for both formats, while native backend can load files for only | ||||
its format. | its format. | ||||
@item scale_factor | |||||
Set scale factor for SRCNN model. Allowed values are @code{2}, @code{3} and @code{4}. | |||||
Default value is @code{2}. Scale factor is necessary for SRCNN model, because it accepts | |||||
input upscaled using bicubic upscaling with proper scale factor. | |||||
@end table | @end table | ||||
@anchor{subtitles} | @anchor{subtitles} | ||||
@@ -24,40 +24,6 @@ | |||||
*/ | */ | ||||
#include "dnn_backend_native.h" | #include "dnn_backend_native.h" | ||||
#include "dnn_srcnn.h" | |||||
#include "dnn_espcn.h" | |||||
#include "libavformat/avio.h" | |||||
/* Kind of a layer in the native network description. */
typedef enum {
    INPUT,
    CONV,
    DEPTH_TO_SPACE
} LayerType;

/* Activation selected for a convolutional layer. */
typedef enum {
    RELU,
    TANH,
    SIGMOID
} ActivationFunc;

/* A single layer: its kind, a float output pointer and kind-specific parameters. */
typedef struct Layer{
    LayerType type;
    /* Set to NULL when a model is first built. */
    float *output;
    /* Points to InputParams, ConvolutionalParams or DepthToSpaceParams, matching type. */
    void *params;
} Layer;

/* Parameters of a CONV layer. */
typedef struct ConvolutionalParams{
    int32_t input_num;
    int32_t output_num;
    int32_t kernel_size;
    ActivationFunc activation;
    /* input_num * output_num * kernel_size * kernel_size floats. */
    float *kernel;
    /* output_num floats. */
    float *biases;
} ConvolutionalParams;

/* Parameters of the INPUT layer. */
typedef struct InputParams{
    int height;
    int width;
    int channels;
} InputParams;

/* Parameters of a DEPTH_TO_SPACE layer. */
typedef struct DepthToSpaceParams{
    int block_size;
} DepthToSpaceParams;

/* A simple feed-forward convolutional network: an array of layers_num layers. */
typedef struct ConvolutionalNetwork{
    Layer *layers;
    int32_t layers_num;
} ConvolutionalNetwork;
static DNNReturnType set_input_output_native(void *model, DNNData *input, DNNData *output) | static DNNReturnType set_input_output_native(void *model, DNNData *input, DNNData *output) | ||||
{ | { | ||||
@@ -134,7 +100,7 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename) | |||||
AVIOContext *model_file_context; | AVIOContext *model_file_context; | ||||
int file_size, dnn_size, kernel_size, i; | int file_size, dnn_size, kernel_size, i; | ||||
int32_t layer; | int32_t layer; | ||||
LayerType layer_type; | |||||
DNNLayerType layer_type; | |||||
ConvolutionalParams *conv_params; | ConvolutionalParams *conv_params; | ||||
DepthToSpaceParams *depth_to_space_params; | DepthToSpaceParams *depth_to_space_params; | ||||
@@ -251,118 +217,6 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename) | |||||
return model; | return model; | ||||
} | } | ||||
/*
 * Turn *layer into a CONV layer that owns private copies of the given
 * kernel and bias tables.
 *
 * kernel must provide input_num * output_num * size * size floats and
 * biases must provide output_num floats; both are copied.
 *
 * Returns DNN_SUCCESS, or DNN_ERROR if any allocation fails (in which case
 * nothing is left allocated and *layer is not modified).
 */
static int set_up_conv_layer(Layer *layer, const float *kernel, const float *biases, ActivationFunc activation,
                             int32_t input_num, int32_t output_num, int32_t size)
{
    const int kernel_size = input_num * output_num * size * size;
    ConvolutionalParams *cp = av_malloc(sizeof(ConvolutionalParams));

    if (cp == NULL){
        return DNN_ERROR;
    }

    cp->activation  = activation;
    cp->input_num   = input_num;
    cp->output_num  = output_num;
    cp->kernel_size = size;

    cp->kernel = av_malloc(kernel_size * sizeof(float));
    cp->biases = av_malloc(cp->output_num * sizeof(float));
    if (cp->kernel == NULL || cp->biases == NULL){
        /* Release whichever of the two buffers was obtained, then the struct. */
        av_freep(&cp->kernel);
        av_freep(&cp->biases);
        av_freep(&cp);
        return DNN_ERROR;
    }

    memcpy(cp->kernel, kernel, kernel_size * sizeof(float));
    memcpy(cp->biases, biases, output_num * sizeof(float));

    layer->type   = CONV;
    layer->params = cp;

    return DNN_SUCCESS;
}
/*
 * Build one of the built-in networks (DNN_SRCNN or DNN_ESPCN) from the
 * weight tables compiled into the library.
 *
 * Layer 0 is always the INPUT layer, layers 1..3 are convolutions, and the
 * ESPCN model appends a DEPTH_TO_SPACE layer with block size 2.
 *
 * Returns a newly allocated model, or NULL if model_type is unknown or an
 * allocation fails.
 */
DNNModel *ff_dnn_load_default_model_native(DNNDefaultModel model_type)
{
    DNNModel *model = NULL;
    ConvolutionalNetwork *network = NULL;
    DepthToSpaceParams *depth_to_space_params;
    int32_t layer;

    model = av_malloc(sizeof(DNNModel));
    if (!model){
        return NULL;
    }

    network = av_malloc(sizeof(ConvolutionalNetwork));
    if (!network){
        av_freep(&model);
        return NULL;
    }
    model->model = (void *)network;

    switch (model_type){
    case DNN_SRCNN:
        network->layers_num = 4;
        break;
    case DNN_ESPCN:
        network->layers_num = 5;
        break;
    default:
        av_freep(&network);
        av_freep(&model);
        return NULL;
    }

    network->layers = av_malloc(network->layers_num * sizeof(Layer));
    if (!network->layers){
        av_freep(&network);
        av_freep(&model);
        return NULL;
    }

    for (layer = 0; layer < network->layers_num; ++layer){
        /*
         * Give every slot a defined type as well: the error paths below hand
         * the partially built network to ff_dnn_free_model_native(), which
         * must never see an indeterminate value.
         * NOTE(review): assumes the free routine accepts a non-CONV layer
         * whose params is NULL - confirm against its implementation.
         */
        network->layers[layer].type = INPUT;
        network->layers[layer].output = NULL;
        network->layers[layer].params = NULL;
    }

    network->layers[0].type = INPUT;
    network->layers[0].params = av_malloc(sizeof(InputParams));
    if (!network->layers[0].params){
        ff_dnn_free_model_native(&model);
        return NULL;
    }

    switch (model_type){
    case DNN_SRCNN:
        if (set_up_conv_layer(network->layers + 1, srcnn_conv1_kernel, srcnn_conv1_bias, RELU, 1, 64, 9) != DNN_SUCCESS ||
            set_up_conv_layer(network->layers + 2, srcnn_conv2_kernel, srcnn_conv2_bias, RELU, 64, 32, 1) != DNN_SUCCESS ||
            set_up_conv_layer(network->layers + 3, srcnn_conv3_kernel, srcnn_conv3_bias, RELU, 32, 1, 5) != DNN_SUCCESS){
            ff_dnn_free_model_native(&model);
            return NULL;
        }
        break;
    case DNN_ESPCN:
        if (set_up_conv_layer(network->layers + 1, espcn_conv1_kernel, espcn_conv1_bias, TANH, 1, 64, 5) != DNN_SUCCESS ||
            set_up_conv_layer(network->layers + 2, espcn_conv2_kernel, espcn_conv2_bias, TANH, 64, 32, 3) != DNN_SUCCESS ||
            set_up_conv_layer(network->layers + 3, espcn_conv3_kernel, espcn_conv3_bias, SIGMOID, 32, 4, 3) != DNN_SUCCESS){
            ff_dnn_free_model_native(&model);
            return NULL;
        }
        network->layers[4].type = DEPTH_TO_SPACE;
        depth_to_space_params = av_malloc(sizeof(DepthToSpaceParams));
        if (!depth_to_space_params){
            ff_dnn_free_model_native(&model);
            return NULL;
        }
        depth_to_space_params->block_size = 2;
        network->layers[4].params = depth_to_space_params;
    }

    model->set_input_output = &set_input_output_native;

    return model;
}
/*
 * Clamp index x to the range [0, w - 1].
 * Every use of an argument is parenthesised so that expressions such as
 * "1 << n" can be passed as w. Arguments may be evaluated more than once.
 */
#define CLAMP_TO_EDGE(x, w) ((x) < 0 ? 0 : ((x) >= (w) ? ((w) - 1) : (x)))
static void convolve(const float *input, float *output, const ConvolutionalParams *conv_params, int width, int height) | static void convolve(const float *input, float *output, const ConvolutionalParams *conv_params, int width, int height) | ||||
@@ -28,10 +28,40 @@ | |||||
#define AVFILTER_DNN_BACKEND_NATIVE_H | #define AVFILTER_DNN_BACKEND_NATIVE_H | ||||
#include "dnn_interface.h" | #include "dnn_interface.h" | ||||
#include "libavformat/avio.h" | |||||
DNNModel *ff_dnn_load_model_native(const char *model_filename); | |||||
/* Kind of a layer in the native network description. */
typedef enum {
    INPUT,
    CONV,
    DEPTH_TO_SPACE
} DNNLayerType;

/* Activation selected for a convolutional layer. */
typedef enum {
    RELU,
    TANH,
    SIGMOID
} DNNActivationFunc;

/* A single layer: its kind, a float output pointer and kind-specific parameters. */
typedef struct Layer{
    DNNLayerType type;
    float *output;
    /* Cast to ConvolutionalParams or DepthToSpaceParams by users, depending on type. */
    void *params;
} Layer;

/* Parameters of a CONV layer. */
typedef struct ConvolutionalParams{
    int32_t input_num;
    int32_t output_num;
    int32_t kernel_size;
    DNNActivationFunc activation;
    /* input_num * output_num * kernel_size * kernel_size floats. */
    float *kernel;
    /* output_num floats. */
    float *biases;
} ConvolutionalParams;
DNNModel *ff_dnn_load_default_model_native(DNNDefaultModel model_type); | |||||
/* Parameters of the INPUT layer. */
typedef struct InputParams{
    int height;
    int width;
    int channels;
} InputParams;

/* Parameters of a DEPTH_TO_SPACE layer. */
typedef struct DepthToSpaceParams{
    int block_size;
} DepthToSpaceParams;
// Represents simple feed-forward convolutional network. | |||||
typedef struct ConvolutionalNetwork{ | |||||
Layer *layers; | |||||
int32_t layers_num; | |||||
} ConvolutionalNetwork; | |||||
DNNModel *ff_dnn_load_model_native(const char *model_filename); | |||||
DNNReturnType ff_dnn_execute_model_native(const DNNModel *model); | DNNReturnType ff_dnn_execute_model_native(const DNNModel *model); | ||||
@@ -24,8 +24,7 @@ | |||||
*/ | */ | ||||
#include "dnn_backend_tf.h" | #include "dnn_backend_tf.h" | ||||
#include "dnn_srcnn.h" | |||||
#include "dnn_espcn.h" | |||||
#include "dnn_backend_native.h" | |||||
#include "libavformat/avio.h" | #include "libavformat/avio.h" | ||||
#include <tensorflow/c/c_api.h> | #include <tensorflow/c/c_api.h> | ||||
@@ -156,32 +155,14 @@ static DNNReturnType set_input_output_tf(void *model, DNNData *input, DNNData *o | |||||
return DNN_SUCCESS; | return DNN_SUCCESS; | ||||
} | } | ||||
DNNModel *ff_dnn_load_model_tf(const char *model_filename) | |||||
static DNNReturnType load_tf_model(TFModel *tf_model, const char *model_filename) | |||||
{ | { | ||||
DNNModel *model = NULL; | |||||
TFModel *tf_model = NULL; | |||||
TF_Buffer *graph_def; | TF_Buffer *graph_def; | ||||
TF_ImportGraphDefOptions *graph_opts; | TF_ImportGraphDefOptions *graph_opts; | ||||
model = av_malloc(sizeof(DNNModel)); | |||||
if (!model){ | |||||
return NULL; | |||||
} | |||||
tf_model = av_malloc(sizeof(TFModel)); | |||||
if (!tf_model){ | |||||
av_freep(&model); | |||||
return NULL; | |||||
} | |||||
tf_model->session = NULL; | |||||
tf_model->input_tensor = NULL; | |||||
tf_model->output_data = NULL; | |||||
graph_def = read_graph(model_filename); | graph_def = read_graph(model_filename); | ||||
if (!graph_def){ | if (!graph_def){ | ||||
av_freep(&tf_model); | |||||
av_freep(&model); | |||||
return NULL; | |||||
return DNN_ERROR; | |||||
} | } | ||||
tf_model->graph = TF_NewGraph(); | tf_model->graph = TF_NewGraph(); | ||||
tf_model->status = TF_NewStatus(); | tf_model->status = TF_NewStatus(); | ||||
@@ -192,26 +173,178 @@ DNNModel *ff_dnn_load_model_tf(const char *model_filename) | |||||
if (TF_GetCode(tf_model->status) != TF_OK){ | if (TF_GetCode(tf_model->status) != TF_OK){ | ||||
TF_DeleteGraph(tf_model->graph); | TF_DeleteGraph(tf_model->graph); | ||||
TF_DeleteStatus(tf_model->status); | TF_DeleteStatus(tf_model->status); | ||||
av_freep(&tf_model); | |||||
av_freep(&model); | |||||
return NULL; | |||||
return DNN_ERROR; | |||||
} | } | ||||
model->model = (void *)tf_model; | |||||
model->set_input_output = &set_input_output_tf; | |||||
return DNN_SUCCESS; | |||||
} | |||||
return model; | |||||
#define NAME_BUFFER_SIZE 256 | |||||
static DNNReturnType add_conv_layer(TFModel *tf_model, TF_Operation *transpose_op, TF_Operation **cur_op, | |||||
ConvolutionalParams* params, const int layer) | |||||
{ | |||||
TF_Operation *op; | |||||
TF_OperationDescription *op_desc; | |||||
TF_Output input; | |||||
int64_t strides[] = {1, 1, 1, 1}; | |||||
TF_Tensor *tensor; | |||||
int64_t dims[4]; | |||||
int dims_len; | |||||
char name_buffer[NAME_BUFFER_SIZE]; | |||||
int32_t size; | |||||
size = params->input_num * params->output_num * params->kernel_size * params->kernel_size; | |||||
input.index = 0; | |||||
snprintf(name_buffer, NAME_BUFFER_SIZE, "conv_kernel%d", layer); | |||||
op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer); | |||||
TF_SetAttrType(op_desc, "dtype", TF_FLOAT); | |||||
dims[0] = params->output_num; | |||||
dims[1] = params->kernel_size; | |||||
dims[2] = params->kernel_size; | |||||
dims[3] = params->input_num; | |||||
dims_len = 4; | |||||
tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, size * sizeof(float)); | |||||
memcpy(TF_TensorData(tensor), params->kernel, size * sizeof(float)); | |||||
TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status); | |||||
if (TF_GetCode(tf_model->status) != TF_OK){ | |||||
return DNN_ERROR; | |||||
} | |||||
op = TF_FinishOperation(op_desc, tf_model->status); | |||||
if (TF_GetCode(tf_model->status) != TF_OK){ | |||||
return DNN_ERROR; | |||||
} | |||||
snprintf(name_buffer, NAME_BUFFER_SIZE, "transpose%d", layer); | |||||
op_desc = TF_NewOperation(tf_model->graph, "Transpose", name_buffer); | |||||
input.oper = op; | |||||
TF_AddInput(op_desc, input); | |||||
input.oper = transpose_op; | |||||
TF_AddInput(op_desc, input); | |||||
TF_SetAttrType(op_desc, "T", TF_FLOAT); | |||||
TF_SetAttrType(op_desc, "Tperm", TF_INT32); | |||||
op = TF_FinishOperation(op_desc, tf_model->status); | |||||
if (TF_GetCode(tf_model->status) != TF_OK){ | |||||
return DNN_ERROR; | |||||
} | |||||
snprintf(name_buffer, NAME_BUFFER_SIZE, "conv2d%d", layer); | |||||
op_desc = TF_NewOperation(tf_model->graph, "Conv2D", name_buffer); | |||||
input.oper = *cur_op; | |||||
TF_AddInput(op_desc, input); | |||||
input.oper = op; | |||||
TF_AddInput(op_desc, input); | |||||
TF_SetAttrType(op_desc, "T", TF_FLOAT); | |||||
TF_SetAttrIntList(op_desc, "strides", strides, 4); | |||||
TF_SetAttrString(op_desc, "padding", "VALID", 5); | |||||
*cur_op = TF_FinishOperation(op_desc, tf_model->status); | |||||
if (TF_GetCode(tf_model->status) != TF_OK){ | |||||
return DNN_ERROR; | |||||
} | |||||
snprintf(name_buffer, NAME_BUFFER_SIZE, "conv_biases%d", layer); | |||||
op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer); | |||||
TF_SetAttrType(op_desc, "dtype", TF_FLOAT); | |||||
dims[0] = params->output_num; | |||||
dims_len = 1; | |||||
tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, params->output_num * sizeof(float)); | |||||
memcpy(TF_TensorData(tensor), params->biases, params->output_num * sizeof(float)); | |||||
TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status); | |||||
if (TF_GetCode(tf_model->status) != TF_OK){ | |||||
return DNN_ERROR; | |||||
} | |||||
op = TF_FinishOperation(op_desc, tf_model->status); | |||||
if (TF_GetCode(tf_model->status) != TF_OK){ | |||||
return DNN_ERROR; | |||||
} | |||||
snprintf(name_buffer, NAME_BUFFER_SIZE, "bias_add%d", layer); | |||||
op_desc = TF_NewOperation(tf_model->graph, "BiasAdd", name_buffer); | |||||
input.oper = *cur_op; | |||||
TF_AddInput(op_desc, input); | |||||
input.oper = op; | |||||
TF_AddInput(op_desc, input); | |||||
TF_SetAttrType(op_desc, "T", TF_FLOAT); | |||||
*cur_op = TF_FinishOperation(op_desc, tf_model->status); | |||||
if (TF_GetCode(tf_model->status) != TF_OK){ | |||||
return DNN_ERROR; | |||||
} | |||||
snprintf(name_buffer, NAME_BUFFER_SIZE, "activation%d", layer); | |||||
switch (params->activation){ | |||||
case RELU: | |||||
op_desc = TF_NewOperation(tf_model->graph, "Relu", name_buffer); | |||||
break; | |||||
case TANH: | |||||
op_desc = TF_NewOperation(tf_model->graph, "Tanh", name_buffer); | |||||
break; | |||||
case SIGMOID: | |||||
op_desc = TF_NewOperation(tf_model->graph, "Sigmoid", name_buffer); | |||||
break; | |||||
default: | |||||
return DNN_ERROR; | |||||
} | |||||
input.oper = *cur_op; | |||||
TF_AddInput(op_desc, input); | |||||
TF_SetAttrType(op_desc, "T", TF_FLOAT); | |||||
*cur_op = TF_FinishOperation(op_desc, tf_model->status); | |||||
if (TF_GetCode(tf_model->status) != TF_OK){ | |||||
return DNN_ERROR; | |||||
} | |||||
return DNN_SUCCESS; | |||||
} | } | ||||
static TF_Operation *add_pad_op(TFModel *tf_model, TF_Operation *input_op, int32_t pad) | |||||
static DNNReturnType add_depth_to_space_layer(TFModel *tf_model, TF_Operation **cur_op, | |||||
DepthToSpaceParams *params, const int layer) | |||||
{ | { | ||||
TF_OperationDescription *op_desc; | TF_OperationDescription *op_desc; | ||||
TF_Output input; | |||||
char name_buffer[NAME_BUFFER_SIZE]; | |||||
snprintf(name_buffer, NAME_BUFFER_SIZE, "depth_to_space%d", layer); | |||||
op_desc = TF_NewOperation(tf_model->graph, "DepthToSpace", name_buffer); | |||||
input.oper = *cur_op; | |||||
input.index = 0; | |||||
TF_AddInput(op_desc, input); | |||||
TF_SetAttrType(op_desc, "T", TF_FLOAT); | |||||
TF_SetAttrInt(op_desc, "block_size", params->block_size); | |||||
*cur_op = TF_FinishOperation(op_desc, tf_model->status); | |||||
if (TF_GetCode(tf_model->status) != TF_OK){ | |||||
return DNN_ERROR; | |||||
} | |||||
return DNN_SUCCESS; | |||||
} | |||||
static int calculate_pad(const ConvolutionalNetwork *conv_network) | |||||
{ | |||||
ConvolutionalParams *params; | |||||
int32_t layer; | |||||
int pad = 0; | |||||
for (layer = 0; layer < conv_network->layers_num; ++layer){ | |||||
if (conv_network->layers[layer].type == CONV){ | |||||
params = (ConvolutionalParams *)conv_network->layers[layer].params; | |||||
pad += params->kernel_size >> 1; | |||||
} | |||||
} | |||||
return pad; | |||||
} | |||||
static DNNReturnType add_pad_op(TFModel *tf_model, TF_Operation **cur_op, const int32_t pad) | |||||
{ | |||||
TF_Operation *op; | TF_Operation *op; | ||||
TF_Tensor *tensor; | TF_Tensor *tensor; | ||||
TF_OperationDescription *op_desc; | |||||
TF_Output input; | TF_Output input; | ||||
int32_t *pads; | int32_t *pads; | ||||
int64_t pads_shape[] = {4, 2}; | int64_t pads_shape[] = {4, 2}; | ||||
input.index = 0; | |||||
op_desc = TF_NewOperation(tf_model->graph, "Const", "pads"); | op_desc = TF_NewOperation(tf_model->graph, "Const", "pads"); | ||||
TF_SetAttrType(op_desc, "dtype", TF_INT32); | TF_SetAttrType(op_desc, "dtype", TF_INT32); | ||||
tensor = TF_AllocateTensor(TF_INT32, pads_shape, 2, 4 * 2 * sizeof(int32_t)); | tensor = TF_AllocateTensor(TF_INT32, pads_shape, 2, 4 * 2 * sizeof(int32_t)); | ||||
@@ -222,68 +355,73 @@ static TF_Operation *add_pad_op(TFModel *tf_model, TF_Operation *input_op, int32 | |||||
pads[6] = 0; pads[7] = 0; | pads[6] = 0; pads[7] = 0; | ||||
TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status); | TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status); | ||||
if (TF_GetCode(tf_model->status) != TF_OK){ | if (TF_GetCode(tf_model->status) != TF_OK){ | ||||
return NULL; | |||||
return DNN_ERROR; | |||||
} | } | ||||
op = TF_FinishOperation(op_desc, tf_model->status); | op = TF_FinishOperation(op_desc, tf_model->status); | ||||
if (TF_GetCode(tf_model->status) != TF_OK){ | if (TF_GetCode(tf_model->status) != TF_OK){ | ||||
return NULL; | |||||
return DNN_ERROR; | |||||
} | } | ||||
op_desc = TF_NewOperation(tf_model->graph, "MirrorPad", "mirror_pad"); | op_desc = TF_NewOperation(tf_model->graph, "MirrorPad", "mirror_pad"); | ||||
input.oper = input_op; | |||||
input.index = 0; | |||||
input.oper = *cur_op; | |||||
TF_AddInput(op_desc, input); | TF_AddInput(op_desc, input); | ||||
input.oper = op; | input.oper = op; | ||||
TF_AddInput(op_desc, input); | TF_AddInput(op_desc, input); | ||||
TF_SetAttrType(op_desc, "T", TF_FLOAT); | TF_SetAttrType(op_desc, "T", TF_FLOAT); | ||||
TF_SetAttrType(op_desc, "Tpaddings", TF_INT32); | TF_SetAttrType(op_desc, "Tpaddings", TF_INT32); | ||||
TF_SetAttrString(op_desc, "mode", "SYMMETRIC", 9); | TF_SetAttrString(op_desc, "mode", "SYMMETRIC", 9); | ||||
op = TF_FinishOperation(op_desc, tf_model->status); | |||||
*cur_op = TF_FinishOperation(op_desc, tf_model->status); | |||||
if (TF_GetCode(tf_model->status) != TF_OK){ | if (TF_GetCode(tf_model->status) != TF_OK){ | ||||
return NULL; | |||||
return DNN_ERROR; | |||||
} | } | ||||
return op; | |||||
return DNN_SUCCESS; | |||||
} | } | ||||
static TF_Operation *add_const_op(TFModel *tf_model, const float *values, const int64_t *dims, int dims_len, const char *name) | |||||
static DNNReturnType load_native_model(TFModel *tf_model, const char *model_filename) | |||||
{ | { | ||||
int dim; | |||||
int32_t layer; | |||||
TF_OperationDescription *op_desc; | TF_OperationDescription *op_desc; | ||||
TF_Operation *op; | |||||
TF_Operation *transpose_op; | |||||
TF_Tensor *tensor; | TF_Tensor *tensor; | ||||
size_t len; | |||||
TF_Output input; | |||||
int32_t *transpose_perm; | |||||
int64_t transpose_perm_shape[] = {4}; | |||||
int64_t input_shape[] = {1, -1, -1, -1}; | |||||
int32_t pad; | |||||
DNNReturnType layer_add_res; | |||||
DNNModel *native_model = NULL; | |||||
ConvolutionalNetwork *conv_network; | |||||
native_model = ff_dnn_load_model_native(model_filename); | |||||
if (!native_model){ | |||||
return DNN_ERROR; | |||||
} | |||||
op_desc = TF_NewOperation(tf_model->graph, "Const", name); | |||||
TF_SetAttrType(op_desc, "dtype", TF_FLOAT); | |||||
len = sizeof(float); | |||||
for (dim = 0; dim < dims_len; ++dim){ | |||||
len *= dims[dim]; | |||||
conv_network = (ConvolutionalNetwork *)native_model->model; | |||||
pad = calculate_pad(conv_network); | |||||
tf_model->graph = TF_NewGraph(); | |||||
tf_model->status = TF_NewStatus(); | |||||
#define CLEANUP_ON_ERROR(tf_model) \ | |||||
{ \ | |||||
TF_DeleteGraph(tf_model->graph); \ | |||||
TF_DeleteStatus(tf_model->status); \ | |||||
return DNN_ERROR; \ | |||||
} | } | ||||
tensor = TF_AllocateTensor(TF_FLOAT, dims, dims_len, len); | |||||
memcpy(TF_TensorData(tensor), values, len); | |||||
TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status); | |||||
op_desc = TF_NewOperation(tf_model->graph, "Placeholder", "x"); | |||||
TF_SetAttrType(op_desc, "dtype", TF_FLOAT); | |||||
TF_SetAttrShape(op_desc, "shape", input_shape, 4); | |||||
op = TF_FinishOperation(op_desc, tf_model->status); | |||||
if (TF_GetCode(tf_model->status) != TF_OK){ | if (TF_GetCode(tf_model->status) != TF_OK){ | ||||
return NULL; | |||||
CLEANUP_ON_ERROR(tf_model); | |||||
} | } | ||||
return TF_FinishOperation(op_desc, tf_model->status); | |||||
} | |||||
static TF_Operation* add_conv_layers(TFModel *tf_model, const float **consts, const int64_t **consts_dims, | |||||
const int *consts_dims_len, const char **activations, | |||||
TF_Operation *input_op, int layers_num) | |||||
{ | |||||
int i; | |||||
TF_OperationDescription *op_desc; | |||||
TF_Operation *op; | |||||
TF_Operation *transpose_op; | |||||
TF_Output input; | |||||
int64_t strides[] = {1, 1, 1, 1}; | |||||
int32_t *transpose_perm; | |||||
TF_Tensor *tensor; | |||||
int64_t transpose_perm_shape[] = {4}; | |||||
#define NAME_BUFF_SIZE 256 | |||||
char name_buffer[NAME_BUFF_SIZE]; | |||||
if (add_pad_op(tf_model, &op, pad) != DNN_SUCCESS){ | |||||
CLEANUP_ON_ERROR(tf_model); | |||||
} | |||||
op_desc = TF_NewOperation(tf_model->graph, "Const", "transpose_perm"); | op_desc = TF_NewOperation(tf_model->graph, "Const", "transpose_perm"); | ||||
TF_SetAttrType(op_desc, "dtype", TF_INT32); | TF_SetAttrType(op_desc, "dtype", TF_INT32); | ||||
@@ -295,153 +433,48 @@ static TF_Operation* add_conv_layers(TFModel *tf_model, const float **consts, co | |||||
transpose_perm[3] = 0; | transpose_perm[3] = 0; | ||||
TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status); | TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status); | ||||
if (TF_GetCode(tf_model->status) != TF_OK){ | if (TF_GetCode(tf_model->status) != TF_OK){ | ||||
return NULL; | |||||
CLEANUP_ON_ERROR(tf_model); | |||||
} | } | ||||
transpose_op = TF_FinishOperation(op_desc, tf_model->status); | transpose_op = TF_FinishOperation(op_desc, tf_model->status); | ||||
if (TF_GetCode(tf_model->status) != TF_OK){ | |||||
return NULL; | |||||
} | |||||
input.index = 0; | |||||
for (i = 0; i < layers_num; ++i){ | |||||
snprintf(name_buffer, NAME_BUFF_SIZE, "conv_kernel%d", i); | |||||
op = add_const_op(tf_model, consts[i << 1], consts_dims[i << 1], consts_dims_len[i << 1], name_buffer); | |||||
if (TF_GetCode(tf_model->status) != TF_OK || op == NULL){ | |||||
return NULL; | |||||
} | |||||
snprintf(name_buffer, NAME_BUFF_SIZE, "transpose%d", i); | |||||
op_desc = TF_NewOperation(tf_model->graph, "Transpose", name_buffer); | |||||
input.oper = op; | |||||
TF_AddInput(op_desc, input); | |||||
input.oper = transpose_op; | |||||
TF_AddInput(op_desc, input); | |||||
TF_SetAttrType(op_desc, "T", TF_FLOAT); | |||||
TF_SetAttrType(op_desc, "Tperm", TF_INT32); | |||||
op = TF_FinishOperation(op_desc, tf_model->status); | |||||
if (TF_GetCode(tf_model->status) != TF_OK){ | |||||
return NULL; | |||||
} | |||||
snprintf(name_buffer, NAME_BUFF_SIZE, "conv2d%d", i); | |||||
op_desc = TF_NewOperation(tf_model->graph, "Conv2D", name_buffer); | |||||
input.oper = input_op; | |||||
TF_AddInput(op_desc, input); | |||||
input.oper = op; | |||||
TF_AddInput(op_desc, input); | |||||
TF_SetAttrType(op_desc, "T", TF_FLOAT); | |||||
TF_SetAttrIntList(op_desc, "strides", strides, 4); | |||||
TF_SetAttrString(op_desc, "padding", "VALID", 5); | |||||
input_op = TF_FinishOperation(op_desc, tf_model->status); | |||||
if (TF_GetCode(tf_model->status) != TF_OK){ | |||||
return NULL; | |||||
} | |||||
snprintf(name_buffer, NAME_BUFF_SIZE, "conv_biases%d", i); | |||||
op = add_const_op(tf_model, consts[(i << 1) + 1], consts_dims[(i << 1) + 1], consts_dims_len[(i << 1) + 1], name_buffer); | |||||
if (TF_GetCode(tf_model->status) != TF_OK || op == NULL){ | |||||
return NULL; | |||||
for (layer = 0; layer < conv_network->layers_num; ++layer){ | |||||
switch (conv_network->layers[layer].type){ | |||||
case INPUT: | |||||
break; | |||||
case CONV: | |||||
layer_add_res = add_conv_layer(tf_model, transpose_op, &op, | |||||
(ConvolutionalParams *)conv_network->layers[layer].params, layer); | |||||
break; | |||||
case DEPTH_TO_SPACE: | |||||
layer_add_res = add_depth_to_space_layer(tf_model, &op, | |||||
(DepthToSpaceParams *)conv_network->layers[layer].params, layer); | |||||
break; | |||||
default: | |||||
CLEANUP_ON_ERROR(tf_model); | |||||
} | } | ||||
snprintf(name_buffer, NAME_BUFF_SIZE, "bias_add%d", i); | |||||
op_desc = TF_NewOperation(tf_model->graph, "BiasAdd", name_buffer); | |||||
input.oper = input_op; | |||||
TF_AddInput(op_desc, input); | |||||
input.oper = op; | |||||
TF_AddInput(op_desc, input); | |||||
TF_SetAttrType(op_desc, "T", TF_FLOAT); | |||||
input_op = TF_FinishOperation(op_desc, tf_model->status); | |||||
if (TF_GetCode(tf_model->status) != TF_OK){ | |||||
return NULL; | |||||
if (layer_add_res != DNN_SUCCESS){ | |||||
CLEANUP_ON_ERROR(tf_model); | |||||
} | } | ||||
} | |||||
snprintf(name_buffer, NAME_BUFF_SIZE, "activation%d", i); | |||||
op_desc = TF_NewOperation(tf_model->graph, activations[i], name_buffer); | |||||
input.oper = input_op; | |||||
TF_AddInput(op_desc, input); | |||||
TF_SetAttrType(op_desc, "T", TF_FLOAT); | |||||
input_op = TF_FinishOperation(op_desc, tf_model->status); | |||||
if (TF_GetCode(tf_model->status) != TF_OK){ | |||||
return NULL; | |||||
} | |||||
op_desc = TF_NewOperation(tf_model->graph, "Identity", "y"); | |||||
input.oper = op; | |||||
TF_AddInput(op_desc, input); | |||||
TF_FinishOperation(op_desc, tf_model->status); | |||||
if (TF_GetCode(tf_model->status) != TF_OK){ | |||||
CLEANUP_ON_ERROR(tf_model); | |||||
} | } | ||||
return input_op; | |||||
ff_dnn_free_model_native(&native_model); | |||||
return DNN_SUCCESS; | |||||
} | } | ||||
DNNModel *ff_dnn_load_default_model_tf(DNNDefaultModel model_type) | |||||
DNNModel *ff_dnn_load_model_tf(const char *model_filename) | |||||
{ | { | ||||
DNNModel *model = NULL; | DNNModel *model = NULL; | ||||
TFModel *tf_model = NULL; | TFModel *tf_model = NULL; | ||||
TF_OperationDescription *op_desc; | |||||
TF_Operation *op; | |||||
TF_Output input; | |||||
static const int64_t input_shape[] = {1, -1, -1, 1}; | |||||
static const char tanh[] = "Tanh"; | |||||
static const char sigmoid[] = "Sigmoid"; | |||||
static const char relu[] = "Relu"; | |||||
static const float *srcnn_consts[] = { | |||||
srcnn_conv1_kernel, | |||||
srcnn_conv1_bias, | |||||
srcnn_conv2_kernel, | |||||
srcnn_conv2_bias, | |||||
srcnn_conv3_kernel, | |||||
srcnn_conv3_bias | |||||
}; | |||||
static const long int *srcnn_consts_dims[] = { | |||||
srcnn_conv1_kernel_dims, | |||||
srcnn_conv1_bias_dims, | |||||
srcnn_conv2_kernel_dims, | |||||
srcnn_conv2_bias_dims, | |||||
srcnn_conv3_kernel_dims, | |||||
srcnn_conv3_bias_dims | |||||
}; | |||||
static const int srcnn_consts_dims_len[] = { | |||||
4, | |||||
1, | |||||
4, | |||||
1, | |||||
4, | |||||
1 | |||||
}; | |||||
static const char *srcnn_activations[] = { | |||||
relu, | |||||
relu, | |||||
relu | |||||
}; | |||||
static const float *espcn_consts[] = { | |||||
espcn_conv1_kernel, | |||||
espcn_conv1_bias, | |||||
espcn_conv2_kernel, | |||||
espcn_conv2_bias, | |||||
espcn_conv3_kernel, | |||||
espcn_conv3_bias | |||||
}; | |||||
static const long int *espcn_consts_dims[] = { | |||||
espcn_conv1_kernel_dims, | |||||
espcn_conv1_bias_dims, | |||||
espcn_conv2_kernel_dims, | |||||
espcn_conv2_bias_dims, | |||||
espcn_conv3_kernel_dims, | |||||
espcn_conv3_bias_dims | |||||
}; | |||||
static const int espcn_consts_dims_len[] = { | |||||
4, | |||||
1, | |||||
4, | |||||
1, | |||||
4, | |||||
1 | |||||
}; | |||||
static const char *espcn_activations[] = { | |||||
tanh, | |||||
tanh, | |||||
sigmoid | |||||
}; | |||||
input.index = 0; | |||||
model = av_malloc(sizeof(DNNModel)); | model = av_malloc(sizeof(DNNModel)); | ||||
if (!model){ | if (!model){ | ||||
@@ -457,70 +490,13 @@ DNNModel *ff_dnn_load_default_model_tf(DNNDefaultModel model_type) | |||||
tf_model->input_tensor = NULL; | tf_model->input_tensor = NULL; | ||||
tf_model->output_data = NULL; | tf_model->output_data = NULL; | ||||
tf_model->graph = TF_NewGraph(); | |||||
tf_model->status = TF_NewStatus(); | |||||
#define CLEANUP_ON_ERROR(tf_model, model) { \ | |||||
TF_DeleteGraph(tf_model->graph); \ | |||||
TF_DeleteStatus(tf_model->status); \ | |||||
av_freep(&tf_model); \ | |||||
av_freep(&model); \ | |||||
return NULL; \ | |||||
} | |||||
op_desc = TF_NewOperation(tf_model->graph, "Placeholder", "x"); | |||||
TF_SetAttrType(op_desc, "dtype", TF_FLOAT); | |||||
TF_SetAttrShape(op_desc, "shape", input_shape, 4); | |||||
op = TF_FinishOperation(op_desc, tf_model->status); | |||||
if (TF_GetCode(tf_model->status) != TF_OK){ | |||||
CLEANUP_ON_ERROR(tf_model, model); | |||||
} | |||||
switch (model_type){ | |||||
case DNN_SRCNN: | |||||
op = add_pad_op(tf_model, op, 6); | |||||
if (!op){ | |||||
CLEANUP_ON_ERROR(tf_model, model); | |||||
} | |||||
op = add_conv_layers(tf_model, srcnn_consts, | |||||
srcnn_consts_dims, srcnn_consts_dims_len, | |||||
srcnn_activations, op, 3); | |||||
if (!op){ | |||||
CLEANUP_ON_ERROR(tf_model, model); | |||||
} | |||||
break; | |||||
case DNN_ESPCN: | |||||
op = add_pad_op(tf_model, op, 4); | |||||
if (!op){ | |||||
CLEANUP_ON_ERROR(tf_model, model); | |||||
} | |||||
op = add_conv_layers(tf_model, espcn_consts, | |||||
espcn_consts_dims, espcn_consts_dims_len, | |||||
espcn_activations, op, 3); | |||||
if (!op){ | |||||
CLEANUP_ON_ERROR(tf_model, model); | |||||
} | |||||
if (load_tf_model(tf_model, model_filename) != DNN_SUCCESS){ | |||||
if (load_native_model(tf_model, model_filename) != DNN_SUCCESS){ | |||||
av_freep(&tf_model); | |||||
av_freep(&model); | |||||
op_desc = TF_NewOperation(tf_model->graph, "DepthToSpace", "depth_to_space"); | |||||
input.oper = op; | |||||
TF_AddInput(op_desc, input); | |||||
TF_SetAttrType(op_desc, "T", TF_FLOAT); | |||||
TF_SetAttrInt(op_desc, "block_size", 2); | |||||
op = TF_FinishOperation(op_desc, tf_model->status); | |||||
if (TF_GetCode(tf_model->status) != TF_OK){ | |||||
CLEANUP_ON_ERROR(tf_model, model); | |||||
return NULL; | |||||
} | } | ||||
break; | |||||
default: | |||||
CLEANUP_ON_ERROR(tf_model, model); | |||||
} | |||||
op_desc = TF_NewOperation(tf_model->graph, "Identity", "y"); | |||||
input.oper = op; | |||||
TF_AddInput(op_desc, input); | |||||
TF_FinishOperation(op_desc, tf_model->status); | |||||
if (TF_GetCode(tf_model->status) != TF_OK){ | |||||
CLEANUP_ON_ERROR(tf_model, model); | |||||
} | } | ||||
model->model = (void *)tf_model; | model->model = (void *)tf_model; | ||||
@@ -529,6 +505,8 @@ DNNModel *ff_dnn_load_default_model_tf(DNNDefaultModel model_type) | |||||
return model; | return model; | ||||
} | } | ||||
DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model) | DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model) | ||||
{ | { | ||||
TFModel *tf_model = (TFModel *)model->model; | TFModel *tf_model = (TFModel *)model->model; | ||||
@@ -572,7 +550,7 @@ void ff_dnn_free_model_tf(DNNModel **model) | |||||
TF_DeleteTensor(tf_model->input_tensor); | TF_DeleteTensor(tf_model->input_tensor); | ||||
} | } | ||||
if (tf_model->output_data){ | if (tf_model->output_data){ | ||||
av_freep(&(tf_model->output_data->data)); | |||||
av_freep(&tf_model->output_data->data); | |||||
} | } | ||||
av_freep(&tf_model); | av_freep(&tf_model); | ||||
av_freep(model); | av_freep(model); | ||||
@@ -31,8 +31,6 @@ | |||||
DNNModel *ff_dnn_load_model_tf(const char *model_filename); | DNNModel *ff_dnn_load_model_tf(const char *model_filename); | ||||
DNNModel *ff_dnn_load_default_model_tf(DNNDefaultModel model_type); | |||||
DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model); | DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model); | ||||
void ff_dnn_free_model_tf(DNNModel **model); | void ff_dnn_free_model_tf(DNNModel **model); | ||||
@@ -40,14 +40,12 @@ DNNModule *ff_get_dnn_module(DNNBackendType backend_type) | |||||
switch(backend_type){ | switch(backend_type){ | ||||
case DNN_NATIVE: | case DNN_NATIVE: | ||||
dnn_module->load_model = &ff_dnn_load_model_native; | dnn_module->load_model = &ff_dnn_load_model_native; | ||||
dnn_module->load_default_model = &ff_dnn_load_default_model_native; | |||||
dnn_module->execute_model = &ff_dnn_execute_model_native; | dnn_module->execute_model = &ff_dnn_execute_model_native; | ||||
dnn_module->free_model = &ff_dnn_free_model_native; | dnn_module->free_model = &ff_dnn_free_model_native; | ||||
break; | break; | ||||
case DNN_TF: | case DNN_TF: | ||||
#if (CONFIG_LIBTENSORFLOW == 1) | #if (CONFIG_LIBTENSORFLOW == 1) | ||||
dnn_module->load_model = &ff_dnn_load_model_tf; | dnn_module->load_model = &ff_dnn_load_model_tf; | ||||
dnn_module->load_default_model = &ff_dnn_load_default_model_tf; | |||||
dnn_module->execute_model = &ff_dnn_execute_model_tf; | dnn_module->execute_model = &ff_dnn_execute_model_tf; | ||||
dnn_module->free_model = &ff_dnn_free_model_tf; | dnn_module->free_model = &ff_dnn_free_model_tf; | ||||
#else | #else | ||||
@@ -30,8 +30,6 @@ typedef enum {DNN_SUCCESS, DNN_ERROR} DNNReturnType; | |||||
typedef enum {DNN_NATIVE, DNN_TF} DNNBackendType; | typedef enum {DNN_NATIVE, DNN_TF} DNNBackendType; | ||||
typedef enum {DNN_SRCNN, DNN_ESPCN} DNNDefaultModel; | |||||
typedef struct DNNData{ | typedef struct DNNData{ | ||||
float *data; | float *data; | ||||
int width, height, channels; | int width, height, channels; | ||||
@@ -49,8 +47,6 @@ typedef struct DNNModel{ | |||||
typedef struct DNNModule{ | typedef struct DNNModule{ | ||||
// Loads model and parameters from given file. Returns NULL if it is not possible. | // Loads model and parameters from given file. Returns NULL if it is not possible. | ||||
DNNModel *(*load_model)(const char *model_filename); | DNNModel *(*load_model)(const char *model_filename); | ||||
// Loads one of the default models | |||||
DNNModel *(*load_default_model)(DNNDefaultModel model_type); | |||||
// Executes model with specified input and output. Returns DNN_ERROR otherwise. | // Executes model with specified input and output. Returns DNN_ERROR otherwise. | ||||
DNNReturnType (*execute_model)(const DNNModel *model); | DNNReturnType (*execute_model)(const DNNModel *model); | ||||
// Frees memory allocated for model. | // Frees memory allocated for model. | ||||
@@ -33,12 +33,9 @@ | |||||
#include "libswscale/swscale.h" | #include "libswscale/swscale.h" | ||||
#include "dnn_interface.h" | #include "dnn_interface.h" | ||||
typedef enum {SRCNN, ESPCN} SRModel; | |||||
typedef struct SRContext { | typedef struct SRContext { | ||||
const AVClass *class; | const AVClass *class; | ||||
SRModel model_type; | |||||
char *model_filename; | char *model_filename; | ||||
DNNBackendType backend_type; | DNNBackendType backend_type; | ||||
DNNModule *dnn_module; | DNNModule *dnn_module; | ||||
@@ -52,16 +49,13 @@ typedef struct SRContext { | |||||
#define OFFSET(x) offsetof(SRContext, x) | #define OFFSET(x) offsetof(SRContext, x) | ||||
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM | #define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM | ||||
static const AVOption sr_options[] = { | static const AVOption sr_options[] = { | ||||
{ "model", "specifies what DNN model to use", OFFSET(model_type), AV_OPT_TYPE_FLAGS, { .i64 = 0 }, 0, 1, FLAGS, "model_type" }, | |||||
{ "srcnn", "Super-Resolution Convolutional Neural Network model (scale factor should be specified for custom SRCNN model)", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, "model_type" }, | |||||
{ "espcn", "Efficient Sub-Pixel Convolutional Neural Network model", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "model_type" }, | |||||
{ "dnn_backend", "DNN backend used for model execution", OFFSET(backend_type), AV_OPT_TYPE_FLAGS, { .i64 = 0 }, 0, 1, FLAGS, "backend" }, | { "dnn_backend", "DNN backend used for model execution", OFFSET(backend_type), AV_OPT_TYPE_FLAGS, { .i64 = 0 }, 0, 1, FLAGS, "backend" }, | ||||
{ "native", "native backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, "backend" }, | { "native", "native backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, "backend" }, | ||||
#if (CONFIG_LIBTENSORFLOW == 1) | #if (CONFIG_LIBTENSORFLOW == 1) | ||||
{ "tensorflow", "tensorflow backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "backend" }, | { "tensorflow", "tensorflow backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "backend" }, | ||||
#endif | #endif | ||||
{"scale_factor", "scale factor for SRCNN model", OFFSET(scale_factor), AV_OPT_TYPE_INT, { .i64 = 2 }, 2, 4, FLAGS}, | |||||
{ "model_filename", "path to model file specifying network architecture and its parameters", OFFSET(model_filename), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, | |||||
{ "scale_factor", "scale factor for SRCNN model", OFFSET(scale_factor), AV_OPT_TYPE_INT, { .i64 = 2 }, 2, 4, FLAGS }, | |||||
{ "model", "path to model file specifying network architecture and its parameters", OFFSET(model_filename), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS }, | |||||
{ NULL } | { NULL } | ||||
}; | }; | ||||
@@ -77,15 +71,8 @@ static av_cold int init(AVFilterContext *context) | |||||
return AVERROR(ENOMEM); | return AVERROR(ENOMEM); | ||||
} | } | ||||
if (!sr_context->model_filename){ | if (!sr_context->model_filename){ | ||||
av_log(context, AV_LOG_VERBOSE, "model file for network was not specified, using default network for x2 upsampling\n"); | |||||
sr_context->scale_factor = 2; | |||||
switch (sr_context->model_type){ | |||||
case SRCNN: | |||||
sr_context->model = (sr_context->dnn_module->load_default_model)(DNN_SRCNN); | |||||
break; | |||||
case ESPCN: | |||||
sr_context->model = (sr_context->dnn_module->load_default_model)(DNN_ESPCN); | |||||
} | |||||
av_log(context, AV_LOG_ERROR, "model file for network was not specified\n"); | |||||
return AVERROR(EIO); | |||||
} | } | ||||
else{ | else{ | ||||
sr_context->model = (sr_context->dnn_module->load_model)(sr_context->model_filename); | sr_context->model = (sr_context->dnn_module->load_model)(sr_context->model_filename); | ||||
@@ -126,15 +113,8 @@ static int config_props(AVFilterLink *inlink) | |||||
DNNReturnType result; | DNNReturnType result; | ||||
int sws_src_h, sws_src_w, sws_dst_h, sws_dst_w; | int sws_src_h, sws_src_w, sws_dst_h, sws_dst_w; | ||||
switch (sr_context->model_type){ | |||||
case SRCNN: | |||||
sr_context->input.width = inlink->w * sr_context->scale_factor; | |||||
sr_context->input.height = inlink->h * sr_context->scale_factor; | |||||
break; | |||||
case ESPCN: | |||||
sr_context->input.width = inlink->w; | |||||
sr_context->input.height = inlink->h; | |||||
} | |||||
sr_context->input.width = inlink->w * sr_context->scale_factor; | |||||
sr_context->input.height = inlink->h * sr_context->scale_factor; | |||||
sr_context->input.channels = 1; | sr_context->input.channels = 1; | ||||
result = (sr_context->model->set_input_output)(sr_context->model->model, &sr_context->input, &sr_context->output); | result = (sr_context->model->set_input_output)(sr_context->model->model, &sr_context->input, &sr_context->output); | ||||
@@ -143,6 +123,16 @@ static int config_props(AVFilterLink *inlink) | |||||
return AVERROR(EIO); | return AVERROR(EIO); | ||||
} | } | ||||
else{ | else{ | ||||
if (sr_context->input.height != sr_context->output.height || sr_context->input.width != sr_context->output.width){ | |||||
sr_context->input.width = inlink->w; | |||||
sr_context->input.height = inlink->h; | |||||
result = (sr_context->model->set_input_output)(sr_context->model->model, &sr_context->input, &sr_context->output); | |||||
if (result != DNN_SUCCESS){ | |||||
av_log(context, AV_LOG_ERROR, "could not set input and output for the model\n"); | |||||
return AVERROR(EIO); | |||||
} | |||||
sr_context->scale_factor = 0; | |||||
} | |||||
outlink->h = sr_context->output.height; | outlink->h = sr_context->output.height; | ||||
outlink->w = sr_context->output.width; | outlink->w = sr_context->output.width; | ||||
sr_context->sws_contexts[1] = sws_getContext(sr_context->input.width, sr_context->input.height, AV_PIX_FMT_GRAY8, | sr_context->sws_contexts[1] = sws_getContext(sr_context->input.width, sr_context->input.height, AV_PIX_FMT_GRAY8, | ||||
@@ -157,8 +147,7 @@ static int config_props(AVFilterLink *inlink) | |||||
av_log(context, AV_LOG_ERROR, "could not create SwsContext for conversions\n"); | av_log(context, AV_LOG_ERROR, "could not create SwsContext for conversions\n"); | ||||
return AVERROR(ENOMEM); | return AVERROR(ENOMEM); | ||||
} | } | ||||
switch (sr_context->model_type){ | |||||
case SRCNN: | |||||
if (sr_context->scale_factor){ | |||||
sr_context->sws_contexts[0] = sws_getContext(inlink->w, inlink->h, inlink->format, | sr_context->sws_contexts[0] = sws_getContext(inlink->w, inlink->h, inlink->format, | ||||
outlink->w, outlink->h, outlink->format, | outlink->w, outlink->h, outlink->format, | ||||
SWS_BICUBIC, NULL, NULL, NULL); | SWS_BICUBIC, NULL, NULL, NULL); | ||||
@@ -167,8 +156,8 @@ static int config_props(AVFilterLink *inlink) | |||||
return AVERROR(ENOMEM); | return AVERROR(ENOMEM); | ||||
} | } | ||||
sr_context->sws_slice_h = inlink->h; | sr_context->sws_slice_h = inlink->h; | ||||
break; | |||||
case ESPCN: | |||||
} | |||||
else{ | |||||
if (inlink->format != AV_PIX_FMT_GRAY8){ | if (inlink->format != AV_PIX_FMT_GRAY8){ | ||||
sws_src_h = sr_context->input.height; | sws_src_h = sr_context->input.height; | ||||
sws_src_w = sr_context->input.width; | sws_src_w = sr_context->input.width; | ||||
@@ -233,15 +222,14 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) | |||||
av_frame_copy_props(out, in); | av_frame_copy_props(out, in); | ||||
out->height = sr_context->output.height; | out->height = sr_context->output.height; | ||||
out->width = sr_context->output.width; | out->width = sr_context->output.width; | ||||
switch (sr_context->model_type){ | |||||
case SRCNN: | |||||
if (sr_context->scale_factor){ | |||||
sws_scale(sr_context->sws_contexts[0], (const uint8_t **)in->data, in->linesize, | sws_scale(sr_context->sws_contexts[0], (const uint8_t **)in->data, in->linesize, | ||||
0, sr_context->sws_slice_h, out->data, out->linesize); | 0, sr_context->sws_slice_h, out->data, out->linesize); | ||||
sws_scale(sr_context->sws_contexts[1], (const uint8_t **)out->data, out->linesize, | sws_scale(sr_context->sws_contexts[1], (const uint8_t **)out->data, out->linesize, | ||||
0, out->height, (uint8_t * const*)(&sr_context->input.data), &sr_context->sws_input_linesize); | 0, out->height, (uint8_t * const*)(&sr_context->input.data), &sr_context->sws_input_linesize); | ||||
break; | |||||
case ESPCN: | |||||
} | |||||
else{ | |||||
if (sr_context->sws_contexts[0]){ | if (sr_context->sws_contexts[0]){ | ||||
sws_scale(sr_context->sws_contexts[0], (const uint8_t **)(in->data + 1), in->linesize + 1, | sws_scale(sr_context->sws_contexts[0], (const uint8_t **)(in->data + 1), in->linesize + 1, | ||||
0, sr_context->sws_slice_h, out->data + 1, out->linesize + 1); | 0, sr_context->sws_slice_h, out->data + 1, out->linesize + 1); | ||||