@@ -19,10 +19,27 @@
  */
 
 #include "libavutil/avassert.h"
+#include "libavutil/thread.h"
+#include "libavutil/cpu.h"
 #include "dnn_backend_native_layer_conv2d.h"
 
 #define CLAMP_TO_EDGE(x, w) ((x) < 0 ? 0 : ((x) >= (w) ? (w - 1) : (x)))
 
+//struct to pass parameters
+typedef struct thread_common_param{
+    DnnOperand *operands;
+    const int32_t *input_operand_indexes;
+    int32_t output_operand_index;
+    const void *parameters;
+    NativeContext *ctx;
+    int thread_num;
+} thread_common_param;
+
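+//parameters private to each worker thread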
+typedef struct thread_param{
+    thread_common_param *thread_common_param;
+    int thread_index;
+} thread_param;
+
 int dnn_load_layer_conv2d(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num)
 {
     ConvolutionalParams *conv_params;
@@ -88,17 +105,20 @@ int dnn_load_layer_conv2d(Layer *layer, AVIOContext *model_file_context, int fil
     return dnn_size;
 }
 
-int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_indexes,
-                             int32_t output_operand_index, const void *parameters, NativeContext *ctx)
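+//worker thread: computes conv2d for one horizontal band of output rows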
+static void * dnn_execute_layer_conv2d_thread(void *threadarg)
 {
+    //pass parameters
+    thread_param *thread_param = (struct thread_param *)threadarg;
+    thread_common_param *thread_common_param = thread_param->thread_common_param;
+    DnnOperand *operands = thread_common_param->operands;
     float *output;
-    int32_t input_operand_index = input_operand_indexes[0];
+    int32_t input_operand_index = thread_common_param->input_operand_indexes[0];
     int number = operands[input_operand_index].dims[0];
     int height = operands[input_operand_index].dims[1];
     int width = operands[input_operand_index].dims[2];
     int channel = operands[input_operand_index].dims[3];
     const float *input = operands[input_operand_index].data;
-    const ConvolutionalParams *conv_params = (const ConvolutionalParams *)parameters;
+    const ConvolutionalParams *conv_params = (const ConvolutionalParams *)(thread_common_param->parameters);
 
     int radius = conv_params->kernel_size >> 1;
     int src_linesize = width * conv_params->input_num;
@@ -106,7 +126,11 @@ int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_
     int filter_size = conv_params->kernel_size * filter_linesize;
     int pad_size = (conv_params->padding_method == VALID) ? (conv_params->kernel_size - 1) / 2 * conv_params->dilation : 0;
 
-    DnnOperand *output_operand = &operands[output_operand_index];
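+    //split the output rows into nearly equal bands, one per thread; the last thread also takes any remainder rows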
+    int thread_stride = (height - pad_size * 2) / thread_common_param->thread_num;
+    int thread_start = thread_stride * thread_param->thread_index + pad_size;
+    int thread_end = (thread_param->thread_index == thread_common_param->thread_num - 1) ? (height - pad_size) : (thread_start + thread_stride);
+
+    DnnOperand *output_operand = &operands[thread_common_param->output_operand_index];
     output_operand->dims[0] = number;
     output_operand->dims[1] = height - pad_size * 2;
     output_operand->dims[2] = width - pad_size * 2;
@@ -114,19 +138,21 @@ int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_
     output_operand->data_type = operands[input_operand_index].data_type;
     output_operand->length = calculate_operand_data_length(output_operand);
     if (output_operand->length <= 0) {
-        av_log(ctx, AV_LOG_ERROR, "The output data length overflow\n");
-        return DNN_ERROR;
+        av_log(thread_common_param->ctx, AV_LOG_ERROR, "The output data length overflow\n");
+        return (void *)DNN_ERROR;
     }
     output_operand->data = av_realloc(output_operand->data, output_operand->length);
     if (!output_operand->data) {
-        av_log(ctx, AV_LOG_ERROR, "Failed to reallocate memory for output\n");
-        return DNN_ERROR;
+        av_log(thread_common_param->ctx, AV_LOG_ERROR, "Failed to reallocate memory for output\n");
+        return (void *)DNN_ERROR;
     }
     output = output_operand->data;
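+    //advance the output pointer to this thread's first output row so the bands written by different threads do not overlap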
+    output += (conv_params->output_num) * (width - 2 * pad_size) * (thread_start - pad_size);
 
     av_assert0(channel == conv_params->input_num);
 
-    for (int y = pad_size; y < height - pad_size; ++y) {
+    for (int y = thread_start; y < thread_end; ++y) {
         for (int x = pad_size; x < width - pad_size; ++x) {
             for (int n_filter = 0; n_filter < conv_params->output_num; ++n_filter) {
                 if (conv_params->has_bias)
@@ -174,5 +200,64 @@ int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_
             output += conv_params->output_num;
         }
     }
-    return 0;
+    return (void *)DNN_SUCCESS;
+}
+
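+//public entry point, signature unchanged: fills the shared parameter struct, spawns the worker threads and joins them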
+int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_indexes,
+                             int32_t output_operand_index, const void *parameters, NativeContext *ctx)
+{
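+    //use the conv2d_threads option when it is valid, otherwise default to cpu count + 1 workers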
+    int thread_num = (ctx->options.conv2d_threads <= 0 || ctx->options.conv2d_threads > av_cpu_count())
+        ? (av_cpu_count() + 1) : (ctx->options.conv2d_threads);
+#if HAVE_PTHREAD_CANCEL
+    pthread_t *thread_id = av_malloc(thread_num * sizeof(pthread_t));
+#endif
+    thread_param **thread_param = av_malloc(thread_num * sizeof(*thread_param));
+    void *res;
+    int error_flag = DNN_SUCCESS;
+
+    //struct used to pass parameters
+    thread_common_param thread_common_param;
+    thread_common_param.operands = operands;
+    thread_common_param.input_operand_indexes = input_operand_indexes;
+    thread_common_param.output_operand_index = output_operand_index;
+    thread_common_param.parameters = parameters;
+    thread_common_param.ctx = ctx;
+#if HAVE_PTHREAD_CANCEL
+    thread_common_param.thread_num = thread_num;
+
+    //create threads
+    for (int i = 0; i < thread_num; i++){
+        thread_param[i] = av_malloc(sizeof(*thread_param[i]));
+        thread_param[i]->thread_common_param = &thread_common_param;
+        thread_param[i]->thread_index = i;
+        pthread_create(&thread_id[i], NULL, dnn_execute_layer_conv2d_thread, (void *)thread_param[i]);
+    }
+
+    //join threads, res gets each worker's return value
+    for (int i = 0; i < thread_num; i++){
+        pthread_join(thread_id[i], &res);
+        if ((int)res != DNN_SUCCESS)
+            error_flag = (int)res;
+    }
+
+    //release memory
+    av_free(thread_id);
+
+    for (int i = 0; i < thread_num; i++){
+        av_free(thread_param[i]);
+    }
+#else
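+    //no pthread support: run the whole layer in the calling thread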
+    thread_common_param.thread_num = 1;
+    thread_param[0] = av_malloc(sizeof(*thread_param[0]));
+    thread_param[0]->thread_common_param = &thread_common_param;
+    thread_param[0]->thread_index = 0;
+    res = dnn_execute_layer_conv2d_thread((void *)thread_param[0]);
+    if ((int)res != DNN_SUCCESS)
+        error_flag = (int)res;
+    av_free(thread_param[0]);
+#endif
+
+    av_free(thread_param);
+    return error_flag;
 }