@@ -32,6 +32,7 @@ typedef struct thread_common_param{
     int32_t output_operand_index;
     const void *parameters;
     NativeContext *ctx;
+    float *output_data;
     int thread_num;
 } thread_common_param;
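The new output_data field is what lets the output allocation move out of the worker
threads below: the main thread allocates the whole output plane once, publishes the
pointer here, and each worker derives a private slice from it. As a standalone sketch
of that ownership model (illustrative names, not the FFmpeg types):

    #include <stddef.h>

    typedef struct shared_out {     /* stands in for thread_common_param */
        float *output_data;         /* allocated once, by the main thread */
        int out_rows, out_cols, out_ch;
        int thread_num;
    } shared_out;

    /* workers write disjoint row ranges of the one buffer, so no lock is needed */
    static float *slice_of(const shared_out *s, int first_row)
    {
        return s->output_data + (size_t)first_row * s->out_cols * s->out_ch;
    }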
@@ -111,9 +112,7 @@ static void * dnn_execute_layer_conv2d_thread(void *threadarg)
     thread_param *thread_param = (struct thread_param *)threadarg;
     thread_common_param *thread_common_param = thread_param->thread_common_param;
     DnnOperand *operands = thread_common_param->operands;
-    float *output;
     int32_t input_operand_index = thread_common_param->input_operand_indexes[0];
-    int number = operands[input_operand_index].dims[0];
     int height = operands[input_operand_index].dims[1];
     int width = operands[input_operand_index].dims[2];
     int channel = operands[input_operand_index].dims[3];
@@ -130,24 +129,7 @@
     int thread_start = thread_stride * thread_param->thread_index + pad_size;
     int thread_end = (thread_param->thread_index == thread_common_param->thread_num - 1) ? (height - pad_size) : (thread_start + thread_stride);
 
-    DnnOperand *output_operand = &operands[thread_common_param->output_operand_index];
-    output_operand->dims[0] = number;
-    output_operand->dims[1] = height - pad_size * 2;
-    output_operand->dims[2] = width - pad_size * 2;
-    output_operand->dims[3] = conv_params->output_num;
-    output_operand->data_type = operands[input_operand_index].data_type;
-    output_operand->length = calculate_operand_data_length(output_operand);
-    if (output_operand->length <= 0) {
-        av_log(thread_common_param->ctx, AV_LOG_ERROR, "The output data length overflow\n");
-        return (void *)DNN_ERROR;
-    }
-    output_operand->data = av_realloc(output_operand->data, output_operand->length);
-    if (!output_operand->data) {
-        av_log(thread_common_param->ctx, AV_LOG_ERROR, "Failed to reallocate memory for output\n");
-        return (void *)DNN_ERROR;
-    }
-
-    output = output_operand->data;
+    float *output = thread_common_param->output_data;
     output += (conv_params->output_num) * (width - 2 * pad_size) * (thread_start - pad_size);
 
     av_assert0(channel == conv_params->input_num);
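The pointer arithmetic on the surviving output += line is easier to verify with
numbers. The output tensor is laid out NHWC, so one output row holds
(width - 2 * pad_size) * output_num floats, and thread i's first output row is
thread_start - pad_size. A sketch with hypothetical sizes:

    #include <stdio.h>

    int main(void)
    {
        /* hypothetical sizes, same NHWC layout as the patch */
        int height = 100, width = 100, pad_size = 1, output_num = 3, thread_num = 4;
        int thread_stride = (height - pad_size * 2) / thread_num;    /* 24 rows */
        int thread_index  = 2;
        int thread_start  = thread_stride * thread_index + pad_size; /* row 49  */
        long done_rows    = thread_start - pad_size;   /* 48 rows already owned
                                                          by threads 0 and 1    */
        long offset       = (long)output_num * (width - 2 * pad_size) * done_rows;
        printf("thread %d starts writing at float #%ld\n", thread_index, offset);
        return 0;                                      /* prints 14112          */
    }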
@@ -213,16 +195,33 @@ int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_
     pthread_t *thread_id = av_malloc(thread_num * sizeof(pthread_t));
 #endif
     thread_param **thread_param = av_malloc(thread_num * sizeof(*thread_param));
-    void *res;
-    int error_flag = DNN_SUCCESS;
 
     //struct used to pass parameters
     thread_common_param thread_common_param;
+    const ConvolutionalParams *conv_params = (const ConvolutionalParams *)(parameters);
+    int pad_size = (conv_params->padding_method == VALID) ? (conv_params->kernel_size - 1) / 2 * conv_params->dilation : 0;
+    DnnOperand *output_operand = &operands[output_operand_index];
+    output_operand->dims[0] = operands[input_operand_indexes[0]].dims[0];
+    output_operand->dims[1] = operands[input_operand_indexes[0]].dims[1] - pad_size * 2;
+    output_operand->dims[2] = operands[input_operand_indexes[0]].dims[2] - pad_size * 2;
+    output_operand->dims[3] = conv_params->output_num;
+    output_operand->data_type = operands[input_operand_indexes[0]].data_type;
+    output_operand->length = calculate_operand_data_length(output_operand);
+    if (output_operand->length <= 0) {
+        av_log(ctx, AV_LOG_ERROR, "The output data length overflow\n");
+        return DNN_ERROR;
+    }
+    output_operand->data = av_realloc(output_operand->data, output_operand->length);
+    if (!output_operand->data) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to reallocate memory for output\n");
+        return DNN_ERROR;
+    }
+    thread_common_param.output_data = output_operand->data;
     thread_common_param.operands = operands;
     thread_common_param.input_operand_indexes = input_operand_indexes;
     thread_common_param.output_operand_index = output_operand_index;
     thread_common_param.parameters = parameters;
     thread_common_param.ctx = ctx;
 
 #if HAVE_PTHREAD_CANCEL
     thread_common_param.thread_num = thread_num;
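Before this hunk, every worker called av_realloc() on the same operand concurrently,
so the buffer could move (or be freed) under a sibling thread; sizing and allocating
it here, before any pthread_create(), removes that race without locking. In outline,
the ordering the patch establishes (a fragment reusing the patch's own names, plus a
worked size for hypothetical dims):

    /* dims = {N, H - 2*pad, W - 2*pad, output_num}, float32 operands:   */
    /* e.g. {1, 98, 98, 3} -> 1 * 98 * 98 * 3 * 4 = 115248 bytes         */
    output_operand->data = av_realloc(output_operand->data, output_operand->length);
    if (!output_operand->data)
        return DNN_ERROR;
    /* publish the pointer before the fan-out; workers only ever read it */
    thread_common_param.output_data = output_operand->data;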
@@ -236,9 +235,7 @@ int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_
 
     //join threads, res gets function return
     for (int i = 0; i < thread_num; i++){
-        pthread_join(thread_id[i], &res);
-        if ((int)res != DNN_SUCCESS)
-            error_flag = (int)res;
+        pthread_join(thread_id[i], NULL);
     }
 
     //release memory
@@ -252,12 +249,10 @@ int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_
     thread_param[0] = av_malloc(sizeof(thread_param));
     thread_param[0]->thread_common_param = &thread_common_param;
     thread_param[0]->thread_index = 0;
-    res = dnn_execute_layer_conv2d_thread((void *)thread_param[0]);
-    if ((int)res != DNN_SUCCESS)
-        error_flag = (int)res;
+    dnn_execute_layer_conv2d_thread((void *)thread_param[0]);
     av_free(thread_param[0]);
 #endif
 
     av_free(thread_param);
-    return error_flag;
+    return DNN_SUCCESS;
 }
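With the failure paths (length overflow, failed reallocation) hoisted into the
caller, the workers have nothing left to report: that is why the joins above stop
collecting a result and the function can return DNN_SUCCESS unconditionally. It also
drops the (int)res conversion, which narrowed a pointer to int (implementation-defined,
though benign here, since only small status codes travelled through the pointer).
The two join styles side by side (alternatives, not sequential code; res is the
variable the patch deletes):

    /* before: each worker could hand back DNN_ERROR through its return value */
    void *res;
    pthread_join(thread_id[i], &res);
    if ((intptr_t)res != DNN_SUCCESS)
        error_flag = (int)(intptr_t)res;

    /* after: the worker body can no longer fail, so the result is discarded */
    pthread_join(thread_id[i], NULL);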