@@ -14,6 +14,8 @@ libavutil: 2017-10-21 | |||||
API changes, most recent first: | API changes, most recent first: | ||||
2021-04-17 - xxxxxxxxxx - lavu 56.73.100 - frame.h detection_bbox.h | |||||
Add AV_FRAME_DATA_DETECTION_BBOXES | |||||
2021-04-06 - xxxxxxxxxx - lavf 58.78.100 - avformat.h | 2021-04-06 - xxxxxxxxxx - lavf 58.78.100 - avformat.h | ||||
Add avformat_index_get_entries_count(), avformat_index_get_entry(), | Add avformat_index_get_entries_count(), avformat_index_get_entry(), | ||||
@@ -21,6 +21,7 @@ HEADERS = adler32.h \ | |||||
cpu.h \ | cpu.h \ | ||||
crc.h \ | crc.h \ | ||||
des.h \ | des.h \ | ||||
detection_bbox.h \ | |||||
dict.h \ | dict.h \ | ||||
display.h \ | display.h \ | ||||
dovi_meta.h \ | dovi_meta.h \ | ||||
@@ -113,6 +114,7 @@ OBJS = adler32.o \ | |||||
cpu.o \ | cpu.o \ | ||||
crc.o \ | crc.o \ | ||||
des.o \ | des.o \ | ||||
detection_bbox.o \ | |||||
dict.o \ | dict.o \ | ||||
display.o \ | display.o \ | ||||
dovi_meta.o \ | dovi_meta.o \ | ||||
@@ -0,0 +1,73 @@ | |||||
/* | |||||
* This file is part of FFmpeg. | |||||
* | |||||
* FFmpeg is free software; you can redistribute it and/or | |||||
* modify it under the terms of the GNU Lesser General Public | |||||
* License as published by the Free Software Foundation; either | |||||
* version 2.1 of the License, or (at your option) any later version. | |||||
* | |||||
* FFmpeg is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
* Lesser General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU Lesser General Public | |||||
* License along with FFmpeg; if not, write to the Free Software | |||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
*/ | |||||
#include <stddef.h>

#include "detection_bbox.h"
AVDetectionBBoxHeader *av_detection_bbox_alloc(uint32_t nb_bboxes, size_t *out_size) | |||||
{ | |||||
size_t size; | |||||
struct { | |||||
AVDetectionBBoxHeader header; | |||||
AVDetectionBBox boxes[1]; | |||||
} *ret; | |||||
size = sizeof(*ret); | |||||
if (nb_bboxes - 1 > (SIZE_MAX - size) / sizeof(*ret->boxes)) | |||||
return NULL; | |||||
size += sizeof(*ret->boxes) * (nb_bboxes - 1); | |||||
ret = av_mallocz(size); | |||||
if (!ret) | |||||
return NULL; | |||||
ret->header.nb_bboxes = nb_bboxes; | |||||
ret->header.bbox_size = sizeof(*ret->boxes); | |||||
ret->header.bboxes_offset = (char *)&ret->boxes - (char *)&ret->header; | |||||
if (out_size) | |||||
*out_size = size; | |||||
return &ret->header; | |||||
} | |||||
/**
 * Allocate a bounding box header with room for nb_bboxes boxes and attach it
 * to frame as side data of type AV_FRAME_DATA_DETECTION_BBOXES.
 *
 * @return the header stored in the new side data, or NULL on failure
 */
AVDetectionBBoxHeader *av_detection_bbox_create_side_data(AVFrame *frame, uint32_t nb_bboxes)
{
    size_t alloc_size;
    AVBufferRef *ref;
    AVDetectionBBoxHeader *hdr = av_detection_bbox_alloc(nb_bboxes, &alloc_size);

    if (!hdr)
        return NULL;

    /* Sizes above INT_MAX are rejected before a buffer is created; both that
     * case and a failed av_buffer_create() leave hdr for us to release. */
    ref = alloc_size > INT_MAX
        ? NULL
        : av_buffer_create((uint8_t *)hdr, alloc_size, NULL, NULL, 0);
    if (!ref) {
        av_freep(&hdr);
        return NULL;
    }

    if (av_frame_new_side_data_from_buf(frame, AV_FRAME_DATA_DETECTION_BBOXES, ref))
        return hdr;

    /* From here on hdr is reachable only through ref, so dropping the
     * reference is the sole cleanup step. */
    av_buffer_unref(&ref);
    return NULL;
}
@@ -0,0 +1,107 @@ | |||||
/* | |||||
* This file is part of FFmpeg. | |||||
* | |||||
* FFmpeg is free software; you can redistribute it and/or | |||||
* modify it under the terms of the GNU Lesser General Public | |||||
* License as published by the Free Software Foundation; either | |||||
* version 2.1 of the License, or (at your option) any later version. | |||||
* | |||||
* FFmpeg is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
* Lesser General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU Lesser General Public | |||||
* License along with FFmpeg; if not, write to the Free Software | |||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
*/ | |||||
#ifndef AVUTIL_DETECTION_BBOX_H | |||||
#define AVUTIL_DETECTION_BBOX_H | |||||
#include "rational.h" | |||||
#include "avassert.h" | |||||
#include "frame.h" | |||||
typedef struct AVDetectionBBox { | |||||
/** | |||||
* Distance in pixels from the left/top edge of the frame, | |||||
* together with width and height, defining the bounding box. | |||||
*/ | |||||
int x; | |||||
int y; | |||||
int w; | |||||
int h; | |||||
#define AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE 64 | |||||
/** | |||||
* Detect result with confidence | |||||
*/ | |||||
char detect_label[AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE]; | |||||
AVRational detect_confidence; | |||||
/** | |||||
* At most 4 classifications based on the detected bounding box. | |||||
* For example, we can get max 4 different attributes with 4 different | |||||
* DNN models on one bounding box. | |||||
* classify_count is zero if no classification. | |||||
*/ | |||||
#define AV_NUM_BBOX_CLASSIFY 4 | |||||
uint32_t classify_count; | |||||
char classify_labels[AV_NUM_BBOX_CLASSIFY][AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE]; | |||||
AVRational classify_confidences[AV_NUM_BBOX_CLASSIFY]; | |||||
} AVDetectionBBox; | |||||
/**
 * Header placed in front of an array of AVDetectionBBox entries.
 * The entries are located through bboxes_offset and bbox_size rather than
 * through a fixed struct layout.
 */
typedef struct AVDetectionBBoxHeader {
    /** How the bounding boxes were generated, for example the DNN model name. */
    char     source[256];

    /** Number of bounding boxes in the array. */
    uint32_t nb_bboxes;

    /**
     * Byte offset, counted from the beginning of this structure, at which
     * the array of bounding boxes starts.
     */
    size_t   bboxes_offset;

    /** Size in bytes of each bounding box. */
    size_t   bbox_size;
} AVDetectionBBoxHeader;
/* | |||||
* Get the bounding box at the specified {@code idx}. Must be between 0 and nb_bboxes. | |||||
*/ | |||||
static av_always_inline AVDetectionBBox* | |||||
av_get_detection_bbox(const AVDetectionBBoxHeader *header, unsigned int idx) | |||||
{ | |||||
av_assert0(idx < header->nb_bboxes); | |||||
return (AVDetectionBBox *)((uint8_t *)header + header->bboxes_offset + | |||||
idx * header->bbox_size); | |||||
} | |||||
/** | |||||
* Allocates memory for AVDetectionBBoxHeader, plus an array of {@code nb_bboxes} | |||||
* AVDetectionBBox, and initializes the variables. | |||||
* Can be freed with a normal av_free() call. | |||||
* | |||||
* @param out_size if non-NULL, the size in bytes of the resulting data array is | |||||
* written here. | |||||
*/ | |||||
AVDetectionBBoxHeader *av_detection_bbox_alloc(uint32_t nb_bboxes, size_t *out_size); | |||||
/**
 * Allocates memory for AVDetectionBBoxHeader, plus an array of {@code nb_bboxes}
 * AVDetectionBBox, attaches it to the given AVFrame {@code frame} as
 * AVFrameSideData of type AV_FRAME_DATA_DETECTION_BBOXES, and initializes the
 * variables.
 *
 * @return the header stored in the new side data, or NULL on failure.
 */
AVDetectionBBoxHeader *av_detection_bbox_create_side_data(AVFrame *frame, uint32_t nb_bboxes); | |||||
#endif |
@@ -853,6 +853,7 @@ const char *av_frame_side_data_name(enum AVFrameSideDataType type) | |||||
case AV_FRAME_DATA_VIDEO_ENC_PARAMS: return "Video encoding parameters"; | case AV_FRAME_DATA_VIDEO_ENC_PARAMS: return "Video encoding parameters"; | ||||
case AV_FRAME_DATA_SEI_UNREGISTERED: return "H.26[45] User Data Unregistered SEI message"; | case AV_FRAME_DATA_SEI_UNREGISTERED: return "H.26[45] User Data Unregistered SEI message"; | ||||
case AV_FRAME_DATA_FILM_GRAIN_PARAMS: return "Film grain parameters"; | case AV_FRAME_DATA_FILM_GRAIN_PARAMS: return "Film grain parameters"; | ||||
case AV_FRAME_DATA_DETECTION_BBOXES: return "Bounding boxes for object detection and classification"; | |||||
} | } | ||||
return NULL; | return NULL; | ||||
} | } | ||||
@@ -198,6 +198,12 @@ enum AVFrameSideDataType { | |||||
* Must be present for every frame which should have film grain applied. | * Must be present for every frame which should have film grain applied. | ||||
*/ | */ | ||||
AV_FRAME_DATA_FILM_GRAIN_PARAMS, | AV_FRAME_DATA_FILM_GRAIN_PARAMS, | ||||
/** | |||||
* Bounding boxes for object detection and classification, | |||||
* as described by AVDetectionBBoxHeader. | |||||
*/ | |||||
AV_FRAME_DATA_DETECTION_BBOXES, | |||||
}; | }; | ||||
enum AVActiveFormatDescription { | enum AVActiveFormatDescription { | ||||
@@ -79,7 +79,7 @@ | |||||
*/ | */ | ||||
#define LIBAVUTIL_VERSION_MAJOR 56 | #define LIBAVUTIL_VERSION_MAJOR 56 | ||||
#define LIBAVUTIL_VERSION_MINOR 72 | |||||
#define LIBAVUTIL_VERSION_MINOR 73 | |||||
#define LIBAVUTIL_VERSION_MICRO 100 | #define LIBAVUTIL_VERSION_MICRO 100 | ||||
#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ | #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ | ||||