@@ -14,6 +14,8 @@ libavutil: 2017-10-21 | |||
API changes, most recent first: | |||
2021-04-17 - xxxxxxxxxx - lavu 56.73.100 - frame.h detection_bbox.h | |||
Add AV_FRAME_DATA_DETECTION_BBOXES | |||
2021-04-06 - xxxxxxxxxx - lavf 58.78.100 - avformat.h | |||
Add avformat_index_get_entries_count(), avformat_index_get_entry(), | |||
@@ -21,6 +21,7 @@ HEADERS = adler32.h \ | |||
cpu.h \ | |||
crc.h \ | |||
des.h \ | |||
detection_bbox.h \ | |||
dict.h \ | |||
display.h \ | |||
dovi_meta.h \ | |||
@@ -113,6 +114,7 @@ OBJS = adler32.o \ | |||
cpu.o \ | |||
crc.o \ | |||
des.o \ | |||
detection_bbox.o \ | |||
dict.o \ | |||
display.o \ | |||
dovi_meta.o \ | |||
@@ -0,0 +1,73 @@ | |||
/* | |||
* This file is part of FFmpeg. | |||
* | |||
* FFmpeg is free software; you can redistribute it and/or | |||
* modify it under the terms of the GNU Lesser General Public | |||
* License as published by the Free Software Foundation; either | |||
* version 2.1 of the License, or (at your option) any later version. | |||
* | |||
* FFmpeg is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
* Lesser General Public License for more details. | |||
* | |||
* You should have received a copy of the GNU Lesser General Public | |||
* License along with FFmpeg; if not, write to the Free Software | |||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
*/ | |||
#include "detection_bbox.h" | |||
AVDetectionBBoxHeader *av_detection_bbox_alloc(uint32_t nb_bboxes, size_t *out_size) | |||
{ | |||
size_t size; | |||
struct { | |||
AVDetectionBBoxHeader header; | |||
AVDetectionBBox boxes[1]; | |||
} *ret; | |||
size = sizeof(*ret); | |||
if (nb_bboxes - 1 > (SIZE_MAX - size) / sizeof(*ret->boxes)) | |||
return NULL; | |||
size += sizeof(*ret->boxes) * (nb_bboxes - 1); | |||
ret = av_mallocz(size); | |||
if (!ret) | |||
return NULL; | |||
ret->header.nb_bboxes = nb_bboxes; | |||
ret->header.bbox_size = sizeof(*ret->boxes); | |||
ret->header.bboxes_offset = (char *)&ret->boxes - (char *)&ret->header; | |||
if (out_size) | |||
*out_size = size; | |||
return &ret->header; | |||
} | |||
/**
 * Allocate a bounding box header for nb_bboxes boxes with
 * av_detection_bbox_alloc(), wrap it in an AVBufferRef and attach it to
 * frame as side data of type AV_FRAME_DATA_DETECTION_BBOXES.
 *
 * @param frame     frame that receives the side data
 * @param nb_bboxes number of bounding boxes to reserve room for
 * @return the header now referenced by the frame's side data, or NULL if
 *         allocation fails, the allocation is larger than INT_MAX bytes,
 *         or the side data cannot be attached
 */
AVDetectionBBoxHeader *av_detection_bbox_create_side_data(AVFrame *frame, uint32_t nb_bboxes)
{
    size_t alloc_size;
    AVBufferRef *ref;
    AVDetectionBBoxHeader *hdr = av_detection_bbox_alloc(nb_bboxes, &alloc_size);

    if (!hdr)
        return NULL;

    /* Sizes above INT_MAX are rejected before a buffer is ever created. */
    ref = alloc_size > INT_MAX
        ? NULL
        : av_buffer_create((uint8_t *)hdr, alloc_size, NULL, NULL, 0);
    if (!ref) {
        /* No buffer was created, so the raw allocation is released here. */
        av_freep(&hdr);
        return NULL;
    }

    if (av_frame_new_side_data_from_buf(frame, AV_FRAME_DATA_DETECTION_BBOXES, ref))
        return hdr;

    /* Attaching failed: drop the buffer reference that wraps the header. */
    av_buffer_unref(&ref);
    return NULL;
}
@@ -0,0 +1,107 @@ | |||
/* | |||
* This file is part of FFmpeg. | |||
* | |||
* FFmpeg is free software; you can redistribute it and/or | |||
* modify it under the terms of the GNU Lesser General Public | |||
* License as published by the Free Software Foundation; either | |||
* version 2.1 of the License, or (at your option) any later version. | |||
* | |||
* FFmpeg is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
* Lesser General Public License for more details. | |||
* | |||
* You should have received a copy of the GNU Lesser General Public | |||
* License along with FFmpeg; if not, write to the Free Software | |||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
*/ | |||
#ifndef AVUTIL_DETECTION_BBOX_H | |||
#define AVUTIL_DETECTION_BBOX_H | |||
#include "rational.h" | |||
#include "avassert.h" | |||
#include "frame.h" | |||
typedef struct AVDetectionBBox { | |||
/** | |||
* Distance in pixels from the left/top edge of the frame, | |||
* together with width and height, defining the bounding box. | |||
*/ | |||
int x; | |||
int y; | |||
int w; | |||
int h; | |||
#define AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE 64 | |||
/** | |||
* Detect result with confidence | |||
*/ | |||
char detect_label[AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE]; | |||
AVRational detect_confidence; | |||
/** | |||
* At most 4 classifications based on the detected bounding box. | |||
* For example, we can get max 4 different attributes with 4 different | |||
* DNN models on one bounding box. | |||
* classify_count is zero if no classification. | |||
*/ | |||
#define AV_NUM_BBOX_CLASSIFY 4 | |||
uint32_t classify_count; | |||
char classify_labels[AV_NUM_BBOX_CLASSIFY][AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE]; | |||
AVRational classify_confidences[AV_NUM_BBOX_CLASSIFY]; | |||
} AVDetectionBBox; | |||
/**
 * Header placed in front of an array of AVDetectionBBox entries.
 * The array is located through bboxes_offset and bbox_size.
 */
typedef struct AVDetectionBBoxHeader {
    /**
     * Describes how the bounding boxes were generated,
     * for example the name of the DNN model.
     */
    char source[256];

    /**
     * Number of bounding boxes in the array.
     */
    uint32_t nb_bboxes;

    /**
     * Byte offset, measured from the beginning of this structure,
     * at which the array of bounding boxes starts.
     */
    size_t bboxes_offset;

    /**
     * Size in bytes of each bounding box in the array.
     */
    size_t bbox_size;
} AVDetectionBBoxHeader;
/* | |||
* Get the bounding box at the specified {@code idx}. Must be between 0 and nb_bboxes. | |||
*/ | |||
static av_always_inline AVDetectionBBox* | |||
av_get_detection_bbox(const AVDetectionBBoxHeader *header, unsigned int idx) | |||
{ | |||
av_assert0(idx < header->nb_bboxes); | |||
return (AVDetectionBBox *)((uint8_t *)header + header->bboxes_offset + | |||
idx * header->bbox_size); | |||
} | |||
/** | |||
* Allocates memory for AVDetectionBBoxHeader, plus an array of {@code nb_bboxes} | |||
* AVDetectionBBox, and initializes the variables. | |||
* Can be freed with a normal av_free() call. | |||
* | |||
* @param out_size if non-NULL, the size in bytes of the resulting data array is | |||
* written here. | |||
*/ | |||
AVDetectionBBoxHeader *av_detection_bbox_alloc(uint32_t nb_bboxes, size_t *out_size); | |||
/** | |||
* Allocates memory for AVDetectionBBoxHeader, plus an array of {@code nb_bboxes} | |||
* AVDetectionBBox, in the given AVFrame {@code frame} as AVFrameSideData of type | |||
* AV_FRAME_DATA_DETECTION_BBOXES and initializes the variables. | |||
*/ | |||
AVDetectionBBoxHeader *av_detection_bbox_create_side_data(AVFrame *frame, uint32_t nb_bboxes); | |||
#endif |
@@ -853,6 +853,7 @@ const char *av_frame_side_data_name(enum AVFrameSideDataType type) | |||
case AV_FRAME_DATA_VIDEO_ENC_PARAMS: return "Video encoding parameters"; | |||
case AV_FRAME_DATA_SEI_UNREGISTERED: return "H.26[45] User Data Unregistered SEI message"; | |||
case AV_FRAME_DATA_FILM_GRAIN_PARAMS: return "Film grain parameters"; | |||
case AV_FRAME_DATA_DETECTION_BBOXES: return "Bounding boxes for object detection and classification"; | |||
} | |||
return NULL; | |||
} | |||
@@ -198,6 +198,12 @@ enum AVFrameSideDataType { | |||
* Must be present for every frame which should have film grain applied. | |||
*/ | |||
AV_FRAME_DATA_FILM_GRAIN_PARAMS, | |||
/** | |||
* Bounding boxes for object detection and classification, | |||
* as described by AVDetectionBBoxHeader. | |||
*/ | |||
AV_FRAME_DATA_DETECTION_BBOXES, | |||
}; | |||
enum AVActiveFormatDescription { | |||
@@ -79,7 +79,7 @@ | |||
*/ | |||
#define LIBAVUTIL_VERSION_MAJOR 56 | |||
#define LIBAVUTIL_VERSION_MINOR 72 | |||
#define LIBAVUTIL_VERSION_MINOR 73 | |||
#define LIBAVUTIL_VERSION_MICRO 100 | |||
#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ | |||