|
|
|
@@ -644,16 +644,34 @@ static void nvenc_override_rate_control(AVCodecContext *avctx) |
|
|
|
static av_cold int nvenc_recalc_surfaces(AVCodecContext *avctx) |
|
|
|
{ |
|
|
|
NvencContext *ctx = avctx->priv_data; |
|
|
|
int nb_surfaces = 0; |
|
|
|
// default minimum of 4 surfaces |
|
|
|
// multiply by 2 for number of NVENCs on gpu (hardcode to 2) |
|
|
|
// another multiply by 2 to avoid blocking next PBB group |
|
|
|
int nb_surfaces = FFMAX(4, ctx->encode_config.frameIntervalP * 2 * 2); |
|
|
|
|
|
|
|
// lookahead enabled |
|
|
|
if (ctx->rc_lookahead > 0) { |
|
|
|
nb_surfaces = ctx->rc_lookahead + ((ctx->encode_config.frameIntervalP > 0) ? ctx->encode_config.frameIntervalP : 0) + 1 + 4; |
|
|
|
if (ctx->nb_surfaces < nb_surfaces) { |
|
|
|
// +1 is to account for lkd_bound calculation later |
|
|
|
// +4 is to allow sufficient pipelining with lookahead |
|
|
|
nb_surfaces = FFMAX(1, FFMAX(nb_surfaces, ctx->rc_lookahead + ctx->encode_config.frameIntervalP + 1 + 4)); |
|
|
|
if (nb_surfaces > ctx->nb_surfaces && ctx->nb_surfaces > 0) |
|
|
|
{ |
|
|
|
av_log(avctx, AV_LOG_WARNING, |
|
|
|
"Defined rc_lookahead requires more surfaces, " |
|
|
|
"increasing used surfaces %d -> %d\n", ctx->nb_surfaces, nb_surfaces); |
|
|
|
ctx->nb_surfaces = nb_surfaces; |
|
|
|
} |
|
|
|
ctx->nb_surfaces = FFMAX(nb_surfaces, ctx->nb_surfaces); |
|
|
|
} else { |
|
|
|
if (ctx->encode_config.frameIntervalP > 1 && ctx->nb_surfaces < nb_surfaces && ctx->nb_surfaces > 0) |
|
|
|
{ |
|
|
|
av_log(avctx, AV_LOG_WARNING, |
|
|
|
"Defined b-frame requires more surfaces, " |
|
|
|
"increasing used surfaces %d -> %d\n", ctx->nb_surfaces, nb_surfaces); |
|
|
|
ctx->nb_surfaces = FFMAX(ctx->nb_surfaces, nb_surfaces); |
|
|
|
} |
|
|
|
else if (ctx->nb_surfaces <= 0) |
|
|
|
ctx->nb_surfaces = nb_surfaces; |
|
|
|
// otherwise use user specified value |
|
|
|
} |
|
|
|
|
|
|
|
ctx->nb_surfaces = FFMAX(1, FFMIN(MAX_REGISTERED_FRAMES, ctx->nb_surfaces)); |
|
|
|
@@ -1086,6 +1104,7 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx) |
|
|
|
NvencContext *ctx = avctx->priv_data; |
|
|
|
NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs; |
|
|
|
NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs; |
|
|
|
NvencSurface* tmp_surface = &ctx->surfaces[idx]; |
|
|
|
|
|
|
|
NVENCSTATUS nv_status; |
|
|
|
NV_ENC_CREATE_BITSTREAM_BUFFER allocOut = { 0 }; |
|
|
|
@@ -1121,8 +1140,6 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx) |
|
|
|
ctx->surfaces[idx].height = allocSurf.height; |
|
|
|
} |
|
|
|
|
|
|
|
ctx->surfaces[idx].lockCount = 0; |
|
|
|
|
|
|
|
/* 1MB is large enough to hold most output frames. |
|
|
|
* NVENC increases this automaticaly if it is not enough. */ |
|
|
|
allocOut.size = 1024 * 1024; |
|
|
|
@@ -1141,6 +1158,8 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx) |
|
|
|
ctx->surfaces[idx].output_surface = allocOut.bitstreamBuffer; |
|
|
|
ctx->surfaces[idx].size = allocOut.size; |
|
|
|
|
|
|
|
av_fifo_generic_write(ctx->unused_surface_queue, &tmp_surface, sizeof(tmp_surface), NULL); |
|
|
|
|
|
|
|
return 0; |
|
|
|
} |
|
|
|
|
|
|
|
@@ -1156,6 +1175,11 @@ static av_cold int nvenc_setup_surfaces(AVCodecContext *avctx) |
|
|
|
ctx->timestamp_list = av_fifo_alloc(ctx->nb_surfaces * sizeof(int64_t)); |
|
|
|
if (!ctx->timestamp_list) |
|
|
|
return AVERROR(ENOMEM); |
|
|
|
|
|
|
|
ctx->unused_surface_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(NvencSurface*)); |
|
|
|
if (!ctx->unused_surface_queue) |
|
|
|
return AVERROR(ENOMEM); |
|
|
|
|
|
|
|
ctx->output_surface_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(NvencSurface*)); |
|
|
|
if (!ctx->output_surface_queue) |
|
|
|
return AVERROR(ENOMEM); |
|
|
|
@@ -1222,6 +1246,7 @@ av_cold int ff_nvenc_encode_close(AVCodecContext *avctx) |
|
|
|
av_fifo_freep(&ctx->timestamp_list); |
|
|
|
av_fifo_freep(&ctx->output_surface_ready_queue); |
|
|
|
av_fifo_freep(&ctx->output_surface_queue); |
|
|
|
av_fifo_freep(&ctx->unused_surface_queue); |
|
|
|
|
|
|
|
if (ctx->surfaces && avctx->pix_fmt == AV_PIX_FMT_CUDA) { |
|
|
|
for (i = 0; i < ctx->nb_surfaces; ++i) { |
|
|
|
@@ -1305,16 +1330,14 @@ av_cold int ff_nvenc_encode_init(AVCodecContext *avctx) |
|
|
|
|
|
|
|
static NvencSurface *get_free_frame(NvencContext *ctx)
{
    NvencSurface *tmp_surf;

    /* Free surfaces are tracked exclusively via unused_surface_queue:
     * every surface is pushed there at allocation time and returned to it
     * once its output has been consumed, so availability is a simple
     * queue-empty check rather than a scan over per-surface lock counts. */
    if (!(av_fifo_size(ctx->unused_surface_queue) > 0))
        // queue empty: all surfaces are currently in flight
        return NULL;

    av_fifo_generic_read(ctx->unused_surface_queue, &tmp_surf, sizeof(tmp_surf), NULL);
    return tmp_surf;
}
|
|
|
|
|
|
|
static int nvenc_copy_frame(AVCodecContext *avctx, NvencSurface *nv_surface, |
|
|
|
@@ -1712,7 +1735,6 @@ int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt, |
|
|
|
} |
|
|
|
|
|
|
|
if (res) { |
|
|
|
inSurf->lockCount = 0; |
|
|
|
return res; |
|
|
|
} |
|
|
|
|
|
|
|
@@ -1790,8 +1812,7 @@ int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt, |
|
|
|
if (res) |
|
|
|
return res; |
|
|
|
|
|
|
|
av_assert0(tmpoutsurf->lockCount); |
|
|
|
tmpoutsurf->lockCount--; |
|
|
|
av_fifo_generic_write(ctx->unused_surface_queue, &tmpoutsurf, sizeof(tmpoutsurf), NULL); |
|
|
|
|
|
|
|
*got_packet = 1; |
|
|
|
} else { |
|
|
|
|