24 #include "common/args.h"
25 #include "common/tools_common.h"
26 #include "common/video_writer.h"
27 #include "aom_ports/aom_timer.h"
29 #define OPTION_BUFFER_SIZE 1024
32 const char *output_filename;
33 char options[OPTION_BUFFER_SIZE];
34 struct AvxInputContext input_ctx;
48 static const arg_def_t outputfile =
49 ARG_DEF(
"o",
"output", 1,
"Output filename");
50 static const arg_def_t frames_arg =
51 ARG_DEF(
"f",
"frames", 1,
"Number of frames to encode");
52 static const arg_def_t threads_arg =
53 ARG_DEF(
"th",
"threads", 1,
"Number of threads to use");
54 static const arg_def_t width_arg = ARG_DEF(
"w",
"width", 1,
"Source width");
55 static const arg_def_t height_arg = ARG_DEF(
"h",
"height", 1,
"Source height");
56 static const arg_def_t timebase_arg =
57 ARG_DEF(
"t",
"timebase", 1,
"Timebase (num/den)");
58 static const arg_def_t bitrate_arg = ARG_DEF(
59 "b",
"target-bitrate", 1,
"Encoding bitrate, in kilobits per second");
60 static const arg_def_t spatial_layers_arg =
61 ARG_DEF(
"sl",
"spatial-layers", 1,
"Number of spatial SVC layers");
62 static const arg_def_t temporal_layers_arg =
63 ARG_DEF(
"tl",
"temporal-layers", 1,
"Number of temporal SVC layers");
64 static const arg_def_t layering_mode_arg =
65 ARG_DEF(
"lm",
"layering-mode", 1,
"Temporal layering scheme.");
66 static const arg_def_t kf_dist_arg =
67 ARG_DEF(
"k",
"kf-dist", 1,
"Number of frames between keyframes");
68 static const arg_def_t scale_factors_arg =
69 ARG_DEF(
"r",
"scale-factors", 1,
"Scale factors (lowest to highest layer)");
70 static const arg_def_t min_q_arg =
71 ARG_DEF(NULL,
"min-q", 1,
"Minimum quantizer");
72 static const arg_def_t max_q_arg =
73 ARG_DEF(NULL,
"max-q", 1,
"Maximum quantizer");
74 static const arg_def_t speed_arg =
75 ARG_DEF(
"sp",
"speed", 1,
"Speed configuration");
76 static const arg_def_t aqmode_arg =
77 ARG_DEF(
"aq",
"aqmode", 1,
"AQ mode off/on");
78 static const arg_def_t bitrates_arg =
79 ARG_DEF(
"bl",
"bitrates", 1,
80 "Bitrates[spatial_layer * num_temporal_layer + temporal_layer]");
81 static const arg_def_t dropframe_thresh_arg =
82 ARG_DEF(NULL,
"drop-frame", 1,
"Temporal resampling threshold (buf %)");
83 static const arg_def_t error_resilient_arg =
84 ARG_DEF(NULL,
"error-resilient", 1,
"Error resilient flag");
86 #if CONFIG_AV1_HIGHBITDEPTH
87 static const struct arg_enum_list bitdepth_enum[] = {
91 static const arg_def_t bitdepth_arg = ARG_DEF_ENUM(
92 "d",
"bit-depth", 1,
"Bit depth for codec 8, 10 or 12. ", bitdepth_enum);
95 static const arg_def_t *svc_args[] = {
96 &frames_arg, &outputfile, &width_arg,
97 &height_arg, &timebase_arg, &bitrate_arg,
98 &spatial_layers_arg, &kf_dist_arg, &scale_factors_arg,
99 &min_q_arg, &max_q_arg, &temporal_layers_arg,
100 &layering_mode_arg, &threads_arg, &aqmode_arg,
101 #if CONFIG_AV1_HIGHBITDEPTH
104 &speed_arg, &bitrates_arg, &dropframe_thresh_arg,
105 &error_resilient_arg, NULL
/* Zero-fills the object named by Dest. sizeof is applied to Dest itself, so
 * pass the array or struct, not a pointer to it. The expansion deliberately
 * carries no trailing semicolon: the caller's own ';' ends the statement,
 * which keeps the macro usable in an unbraced if/else. */
#define zero(Dest) memset(&(Dest), 0, sizeof(Dest))
110 static const char *exec_name;
112 void usage_exit(
void) {
113 fprintf(stderr,
"Usage: %s <options> input_filename -o output_filename\n",
115 fprintf(stderr,
"Options:\n");
116 arg_show_usage(stderr, svc_args);
/* Returns 1 when the 4-byte probe buffer `detect` holds the bytes "YUV4",
 * and 0 otherwise. Exactly four bytes are compared; no NUL is required. */
static int file_is_y4m(const char detect[4]) {
  static const char y4m_magic[4] = { 'Y', 'U', 'V', '4' };
  const int differs = memcmp(detect, y4m_magic, sizeof(y4m_magic));
  return differs == 0;
}
124 static int fourcc_is_ivf(
const char detect[4]) {
125 if (memcmp(detect,
"DKIF", 4) == 0) {
131 static const int option_max_values[ALL_OPTION_TYPES] = { 63, INT_MAX, INT_MAX,
134 static const int option_min_values[ALL_OPTION_TYPES] = { 0, 0, 1, 0 };
136 static void open_input_file(
struct AvxInputContext *input,
139 input->file = strcmp(input->filename,
"-") ? fopen(input->filename,
"rb")
140 : set_binary_mode(stdin);
142 if (!input->file) fatal(
"Failed to open input file");
144 if (!fseeko(input->file, 0, SEEK_END)) {
148 input->length = ftello(input->file);
153 input->pixel_aspect_ratio.numerator = 1;
154 input->pixel_aspect_ratio.denominator = 1;
159 input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file);
160 input->detect.position = 0;
162 if (input->detect.buf_read == 4 && file_is_y4m(input->detect.buf)) {
163 if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4, csp,
164 input->only_i420) >= 0) {
165 input->file_type = FILE_TYPE_Y4M;
166 input->width = input->y4m.pic_w;
167 input->height = input->y4m.pic_h;
168 input->pixel_aspect_ratio.numerator = input->y4m.par_n;
169 input->pixel_aspect_ratio.denominator = input->y4m.par_d;
170 input->framerate.numerator = input->y4m.fps_n;
171 input->framerate.denominator = input->y4m.fps_d;
172 input->fmt = input->y4m.aom_fmt;
173 input->bit_depth = input->y4m.bit_depth;
175 fatal(
"Unsupported Y4M stream.");
177 }
else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) {
178 fatal(
"IVF is not supported as input.");
180 input->file_type = FILE_TYPE_RAW;
184 static aom_codec_err_t extract_option(LAYER_OPTION_TYPE type,
char *input,
185 int *value0,
int *value1) {
186 if (type == SCALE_FACTOR) {
187 *value0 = (int)strtol(input, &input, 10);
189 *value1 = (int)strtol(input, &input, 10);
191 if (*value0 < option_min_values[SCALE_FACTOR] ||
192 *value1 < option_min_values[SCALE_FACTOR] ||
193 *value0 > option_max_values[SCALE_FACTOR] ||
194 *value1 > option_max_values[SCALE_FACTOR] ||
198 *value0 = atoi(input);
199 if (*value0 < option_min_values[type] || *value0 > option_max_values[type])
207 int *option0,
int *option1) {
211 const char *delim =
",";
219 if (input == NULL || option0 == NULL ||
220 (option1 == NULL && type == SCALE_FACTOR))
223 input_string = malloc(strlen(input));
224 memcpy(input_string, input, strlen(input));
226 token = strtok(input_string, delim);
227 for (i = 0; i < num_layers; ++i) {
229 res = extract_option(type, token, option0 + i, option1 + i);
231 token = strtok(NULL, delim);
243 static void parse_command_line(
int argc,
const char **argv_,
251 char string_options[1024] = { 0 };
256 app_input->layering_mode = 0;
261 argv = argv_dup(argc - 1, argv_ + 1);
262 for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
265 if (arg_match(&arg, &outputfile, argi)) {
266 app_input->output_filename = arg.val;
267 }
else if (arg_match(&arg, &width_arg, argi)) {
268 enc_cfg->
g_w = arg_parse_uint(&arg);
269 }
else if (arg_match(&arg, &height_arg, argi)) {
270 enc_cfg->
g_h = arg_parse_uint(&arg);
271 }
else if (arg_match(&arg, &timebase_arg, argi)) {
272 enc_cfg->
g_timebase = arg_parse_rational(&arg);
273 }
else if (arg_match(&arg, &bitrate_arg, argi)) {
275 }
else if (arg_match(&arg, &spatial_layers_arg, argi)) {
277 }
else if (arg_match(&arg, &temporal_layers_arg, argi)) {
279 }
else if (arg_match(&arg, &speed_arg, argi)) {
280 app_input->speed = arg_parse_uint(&arg);
281 if (app_input->speed > 9) {
282 warn(
"Mapping speed %d to speed 9.\n", app_input->speed);
284 }
else if (arg_match(&arg, &aqmode_arg, argi)) {
285 app_input->aq_mode = arg_parse_uint(&arg);
286 }
else if (arg_match(&arg, &threads_arg, argi)) {
287 enc_cfg->
g_threads = arg_parse_uint(&arg);
288 }
else if (arg_match(&arg, &layering_mode_arg, argi)) {
289 app_input->layering_mode = arg_parse_int(&arg);
290 }
else if (arg_match(&arg, &kf_dist_arg, argi)) {
293 }
else if (arg_match(&arg, &scale_factors_arg, argi)) {
294 parse_layer_options_from_string(svc_params, SCALE_FACTOR, arg.val,
297 }
else if (arg_match(&arg, &min_q_arg, argi)) {
299 }
else if (arg_match(&arg, &max_q_arg, argi)) {
301 #if CONFIG_AV1_HIGHBITDEPTH
302 }
else if (arg_match(&arg, &bitdepth_arg, argi)) {
303 enc_cfg->
g_bit_depth = arg_parse_enum_or_int(&arg);
318 die(
"Error: Invalid bit depth selected (%d)\n", enc_cfg->
g_bit_depth);
322 }
else if (arg_match(&arg, &dropframe_thresh_arg, argi)) {
324 }
else if (arg_match(&arg, &error_resilient_arg, argi)) {
327 die(
"Invalid value for error resilient (0, 1): %d.",
335 for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
337 if (arg_match(&arg, &bitrates_arg, argi)) {
338 parse_layer_options_from_string(svc_params, BITRATE, arg.val,
346 if (strlen(string_options) > 0)
347 strncpy(app_input->options, string_options, OPTION_BUFFER_SIZE);
350 for (argi = argv; *argi; ++argi)
351 if (argi[0][0] ==
'-' && strlen(argi[0]) > 1)
352 die(
"Error: Unrecognized option %s\n", *argi);
354 if (argv[0] == NULL) {
358 app_input->input_ctx.filename = argv[0];
361 open_input_file(&app_input->input_ctx, 0);
362 if (app_input->input_ctx.file_type == FILE_TYPE_Y4M) {
363 enc_cfg->
g_w = app_input->input_ctx.width;
364 enc_cfg->
g_h = app_input->input_ctx.height;
367 if (enc_cfg->
g_w < 16 || enc_cfg->
g_w % 2 || enc_cfg->
g_h < 16 ||
369 die(
"Invalid resolution: %d x %d\n", enc_cfg->
g_w, enc_cfg->
g_h);
374 "width %u, height: %u\n"
375 "num: %d, den: %d, bitrate: %u\n"
383 static unsigned int mode_to_num_temporal_layers[10] = { 1, 2, 3, 3, 2,
385 static unsigned int mode_to_num_spatial_layers[10] = { 1, 1, 1, 1, 1,
389 struct RateControlMetrics {
406 double avg_st_encoding_bitrate;
408 double variance_st_encoding_bitrate;
427 static int read_frame(
struct AvxInputContext *input_ctx,
aom_image_t *img) {
428 FILE *f = input_ctx->file;
429 y4m_input *y4m = &input_ctx->y4m;
432 if (input_ctx->file_type == FILE_TYPE_Y4M) {
433 if (y4m_input_fetch_frame(y4m, f, img) < 1)
return 0;
435 shortread = read_yuv_frame(input_ctx, img);
441 static void close_input_file(
struct AvxInputContext *input) {
443 if (input->file_type == FILE_TYPE_Y4M) y4m_input_close(&input->y4m);
452 static void set_rate_control_metrics(
struct RateControlMetrics *rc,
454 unsigned int ss_number_layers,
455 unsigned int ts_number_layers) {
457 ts_rate_decimator[0] = 1;
458 if (ts_number_layers == 2) {
459 ts_rate_decimator[0] = 2;
460 ts_rate_decimator[1] = 1;
462 if (ts_number_layers == 3) {
463 ts_rate_decimator[0] = 4;
464 ts_rate_decimator[1] = 2;
465 ts_rate_decimator[2] = 1;
469 for (
unsigned int sl = 0; sl < ss_number_layers; ++sl) {
470 unsigned int i = sl * ts_number_layers;
471 rc->layer_framerate[0] = framerate / ts_rate_decimator[0];
473 1000.0 * rc->layer_target_bitrate[i] / rc->layer_framerate[0];
474 for (
unsigned int tl = 0; tl < ts_number_layers; ++tl) {
475 i = sl * ts_number_layers + tl;
477 rc->layer_framerate[tl] = framerate / ts_rate_decimator[tl];
480 (rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) /
481 (rc->layer_framerate[tl] - rc->layer_framerate[tl - 1]);
483 rc->layer_input_frames[tl] = 0;
484 rc->layer_enc_frames[tl] = 0;
485 rc->layer_encoding_bitrate[i] = 0.0;
486 rc->layer_avg_frame_size[i] = 0.0;
487 rc->layer_avg_rate_mismatch[i] = 0.0;
490 rc->window_count = 0;
491 rc->window_size = 15;
492 rc->avg_st_encoding_bitrate = 0.0;
493 rc->variance_st_encoding_bitrate = 0.0;
496 static void printout_rate_control_summary(
struct RateControlMetrics *rc,
498 unsigned int ss_number_layers,
499 unsigned int ts_number_layers) {
500 int tot_num_frames = 0;
501 double perc_fluctuation = 0.0;
502 printf(
"Total number of processed frames: %d\n\n", frame_cnt - 1);
503 printf(
"Rate control layer stats for %u layer(s):\n\n", ts_number_layers);
504 for (
unsigned int sl = 0; sl < ss_number_layers; ++sl) {
506 for (
unsigned int tl = 0; tl < ts_number_layers; ++tl) {
507 unsigned int i = sl * ts_number_layers + tl;
508 const int num_dropped =
509 tl > 0 ? rc->layer_input_frames[tl] - rc->layer_enc_frames[tl]
510 : rc->layer_input_frames[tl] - rc->layer_enc_frames[tl] - 1;
511 tot_num_frames += rc->layer_input_frames[tl];
512 rc->layer_encoding_bitrate[i] = 0.001 * rc->layer_framerate[tl] *
513 rc->layer_encoding_bitrate[i] /
515 rc->layer_avg_frame_size[i] =
516 rc->layer_avg_frame_size[i] / rc->layer_enc_frames[tl];
517 rc->layer_avg_rate_mismatch[i] =
518 100.0 * rc->layer_avg_rate_mismatch[i] / rc->layer_enc_frames[tl];
519 printf(
"For layer#: %u %u \n", sl, tl);
520 printf(
"Bitrate (target vs actual): %d %f\n", rc->layer_target_bitrate[i],
521 rc->layer_encoding_bitrate[i]);
522 printf(
"Average frame size (target vs actual): %f %f\n", rc->layer_pfb[i],
523 rc->layer_avg_frame_size[i]);
524 printf(
"Average rate_mismatch: %f\n", rc->layer_avg_rate_mismatch[i]);
526 "Number of input frames, encoded (non-key) frames, "
527 "and perc dropped frames: %d %d %f\n",
528 rc->layer_input_frames[tl], rc->layer_enc_frames[tl],
529 100.0 * num_dropped / rc->layer_input_frames[tl]);
533 rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
534 rc->variance_st_encoding_bitrate =
535 rc->variance_st_encoding_bitrate / rc->window_count -
536 (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
537 perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
538 rc->avg_st_encoding_bitrate;
539 printf(
"Short-time stats, for window of %d frames:\n", rc->window_size);
540 printf(
"Average, rms-variance, and percent-fluct: %f %f %f\n",
541 rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate),
543 if (frame_cnt - 1 != tot_num_frames)
544 die(
"Error: Number of input frames not equal to output!\n");
548 static void set_layer_pattern(
int layering_mode,
int superframe_cnt,
551 int *use_svc_control,
int spatial_layer_id,
552 int is_key_frame,
int ksvc_mode) {
554 int enable_longterm_temporal_ref = 1;
555 int shift = (layering_mode == 7) ? 2 : 0;
556 *use_svc_control = 1;
559 int base_count = superframe_cnt >> 2;
563 for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->
ref_idx[i] = i;
564 for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->
reference[i] = 0;
565 for (i = 0; i < REF_FRAMES; i++) ref_frame_config->
refresh[i] = 0;
572 ref_frame_config->
reference[SVC_LAST_FRAME] = 1;
574 switch (layering_mode) {
578 ref_frame_config->
refresh[0] = 1;
579 ref_frame_config->
reference[SVC_LAST_FRAME] = 1;
585 if (superframe_cnt % 2 == 0) {
588 ref_frame_config->
refresh[0] = 1;
589 ref_frame_config->
reference[SVC_LAST_FRAME] = 1;
593 ref_frame_config->
reference[SVC_LAST_FRAME] = 1;
601 if (superframe_cnt % 4 == 0) {
605 ref_frame_config->
refresh[0] = 1;
606 ref_frame_config->
reference[SVC_LAST_FRAME] = 1;
607 }
else if ((superframe_cnt - 1) % 4 == 0) {
610 ref_frame_config->
reference[SVC_LAST_FRAME] = 1;
611 }
else if ((superframe_cnt - 2) % 4 == 0) {
614 ref_frame_config->
refresh[1] = 1;
615 ref_frame_config->
reference[SVC_LAST_FRAME] = 1;
616 }
else if ((superframe_cnt - 3) % 4 == 0) {
621 ref_frame_config->
ref_idx[SVC_LAST_FRAME] = 1;
622 ref_frame_config->
ref_idx[SVC_LAST2_FRAME] = 0;
623 ref_frame_config->
reference[SVC_LAST_FRAME] = 1;
634 ref_frame_config->
ref_idx[SVC_GOLDEN_FRAME] = 3;
636 lag_index = 4 + (base_count % 4);
638 ref_frame_config->
ref_idx[SVC_ALTREF_FRAME] = lag_index;
639 if (superframe_cnt % 4 == 0) {
643 ref_frame_config->
refresh[0] = 1;
644 ref_frame_config->
reference[SVC_LAST_FRAME] = 1;
646 if (base_count % 10 == 0) ref_frame_config->
refresh[3] = 1;
648 ref_frame_config->
refresh[lag_index] = 1;
649 }
else if ((superframe_cnt - 1) % 4 == 0) {
652 ref_frame_config->
reference[SVC_LAST_FRAME] = 1;
653 }
else if ((superframe_cnt - 2) % 4 == 0) {
656 ref_frame_config->
refresh[1] = 1;
657 ref_frame_config->
reference[SVC_LAST_FRAME] = 1;
658 }
else if ((superframe_cnt - 3) % 4 == 0) {
663 ref_frame_config->
ref_idx[SVC_LAST_FRAME] = 1;
664 ref_frame_config->
ref_idx[SVC_LAST2_FRAME] = 0;
665 ref_frame_config->
reference[SVC_LAST_FRAME] = 1;
668 ref_frame_config->
reference[SVC_GOLDEN_FRAME] = 1;
669 ref_frame_config->
reference[SVC_ALTREF_FRAME] = 1;
677 if (superframe_cnt % 4 == 0) {
681 ref_frame_config->
refresh[0] = 1;
682 ref_frame_config->
reference[SVC_LAST_FRAME] = 1;
683 }
else if ((superframe_cnt - 1) % 4 == 0) {
686 ref_frame_config->
reference[SVC_LAST_FRAME] = 1;
687 }
else if ((superframe_cnt - 2) % 4 == 0) {
690 ref_frame_config->
refresh[3] = 1;
691 ref_frame_config->
reference[SVC_LAST_FRAME] = 1;
692 }
else if ((superframe_cnt - 3) % 4 == 0) {
695 ref_frame_config->
reference[SVC_GOLDEN_FRAME] = 1;
703 ref_frame_config->
refresh[0] = 1;
704 ref_frame_config->
reference[SVC_LAST_FRAME] = 1;
708 ref_frame_config->
ref_idx[SVC_LAST_FRAME] = 1;
709 ref_frame_config->
ref_idx[SVC_GOLDEN_FRAME] = 0;
710 ref_frame_config->
refresh[1] = 1;
711 ref_frame_config->
reference[SVC_LAST_FRAME] = 1;
712 ref_frame_config->
reference[SVC_GOLDEN_FRAME] = 1;
724 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
725 ref_frame_config->
ref_idx[i] = 0;
726 ref_frame_config->
refresh[0] = 1;
727 ref_frame_config->
reference[SVC_LAST_FRAME] = 1;
732 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
733 ref_frame_config->
ref_idx[i] = 0;
734 ref_frame_config->
ref_idx[SVC_LAST_FRAME] = 1;
735 ref_frame_config->
refresh[1] = 1;
736 ref_frame_config->
reference[SVC_LAST_FRAME] = 1;
737 ref_frame_config->
reference[SVC_GOLDEN_FRAME] = 1;
742 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
743 ref_frame_config->
ref_idx[i] = 1;
744 ref_frame_config->
ref_idx[SVC_LAST_FRAME] = 2;
745 ref_frame_config->
refresh[2] = 1;
746 ref_frame_config->
reference[SVC_LAST_FRAME] = 1;
747 ref_frame_config->
reference[SVC_GOLDEN_FRAME] = 1;
750 if (enable_longterm_temporal_ref) {
751 ref_frame_config->
ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
752 ref_frame_config->
reference[SVC_ALTREF_FRAME] = 1;
753 if (base_count % 10 == 0)
754 ref_frame_config->
refresh[REF_FRAMES - 1] = 1;
771 ref_frame_config->
reference[SVC_LAST_FRAME] = 1;
772 if (superframe_cnt % 4 == 0) {
778 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
779 ref_frame_config->
ref_idx[i] = 0;
780 ref_frame_config->
refresh[0] = 1;
785 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
786 ref_frame_config->
ref_idx[i] = 0;
787 ref_frame_config->
ref_idx[SVC_LAST_FRAME] = 1;
788 ref_frame_config->
refresh[1] = 1;
793 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
794 ref_frame_config->
ref_idx[i] = 1;
795 ref_frame_config->
ref_idx[SVC_LAST_FRAME] = 2;
796 ref_frame_config->
refresh[2] = 1;
798 }
else if ((superframe_cnt - 1) % 4 == 0) {
805 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
806 ref_frame_config->
ref_idx[i] = 0;
807 ref_frame_config->
ref_idx[SVC_GOLDEN_FRAME] = 3;
808 ref_frame_config->
refresh[3] = 1;
813 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
814 ref_frame_config->
ref_idx[i] = 3;
815 ref_frame_config->
ref_idx[SVC_LAST_FRAME] = 1;
816 ref_frame_config->
ref_idx[SVC_LAST2_FRAME] = 4;
817 ref_frame_config->
refresh[4] = 1;
822 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
823 ref_frame_config->
ref_idx[i] = 4;
824 ref_frame_config->
ref_idx[SVC_LAST_FRAME] = 2;
826 }
else if ((superframe_cnt - 2) % 4 == 0) {
833 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
834 ref_frame_config->
ref_idx[i] = 0;
835 ref_frame_config->
ref_idx[SVC_GOLDEN_FRAME] = 5 - shift;
836 ref_frame_config->
refresh[5 - shift] = 1;
841 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
842 ref_frame_config->
ref_idx[i] = 5 - shift;
843 ref_frame_config->
ref_idx[SVC_LAST_FRAME] = 1;
844 ref_frame_config->
ref_idx[SVC_LAST3_FRAME] = 6 - shift;
845 ref_frame_config->
refresh[6 - shift] = 1;
850 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
851 ref_frame_config->
ref_idx[i] = 6 - shift;
852 ref_frame_config->
ref_idx[SVC_LAST_FRAME] = 2;
853 ref_frame_config->
ref_idx[SVC_LAST3_FRAME] = 7 - shift;
854 ref_frame_config->
refresh[7 - shift] = 1;
856 }
else if ((superframe_cnt - 3) % 4 == 0) {
863 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
864 ref_frame_config->
ref_idx[i] = 0;
865 ref_frame_config->
ref_idx[SVC_LAST_FRAME] = 5 - shift;
866 ref_frame_config->
ref_idx[SVC_GOLDEN_FRAME] = 3;
867 ref_frame_config->
refresh[3] = 1;
871 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
872 ref_frame_config->
ref_idx[i] = 0;
873 ref_frame_config->
ref_idx[SVC_LAST_FRAME] = 6 - shift;
874 ref_frame_config->
ref_idx[SVC_GOLDEN_FRAME] = 3;
875 ref_frame_config->
ref_idx[SVC_LAST2_FRAME] = 4;
876 ref_frame_config->
refresh[4] = 1;
880 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
881 ref_frame_config->
ref_idx[i] = 0;
882 ref_frame_config->
ref_idx[SVC_LAST_FRAME] = 7 - shift;
883 ref_frame_config->
ref_idx[SVC_GOLDEN_FRAME] = 4;
888 ref_frame_config->
reference[SVC_GOLDEN_FRAME] = 1;
894 layering_mode == 7) {
895 ref_frame_config->
ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
896 ref_frame_config->
reference[SVC_ALTREF_FRAME] = 1;
898 ref_frame_config->
refresh[REF_FRAMES - 1] = 1;
901 default: assert(0); die(
"Error: Unsupported temporal layering mode!\n");
905 int main(
int argc,
const char **argv) {
908 AvxVideoWriter *total_layer_file = NULL;
917 int frame_duration = 1;
922 struct RateControlMetrics rc;
924 int64_t cx_time_sl[3];
925 double sum_bitrate = 0.0;
926 double sum_bitrate2 = 0.0;
927 double framerate = 30.0;
928 int use_svc_control = 1;
929 int set_err_resil_frame = 0;
930 zero(rc.layer_target_bitrate);
932 memset(&app_input, 0,
sizeof(AppInput));
933 memset(&svc_params, 0,
sizeof(svc_params));
937 const int test_dynamic_scaling_single_layer = 0;
940 app_input.input_ctx.framerate.numerator = 30;
941 app_input.input_ctx.framerate.denominator = 1;
942 app_input.input_ctx.only_i420 = 1;
943 app_input.input_ctx.bit_depth = 0;
968 parse_command_line(argc, argv, &app_input, &svc_params, &cfg);
973 unsigned int width = cfg.
g_w;
974 unsigned int height = cfg.
g_h;
976 if (ts_number_layers !=
977 mode_to_num_temporal_layers[app_input.layering_mode] ||
978 ss_number_layers != mode_to_num_spatial_layers[app_input.layering_mode]) {
979 die(
"Number of layers doesn't match layering mode.");
983 if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) {
985 die(
"Failed to allocate image", width, height);
994 unsigned int total_rate = 0;
995 for (i = 0; i < ss_number_layers; i++) {
1001 die(
"Incorrect total target bitrate");
1005 if (ts_number_layers == 2) {
1008 }
else if (ts_number_layers == 3) {
1015 set_rate_control_metrics(&rc, framerate, ss_number_layers, ts_number_layers);
1017 if (app_input.input_ctx.file_type == FILE_TYPE_Y4M) {
1018 if (app_input.input_ctx.width != cfg.
g_w ||
1019 app_input.input_ctx.height != cfg.
g_h) {
1020 die(
"Incorrect width or height: %d x %d", cfg.
g_w, cfg.
g_h);
1022 if (app_input.input_ctx.framerate.numerator != cfg.
g_timebase.
den ||
1023 app_input.input_ctx.framerate.denominator != cfg.
g_timebase.
num) {
1024 die(
"Incorrect framerate: numerator %d denominator %d",
1030 info.codec_fourcc = get_fourcc_by_aom_encoder(encoder);
1031 info.frame_width = cfg.
g_w;
1032 info.frame_height = cfg.
g_h;
1036 for (
unsigned int sl = 0; sl < ss_number_layers; ++sl) {
1037 for (
unsigned tl = 0; tl < ts_number_layers; ++tl) {
1038 i = sl * ts_number_layers + tl;
1039 char file_name[PATH_MAX];
1041 snprintf(file_name,
sizeof(file_name),
"%s_%u.av1",
1042 app_input.output_filename, i);
1043 outfile[i] = aom_video_writer_open(file_name, kContainerIVF, &info);
1044 if (!outfile[i]) die(
"Failed to open %s for writing", file_name);
1048 aom_video_writer_open(app_input.output_filename, kContainerIVF, &info);
1049 if (!total_layer_file)
1050 die(
"Failed to open %s for writing", app_input.output_filename);
1055 die(
"Failed to initialize encoder");
1074 for (i = 0; i < ss_number_layers * ts_number_layers; ++i) {
1078 for (i = 0; i < ss_number_layers; ++i) {
1082 if (ss_number_layers == 2) {
1085 }
else if (ss_number_layers == 3) {
1098 const int max_intra_size_pct = 300;
1100 max_intra_size_pct);
1103 for (
unsigned int slx = 0; slx < ss_number_layers; slx++) cx_time_sl[slx] = 0;
1105 while (frame_avail || got_data) {
1106 struct aom_usec_timer timer;
1107 frame_avail = read_frame(&(app_input.input_ctx), &raw);
1108 int is_key_frame = (frame_cnt % cfg.
kf_max_dist) == 0;
1110 for (
unsigned int slx = 0; slx < ss_number_layers; slx++) {
1117 set_layer_pattern(app_input.layering_mode, frame_cnt, &layer_id,
1118 &ref_frame_config, &use_svc_control, slx, is_key_frame,
1119 (app_input.layering_mode == 9));
1121 if (use_svc_control)
1124 if (set_err_resil_frame) {
1127 int err_resil_mode =
1134 if (frame_avail && slx == 0) ++rc.layer_input_frames[layer];
1136 if (test_dynamic_scaling_single_layer) {
1137 if (frame_cnt >= 200 && frame_cnt <= 400) {
1149 aom_usec_timer_start(&timer);
1151 die_codec(&codec,
"Failed to encode frame");
1152 aom_usec_timer_mark(&timer);
1153 cx_time += aom_usec_timer_elapsed(&timer);
1154 cx_time_sl[slx] += aom_usec_timer_elapsed(&timer);
1159 switch (pkt->
kind) {
1162 sl < ss_number_layers; ++sl) {
1164 tl < ts_number_layers; ++tl) {
1165 unsigned int j = sl * ts_number_layers + tl;
1166 aom_video_writer_write_frame(outfile[j], pkt->
data.
frame.buf,
1169 rc.layer_encoding_bitrate[j] += 8.0 * pkt->
data.
frame.sz;
1173 aom_video_writer_write_frame(total_layer_file, pkt->
data.
frame.buf,
1179 rc.layer_avg_frame_size[j] += 8.0 * pkt->
data.
frame.sz;
1180 rc.layer_avg_rate_mismatch[j] +=
1181 fabs(8.0 * pkt->
data.
frame.sz - rc.layer_pfb[j]) /
1190 if (frame_cnt > rc.window_size && slx == ss_number_layers - 1) {
1191 sum_bitrate += 0.001 * 8.0 * pkt->
data.
frame.sz * framerate;
1192 rc.window_size = (rc.window_size <= 0) ? 1 : rc.window_size;
1193 if (frame_cnt % rc.window_size == 0) {
1194 rc.window_count += 1;
1195 rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size;
1196 rc.variance_st_encoding_bitrate +=
1197 (sum_bitrate / rc.window_size) *
1198 (sum_bitrate / rc.window_size);
1203 if (frame_cnt > rc.window_size + rc.window_size / 2 &&
1204 slx == ss_number_layers - 1) {
1205 sum_bitrate2 += 0.001 * 8.0 * pkt->
data.
frame.sz * framerate;
1206 if (frame_cnt > 2 * rc.window_size &&
1207 frame_cnt % rc.window_size == 0) {
1208 rc.window_count += 1;
1209 rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
1210 rc.variance_st_encoding_bitrate +=
1211 (sum_bitrate2 / rc.window_size) *
1212 (sum_bitrate2 / rc.window_size);
1222 pts += frame_duration;
1224 close_input_file(&(app_input.input_ctx));
1225 printout_rate_control_summary(&rc, frame_cnt, ss_number_layers,
1228 printf(
"Frame cnt and encoding time/FPS stats for encoding: %d %f %f\n",
1229 frame_cnt, 1000 * (
float)cx_time / (
double)(frame_cnt * 1000000),
1230 1000000 * (
double)frame_cnt / (
double)cx_time);
1232 if (ss_number_layers > 1) {
1233 printf(
"Per spatial layer: \n");
1234 for (
unsigned int slx = 0; slx < ss_number_layers; slx++)
1235 printf(
"Frame cnt and encoding time/FPS stats for encoding: %d %f %f\n",
1236 frame_cnt, (
float)cx_time_sl[slx] / (
double)(frame_cnt * 1000),
1237 1000000 * (
double)frame_cnt / (
double)cx_time_sl[slx]);
1243 for (i = 0; i < ss_number_layers * ts_number_layers; ++i)
1244 aom_video_writer_close(outfile[i]);
1245 aom_video_writer_close(total_layer_file);
1247 if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) {
1250 return EXIT_SUCCESS;
Describes the encoder algorithm interface to applications.
enum aom_chroma_sample_position aom_chroma_sample_position_t
List of chroma sample positions.
aom_image_t * aom_img_alloc(aom_image_t *img, aom_img_fmt_t fmt, unsigned int d_w, unsigned int d_h, unsigned int align)
Open a descriptor, allocating storage for the underlying image.
@ AOM_IMG_FMT_I420
Definition: aom_image.h:45
void aom_img_free(aom_image_t *img)
Close an image descriptor.
Provides definitions for using AOM or AV1 encoder algorithm within the aom Codec Interface.
Declares top-level encoder structures and functions.
#define AOM_MAX_LAYERS
Definition: aomcx.h:1417
aom_codec_iface_t * aom_codec_av1_cx(void)
The interface to the AV1 encoder.
#define AOM_MAX_TS_LAYERS
Definition: aomcx.h:1419
@ AV1E_SET_ROW_MT
Codec control function to enable the row based multi-threading of the encoder, unsigned int parameter...
Definition: aomcx.h:348
@ AV1E_SET_ENABLE_TPL_MODEL
Codec control function to enable RDO modulated by frame temporal dependency, unsigned int parameter.
Definition: aomcx.h:395
@ AV1E_SET_AQ_MODE
Codec control function to set adaptive quantization mode, unsigned int parameter.
Definition: aomcx.h:455
@ AV1E_SET_SVC_LAYER_ID
Codec control function to set the layer id, aom_svc_layer_id_t* parameter.
Definition: aomcx.h:1256
@ AV1E_SET_SVC_REF_FRAME_CONFIG
Codec control function to set reference frame config: the ref_idx and the refresh flags for each buff...
Definition: aomcx.h:1267
@ AV1E_SET_CDF_UPDATE_MODE
Codec control function to set CDF update mode, unsigned int parameter.
Definition: aomcx.h:493
@ AV1E_SET_MV_COST_UPD_FREQ
Control to set frequency of the cost updates for motion vectors, unsigned int parameter.
Definition: aomcx.h:1234
@ AV1E_SET_COEFF_COST_UPD_FREQ
Control to set frequency of the cost updates for coefficients, unsigned int parameter.
Definition: aomcx.h:1214
@ AV1E_SET_ENABLE_CDEF
Codec control function to encode with CDEF, unsigned int parameter.
Definition: aomcx.h:652
@ AV1E_SET_SVC_PARAMS
Codec control function to set SVC parameters, aom_svc_params_t* parameter.
Definition: aomcx.h:1261
@ AOME_SET_MAX_INTRA_BITRATE_PCT
Codec control function to set max data rate for intra frames, unsigned int parameter.
Definition: aomcx.h:293
@ AV1E_SET_ERROR_RESILIENT_MODE
Codec control function to enable error_resilient_mode, int parameter.
Definition: aomcx.h:429
@ AOME_SET_SCALEMODE
Codec control function to set encoder scaling mode, aom_scaling_mode_t* parameter.
Definition: aomcx.h:195
@ AV1E_SET_TILE_COLUMNS
Codec control function to set number of tile columns. unsigned int parameter.
Definition: aomcx.h:367
@ AV1E_SET_ENABLE_ORDER_HINT
Codec control function to turn on / off frame order hint (int parameter). Affects: joint compound mod...
Definition: aomcx.h:847
@ AV1E_SET_DELTAQ_MODE
Codec control function to set the delta q mode, unsigned int parameter.
Definition: aomcx.h:1111
@ AOME_SET_CPUUSED
Codec control function to set encoder internal speed settings, int parameter.
Definition: aomcx.h:213
@ AV1E_SET_GF_CBR_BOOST_PCT
Boost percentage for Golden Frame in CBR mode, unsigned int parameter.
Definition: aomcx.h:326
@ AV1E_SET_MODE_COST_UPD_FREQ
Control to set frequency of the cost updates for mode, unsigned int parameter.
Definition: aomcx.h:1224
const char * aom_codec_iface_name(aom_codec_iface_t *iface)
Return the name for a given interface.
aom_codec_err_t aom_codec_control(aom_codec_ctx_t *ctx, int ctrl_id,...)
Algorithm Control.
const struct aom_codec_iface aom_codec_iface_t
Codec interface structure.
Definition: aom_codec.h:254
const char * aom_codec_err_to_string(aom_codec_err_t err)
Convert error number to printable string.
aom_codec_err_t aom_codec_destroy(aom_codec_ctx_t *ctx)
Destroy a codec instance.
aom_codec_err_t
Algorithm return codes.
Definition: aom_codec.h:155
const void * aom_codec_iter_t
Iterator.
Definition: aom_codec.h:288
#define AOM_FRAME_IS_KEY
Definition: aom_codec.h:271
@ AOM_BITS_12
Definition: aom_codec.h:321
@ AOM_BITS_8
Definition: aom_codec.h:319
@ AOM_BITS_10
Definition: aom_codec.h:320
@ AOM_CODEC_INVALID_PARAM
An application-supplied parameter is not valid.
Definition: aom_codec.h:200
@ AOM_CODEC_MEM_ERROR
Memory operation failed.
Definition: aom_codec.h:163
@ AOM_CODEC_OK
Operation completed without error.
Definition: aom_codec.h:157
aom_codec_err_t aom_codec_encode(aom_codec_ctx_t *ctx, const aom_image_t *img, aom_codec_pts_t pts, unsigned long duration, aom_enc_frame_flags_t flags)
Encode a frame.
#define aom_codec_enc_init(ctx, iface, cfg, flags)
Convenience macro for aom_codec_enc_init_ver()
Definition: aom_encoder.h:931
aom_codec_err_t aom_codec_enc_config_default(aom_codec_iface_t *iface, aom_codec_enc_cfg_t *cfg, unsigned int usage)
Get the default configuration for a usage.
#define AOM_USAGE_REALTIME
usage parameter analogous to AV1 REALTIME mode.
Definition: aom_encoder.h:1004
const aom_codec_cx_pkt_t * aom_codec_get_cx_data(aom_codec_ctx_t *ctx, aom_codec_iter_t *iter)
Encoded data iterator.
@ AOM_CBR
Definition: aom_encoder.h:167
@ AOM_KF_AUTO
Definition: aom_encoder.h:182
@ AOM_CODEC_CX_FRAME_PKT
Definition: aom_encoder.h:98
Codec context structure.
Definition: aom_codec.h:298
Encoder output packet.
Definition: aom_encoder.h:110
enum aom_codec_cx_pkt_kind kind
Definition: aom_encoder.h:111
union aom_codec_cx_pkt::@1 data
struct aom_codec_cx_pkt::@1::@2 frame
Encoder configuration structure.
Definition: aom_encoder.h:367
unsigned int g_input_bit_depth
Bit-depth of the input frames.
Definition: aom_encoder.h:450
unsigned int rc_dropframe_thresh
Temporal resampling configuration, if supported by the codec.
Definition: aom_encoder.h:515
struct aom_rational g_timebase
Stream timebase units.
Definition: aom_encoder.h:464
unsigned int g_usage
Algorithm specific "usage" value.
Definition: aom_encoder.h:379
unsigned int rc_buf_sz
Decoder Buffer Size.
Definition: aom_encoder.h:679
unsigned int g_h
Height of the frame.
Definition: aom_encoder.h:415
enum aom_kf_mode kf_mode
Keyframe placement mode.
Definition: aom_encoder.h:742
enum aom_rc_mode rc_end_usage
Rate control algorithm to use.
Definition: aom_encoder.h:598
unsigned int g_threads
Maximum number of threads to use.
Definition: aom_encoder.h:387
unsigned int kf_min_dist
Keyframe minimum interval.
Definition: aom_encoder.h:751
unsigned int g_lag_in_frames
Allow lagged encoding.
Definition: aom_encoder.h:493
unsigned int rc_buf_initial_sz
Decoder Buffer Initial Size.
Definition: aom_encoder.h:688
unsigned int g_profile
Bitstream profile to use.
Definition: aom_encoder.h:397
aom_bit_depth_t g_bit_depth
Bit-depth of the codec.
Definition: aom_encoder.h:442
unsigned int g_w
Width of the frame.
Definition: aom_encoder.h:406
unsigned int rc_undershoot_pct
Rate control adaptation undershoot control.
Definition: aom_encoder.h:655
unsigned int kf_max_dist
Keyframe maximum interval.
Definition: aom_encoder.h:760
aom_codec_er_flags_t g_error_resilient
Enable error resilient modes.
Definition: aom_encoder.h:472
unsigned int rc_max_quantizer
Maximum (Worst Quality) Quantizer.
Definition: aom_encoder.h:642
unsigned int rc_buf_optimal_sz
Decoder Buffer Optimal Size.
Definition: aom_encoder.h:697
unsigned int rc_min_quantizer
Minimum (Best Quality) Quantizer.
Definition: aom_encoder.h:632
unsigned int rc_target_bitrate
Target data rate.
Definition: aom_encoder.h:618
unsigned int rc_resize_mode
Mode for spatial resampling, if supported by the codec.
Definition: aom_encoder.h:524
unsigned int rc_overshoot_pct
Rate control adaptation overshoot control.
Definition: aom_encoder.h:664
Image Descriptor.
Definition: aom_image.h:171
int num
Definition: aom_encoder.h:153
int den
Definition: aom_encoder.h:154
aom image scaling mode
Definition: aomcx.h:1382
int temporal_layer_id
Definition: aomcx.h:1424
int spatial_layer_id
Definition: aomcx.h:1423
int max_quantizers[32]
Definition: aomcx.h:1431
int number_spatial_layers
Definition: aomcx.h:1429
int layer_target_bitrate[32]
Definition: aomcx.h:1436
int framerate_factor[8]
Definition: aomcx.h:1438
int min_quantizers[32]
Definition: aomcx.h:1432
int scaling_factor_den[4]
Definition: aomcx.h:1434
int number_temporal_layers
Definition: aomcx.h:1430
int scaling_factor_num[4]
Definition: aomcx.h:1433
int reference[7]
Definition: aomcx.h:1445
int refresh[8]
Definition: aomcx.h:1448
int ref_idx[7]
Definition: aomcx.h:1447