[PATCH 30/33] iris: variant: iris3: add helper for bus and clock calculation

From: Vikash Garodia
Date: Fri Jul 28 2023 - 10:55:35 EST


From: Dikshita Agarwal <quic_dikshita@xxxxxxxxxxx>

Add helper functions to calculate the required bus bandwidth
and clock frequency for the given video use case(s).

Signed-off-by: Dikshita Agarwal <quic_dikshita@xxxxxxxxxxx>
Signed-off-by: Vikash Garodia <quic_vgarodia@xxxxxxxxxxx>
---
.../iris/variant/iris3/inc/msm_vidc_power_iris3.h | 17 +
.../iris/variant/iris3/src/msm_vidc_power_iris3.c | 345 +++++++++++++++++++++
2 files changed, 362 insertions(+)
create mode 100644 drivers/media/platform/qcom/iris/variant/iris3/inc/msm_vidc_power_iris3.h
create mode 100644 drivers/media/platform/qcom/iris/variant/iris3/src/msm_vidc_power_iris3.c

diff --git a/drivers/media/platform/qcom/iris/variant/iris3/inc/msm_vidc_power_iris3.h b/drivers/media/platform/qcom/iris/variant/iris3/inc/msm_vidc_power_iris3.h
new file mode 100644
index 0000000..a6f3e54
--- /dev/null
+++ b/drivers/media/platform/qcom/iris/variant/iris3/inc/msm_vidc_power_iris3.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef __H_MSM_VIDC_POWER_IRIS3_H__
+#define __H_MSM_VIDC_POWER_IRIS3_H__
+
+#include "msm_vidc_inst.h"
+#include "msm_vidc_power.h"
+
+/* iris3 power helpers: required core clock (Hz) and DDR/LLCC bus bandwidth votes. */
+u64 msm_vidc_calc_freq_iris3(struct msm_vidc_inst *inst, u32 data_size);
+int msm_vidc_calc_bw_iris3(struct msm_vidc_inst *inst, struct vidc_bus_vote_data *vidc_data);
+
+#endif
diff --git a/drivers/media/platform/qcom/iris/variant/iris3/src/msm_vidc_power_iris3.c b/drivers/media/platform/qcom/iris/variant/iris3/src/msm_vidc_power_iris3.c
new file mode 100644
index 0000000..32b549c
--- /dev/null
+++ b/drivers/media/platform/qcom/iris/variant/iris3/src/msm_vidc_power_iris3.c
@@ -0,0 +1,345 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include "msm_vidc_core.h"
+#include "msm_vidc_debug.h"
+#include "msm_vidc_driver.h"
+#include "msm_vidc_inst.h"
+#include "msm_vidc_power.h"
+#include "msm_vidc_power_iris3.h"
+#include "perf_static_model.h"
+
+/*
+ * Fill the static-model input used for clock estimation from the session state
+ * (domain, codec, capabilities, formats) and the size of the current buffer.
+ * Returns 0 on success, -EINVAL for an unsupported domain, codec or stage.
+ */
+static int msm_vidc_init_codec_input_freq(struct msm_vidc_inst *inst, u32 data_size,
+					  struct api_calculation_input *codec_input)
+{
+	enum msm_vidc_port_type port;
+	u32 color_fmt;
+
+	if (is_encode_session(inst)) {
+		codec_input->decoder_or_encoder = CODEC_ENCODER;
+	} else if (is_decode_session(inst)) {
+		codec_input->decoder_or_encoder = CODEC_DECODER;
+	} else {
+		d_vpr_e("%s: invalid domain %d\n", __func__, inst->domain);
+		return -EINVAL;
+	}
+
+	codec_input->chipset_gen = MSM_SM8550;
+
+	if (inst->codec == MSM_VIDC_H264) {
+		codec_input->codec = CODEC_H264;
+		codec_input->lcu_size = 16;
+		if (inst->capabilities[ENTROPY_MODE].value ==
+		    V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CABAC)
+			codec_input->entropy_coding_mode = CODEC_ENTROPY_CODING_CABAC;
+		else
+			codec_input->entropy_coding_mode = CODEC_ENTROPY_CODING_CAVLC;
+	} else if (inst->codec == MSM_VIDC_HEVC) {
+		codec_input->codec = CODEC_HEVC;
+		codec_input->lcu_size = 32;
+	} else if (inst->codec == MSM_VIDC_VP9) {
+		codec_input->codec = CODEC_VP9;
+		codec_input->lcu_size = 16;
+	} else {
+		d_vpr_e("%s: invalid codec %d\n", __func__, inst->codec);
+		return -EINVAL;
+	}
+
+	codec_input->pipe_num = inst->capabilities[PIPE].value;
+	codec_input->frame_rate = inst->max_rate;
+
+	port = is_decode_session(inst) ? INPUT_PORT : OUTPUT_PORT;
+	codec_input->frame_width = inst->fmts[port].fmt.pix_mp.width;
+	codec_input->frame_height = inst->fmts[port].fmt.pix_mp.height;
+
+	if (inst->capabilities[STAGE].value == MSM_VIDC_STAGE_1) {
+		codec_input->vsp_vpp_mode = CODEC_VSPVPP_MODE_1S;
+	} else if (inst->capabilities[STAGE].value == MSM_VIDC_STAGE_2) {
+		codec_input->vsp_vpp_mode = CODEC_VSPVPP_MODE_2S;
+	} else {
+		d_vpr_e("%s: invalid stage %d\n", __func__, inst->capabilities[STAGE].value);
+		return -EINVAL;
+	}
+
+	if (inst->capabilities[BIT_DEPTH].value == BIT_DEPTH_8)
+		codec_input->bitdepth = CODEC_BITDEPTH_8;
+	else
+		codec_input->bitdepth = CODEC_BITDEPTH_10;
+
+	/*
+	 * Used for calculating encoder GOP complexity: hierachical_layer (0..7) is an
+	 * array index while inst->capabilities[B_FRAME].value is one of 0, 1, 2.
+	 * TODO how to map? Set as IPP for now.
+	 */
+	codec_input->hierachical_layer = 0;
+
+	/* decode classifies the output port format, encode the input port format */
+	port = is_decode_session(inst) ? OUTPUT_PORT : INPUT_PORT;
+	color_fmt = v4l2_colorformat_to_driver(inst, inst->fmts[port].fmt.pix_mp.pixelformat,
+					       __func__);
+	codec_input->linear_opb = is_linear_colorformat(color_fmt);
+
+	/*
+	 * Mbps = frame_rate * data_size * 8 / 1000000 = frame_rate * data_size / 125000.
+	 * Splitting data_size into quotient and remainder gives the identical value while
+	 * keeping the 32-bit intermediate product from wrapping on large buffers.
+	 */
+	codec_input->bitrate_mbps = codec_input->frame_rate * (data_size / 125000) +
+		(codec_input->frame_rate * (data_size % 125000)) / 125000;
+
+	/* set as sanity mode */
+	codec_input->regression_mode = 1;
+
+	return 0;
+}
+
+/*
+ * Translate a bus vote request into the static-model input used for bandwidth
+ * estimation. Returns 0 on success, or -EINVAL when the vote data is missing or
+ * carries an unsupported domain, codec, stage, entropy mode or format count.
+ */
+static int msm_vidc_init_codec_input_bus(struct msm_vidc_inst *inst, struct vidc_bus_vote_data *d,
+					 struct api_calculation_input *codec_input)
+{
+	u32 cf_int, cf_frac;
+	bool opb_compressed;
+
+	if (!d)
+		return -EINVAL;
+
+	switch (d->domain) {
+	case MSM_VIDC_ENCODER:
+		codec_input->decoder_or_encoder = CODEC_ENCODER;
+		break;
+	case MSM_VIDC_DECODER:
+		codec_input->decoder_or_encoder = CODEC_DECODER;
+		break;
+	default:
+		d_vpr_e("%s: invalid domain %d\n", __func__, d->domain);
+		return -EINVAL;
+	}
+
+	codec_input->chipset_gen = MSM_SM8550;
+
+	switch (d->codec) {
+	case MSM_VIDC_H264:
+		codec_input->codec = CODEC_H264;
+		break;
+	case MSM_VIDC_HEVC:
+		codec_input->codec = CODEC_HEVC;
+		break;
+	case MSM_VIDC_VP9:
+		codec_input->codec = CODEC_VP9;
+		break;
+	default:
+		d_vpr_e("%s: invalid codec %d\n", __func__, d->codec);
+		return -EINVAL;
+	}
+
+	codec_input->lcu_size = d->lcu_size;
+	codec_input->pipe_num = d->num_vpp_pipes;
+	codec_input->frame_rate = d->fps;
+	codec_input->frame_width = d->input_width;
+	codec_input->frame_height = d->input_height;
+
+	switch (d->work_mode) {
+	case MSM_VIDC_STAGE_1:
+		codec_input->vsp_vpp_mode = CODEC_VSPVPP_MODE_1S;
+		break;
+	case MSM_VIDC_STAGE_2:
+		codec_input->vsp_vpp_mode = CODEC_VSPVPP_MODE_2S;
+		break;
+	default:
+		d_vpr_e("%s: invalid stage %d\n", __func__, d->work_mode);
+		return -EINVAL;
+	}
+
+	switch (inst->capabilities[ENTROPY_MODE].value) {
+	case V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CABAC:
+		codec_input->entropy_coding_mode = CODEC_ENTROPY_CODING_CABAC;
+		break;
+	case V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CAVLC:
+		codec_input->entropy_coding_mode = CODEC_ENTROPY_CODING_CAVLC;
+		break;
+	default:
+		d_vpr_e("%s: invalid entropy %d\n", __func__,
+			inst->capabilities[ENTROPY_MODE].value);
+		return -EINVAL;
+	}
+
+	/* Encoder GOP complexity index (0..7); TODO how to map? Set as IPP for now. */
+	codec_input->hierachical_layer = 0;
+
+	/*
+	 * A complexity factor above COMPLEXITY_THRESHOLD selects the pwc (performance
+	 * worst case) settings; anything up to and including it selects the average case.
+	 */
+	cf_int = Q16_INT(d->complexity_factor);
+	cf_frac = Q16_FRAC(d->complexity_factor);
+	if (cf_int > COMPLEXITY_THRESHOLD || (cf_int == COMPLEXITY_THRESHOLD && cf_frac != 0)) {
+		codec_input->complexity_setting = COMPLEXITY_SETTING_PWC;
+		codec_input->refframe_complexity = REFFRAME_COMPLEXITY_PWC;
+	} else {
+		codec_input->complexity_setting = COMPLEXITY_SETTING_AVG;
+		codec_input->refframe_complexity = REFFRAME_COMPLEXITY_AVG;
+	}
+
+	codec_input->status_llc_onoff = d->use_sys_cache;
+	codec_input->bitdepth = (__bpp(d->color_formats[0]) == 8) ?
+		CODEC_BITDEPTH_8 : CODEC_BITDEPTH_10;
+
+	if (d->num_formats != 1 && d->num_formats != 2) {
+		d_vpr_e("%s: invalid num_formats %d\n", __func__, d->num_formats);
+		return -EINVAL;
+	}
+	/* linear_opb follows the last listed format; two formats mean a split OPB */
+	codec_input->split_opb = (d->num_formats == 2);
+	codec_input->linear_opb = !__ubwc(d->color_formats[d->num_formats - 1]);
+
+	codec_input->linear_ipb = 0; /* set as ubwc ipb */
+
+	/* TODO Confirm if we always LOSSLESS mode ie lossy_ipb = 0*/
+	codec_input->lossy_ipb = 0; /* set as lossless ipb */
+
+	/* TODO Confirm if no multiref */
+	codec_input->encoder_multiref = 0; /* set as no multiref */
+	codec_input->bitrate_mbps = d->bitrate / 1000000; /* bps -> mbps */
+
+	opb_compressed = d->num_formats >= 2 && __ubwc(d->color_formats[1]);
+
+	/* ANDROID CR is in Q16 format, StaticModel CR in x100 format */
+	codec_input->cr_dpb = (Q16_INT(d->compression_ratio) * 100) +
+		Q16_FRAC(d->compression_ratio);
+
+	/* NOTE(review): 65536 looks like Q16 1.0 while the model takes x100 values - confirm */
+	if (opb_compressed)
+		codec_input->cr_opb = codec_input->cr_dpb;
+	else
+		codec_input->cr_opb = 65536;
+
+	codec_input->cr_ipb = (Q16_INT(d->input_cr) * 100) + Q16_FRAC(d->input_cr);
+	codec_input->cr_rpb = codec_input->cr_dpb; /* cr_rpb only for encoder */
+
+	/* disable by default, only enable for aurora depth map session */
+	codec_input->lumaonly_decode = 0;
+
+	/* set as custom regression mode, as are using cr,cf values from FW */
+	codec_input->regression_mode = REGRESSION_MODE_CUSTOM;
+
+	/* Dump all the variables for easier debugging */
+	if (msm_vidc_debug & VIDC_BUS) {
+		struct dump vars[] = {
+			{"complexity_factor_int", "%d", cf_int},
+			{"complexity_factor_frac", "%d", cf_frac},
+			{"refframe_complexity", "%d", codec_input->refframe_complexity},
+			{"complexity_setting", "%d", codec_input->complexity_setting},
+			{"cr_dpb", "%d", codec_input->cr_dpb},
+			{"cr_opb", "%d", codec_input->cr_opb},
+			{"cr_ipb", "%d", codec_input->cr_ipb},
+			{"cr_rpb", "%d", codec_input->cr_rpb},
+			{"lcu size", "%d", codec_input->lcu_size},
+			{"pipe number", "%d", codec_input->pipe_num},
+			{"frame_rate", "%d", codec_input->frame_rate},
+			{"frame_width", "%d", codec_input->frame_width},
+			{"frame_height", "%d", codec_input->frame_height},
+			{"work_mode", "%d", d->work_mode},
+			{"encoder_or_decode", "%d", inst->domain},
+			{"chipset_gen", "%d", codec_input->chipset_gen},
+			{"codec_input", "%d", codec_input->codec},
+			{"entropy_coding_mode", "%d", codec_input->entropy_coding_mode},
+			{"hierachical_layer", "%d", codec_input->hierachical_layer},
+			{"status_llc_onoff", "%d", codec_input->status_llc_onoff},
+			{"bit_depth", "%d", codec_input->bitdepth},
+			{"split_opb", "%d", codec_input->split_opb},
+			{"linear_opb", "%d", codec_input->linear_opb},
+			{"linear_ipb", "%d", codec_input->linear_ipb},
+			{"lossy_ipb", "%d", codec_input->lossy_ipb},
+			{"encoder_multiref", "%d", codec_input->encoder_multiref},
+			{"bitrate_mbps", "%d", codec_input->bitrate_mbps},
+			{"lumaonly_decode", "%d", codec_input->lumaonly_decode},
+			{"regression_mode", "%d", codec_input->regression_mode},
+		};
+		__dump(vars, ARRAY_SIZE(vars));
+	}
+
+	return 0;
+}
+
+/*
+ * Return the clock rate in Hz required by @inst for a buffer of @data_size bytes,
+ * or 0 if the static model inputs cannot be built or the calculation fails.
+ */
+u64 msm_vidc_calc_freq_iris3(struct msm_vidc_inst *inst, u32 data_size)
+{
+	struct api_calculation_freq_output codec_output;
+	struct api_calculation_input codec_input;
+	struct msm_vidc_core *core = inst->core;
+	u32 fps, mbpf;
+	u64 freq;
+
+	mbpf = msm_vidc_get_mbs_per_frame(inst);
+	fps = inst->max_rate;
+
+	memset(&codec_input, 0, sizeof(codec_input));
+	memset(&codec_output, 0, sizeof(codec_output));
+	if (msm_vidc_init_codec_input_freq(inst, data_size, &codec_input))
+		return 0;
+	if (msm_vidc_calculate_frequency(codec_input, &codec_output))
+		return 0;
+
+	/* scale to Hz; widen first so the multiplication is carried out in 64 bits */
+	freq = (u64)codec_output.hw_min_freq * 1000000;
+
+	i_vpr_p(inst, "%s: filled len %d, required freq %llu, fps %u, mbpf %u\n",
+		__func__, data_size, freq, fps, mbpf);
+
+	/*
+	 * Only an HEVC 10bit decode session handling an iframe may use TURBO; every
+	 * other case is limited to NOM (index 0 is TURBO, index 1 is NOM clock rate).
+	 */
+	if (inst->iframe && is_hevc_10bit_decode_session(inst))
+		return freq;
+
+	if (core->resource->freq_set.count >= 2 &&
+	    freq > core->resource->freq_set.freq_tbl[1].freq)
+		freq = core->resource->freq_set.freq_tbl[1].freq;
+
+	return freq;
+}
+
+/*
+ * Fill calc_bw_ddr/calc_bw_llcc of @vidc_data from the static bandwidth model.
+ * Returns 0 on success or when @vidc_data is NULL, else the failing helper's error.
+ */
+int msm_vidc_calc_bw_iris3(struct msm_vidc_inst *inst, struct vidc_bus_vote_data *vidc_data)
+{
+	struct api_calculation_bw_output codec_output;
+	struct api_calculation_input codec_input;
+	int ret;
+
+	if (!vidc_data)
+		return 0;
+
+	memset(&codec_input, 0, sizeof(codec_input));
+	memset(&codec_output, 0, sizeof(codec_output));
+	ret = msm_vidc_init_codec_input_bus(inst, vidc_data, &codec_input);
+	if (!ret)
+		ret = msm_vidc_calculate_bandwidth(codec_input, &codec_output);
+	if (ret)
+		return ret;
+
+	vidc_data->calc_bw_ddr = kbps(codec_output.ddr_bw_rd + codec_output.ddr_bw_wr);
+	vidc_data->calc_bw_llcc = kbps(codec_output.noc_bw_rd + codec_output.noc_bw_wr);
+	/* terminate the record with a newline like every other log call in this file */
+	i_vpr_l(inst, "%s: calc_bw_ddr %llu calc_bw_llcc %llu\n",
+		__func__, vidc_data->calc_bw_ddr, vidc_data->calc_bw_llcc);
+
+	return 0;
+}
--
2.7.4