Re: [PATCH v2 for-rc 3/3] RDMA/hns: Add check and adjust for function resource values

From: Leon Romanovsky
Date: Mon Jul 17 2023 - 04:10:46 EST


On Mon, Jul 17, 2023 at 02:03:40PM +0800, Junxian Huang wrote:
> Currently, RoCE driver gets function resource values from firmware
> without validity check.

Kernel trusts devices underneath, otherwise why should we stop with
capabilities? Let's check all PCI transactions and verify any response
from FW too.

> As these resources are mostly related to memory,
> an invalid value may lead to serious consequence such as kernel panic.
>
> This patch adds check for these resource values and adjusts the invalid
> ones.

These are FW bugs which should be fixed.

Thanks

>
> Signed-off-by: Junxian Huang <huangjunxian6@xxxxxxxxxxxxx>
> ---
> drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 115 ++++++++++++++++++++-
> drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 37 +++++++
> 2 files changed, 148 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> index c4b92d8bd98a..f5649fd25042 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> @@ -1650,6 +1650,97 @@ static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev)
> return hns_roce_cmq_send(hr_dev, &desc, 1);
> }
>
> +static const struct hns_roce_bt_num {
> + u32 res_offset;
> + u32 min;
> + u32 max;
> + enum hns_roce_res_invalid_flag invalid_flag;
> + enum hns_roce_res_revision revision;
> + bool vf_support;
> +} bt_num_table[] = {
> + {RES_OFFSET_IN_CAPS(qpc_bt_num), 1,
> + MAX_QPC_BT_NUM, QPC_BT_NUM_INVALID_FLAG, RES_FOR_ALL, true},
> + {RES_OFFSET_IN_CAPS(srqc_bt_num), 1,
> + MAX_SRQC_BT_NUM, SRQC_BT_NUM_INVALID_FLAG, RES_FOR_ALL, true},
> + {RES_OFFSET_IN_CAPS(cqc_bt_num), 1,
> + MAX_CQC_BT_NUM, CQC_BT_NUM_INVALID_FLAG, RES_FOR_ALL, true},
> + {RES_OFFSET_IN_CAPS(mpt_bt_num), 1,
> + MAX_MPT_BT_NUM, MPT_BT_NUM_INVALID_FLAG, RES_FOR_ALL, true},
> + {RES_OFFSET_IN_CAPS(sl_num), 1,
> + MAX_SL_NUM, QID_NUM_INVALID_FLAG, RES_FOR_ALL, true},
> + {RES_OFFSET_IN_CAPS(sccc_bt_num), 1,
> + MAX_SCCC_BT_NUM, SCCC_BT_NUM_INVALID_FLAG, RES_FOR_ALL, true},
> + {RES_OFFSET_IN_CAPS(qpc_timer_bt_num), 1,
> + MAX_QPC_TIMER_BT_NUM, QPC_TIMER_BT_NUM_INVALID_FLAG,
> + RES_FOR_ALL, false},
> + {RES_OFFSET_IN_CAPS(cqc_timer_bt_num), 1,
> + MAX_CQC_TIMER_BT_NUM, CQC_TIMER_BT_NUM_INVALID_FLAG,
> + RES_FOR_ALL, false},
> + {RES_OFFSET_IN_CAPS(gmv_bt_num), 1,
> + MAX_GMV_BT_NUM, GMV_BT_NUM_INVALID_FLAG,
> + RES_FOR_HIP09, true},
> + {RES_OFFSET_IN_CAPS(smac_bt_num), 1,
> + MAX_SMAC_BT_NUM, SMAC_BT_NUM_INVALID_FLAG,
> + RES_FOR_HIP08, true},
> + {RES_OFFSET_IN_CAPS(sgid_bt_num), 1,
> + MAX_SGID_BT_NUM, SGID_BT_NUM_INVALID_FLAG,
> + RES_FOR_HIP08, true},
> +};
> +
> +static bool check_res_is_supported(struct hns_roce_dev *hr_dev,
> + struct hns_roce_bt_num *bt_num_entry)
> +{
> + if (!bt_num_entry->vf_support && hr_dev->is_vf)
> + return false;
> +
> + if (bt_num_entry->revision == RES_FOR_HIP09 &&
> + hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08)
> + return false;
> +
> + if (bt_num_entry->revision == RES_FOR_HIP08 &&
> + hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
> + return false;
> +
> + return true;
> +}
> +
> +static void adjust_eqc_bt_num(struct hns_roce_caps *caps, u16 *invalid_flag)
> +{
> + if (caps->eqc_bt_num < caps->num_comp_vectors + caps->num_aeq_vectors ||
> + caps->eqc_bt_num > MAX_EQC_BT_NUM) {
> + caps->eqc_bt_num = caps->eqc_bt_num > MAX_EQC_BT_NUM ?
> + MAX_EQC_BT_NUM : caps->num_comp_vectors +
> + caps->num_aeq_vectors;
> + *invalid_flag |= 1 << EQC_BT_NUM_INVALID_FLAG;
> + }
> +}
> +
> +static u16 adjust_res_caps(struct hns_roce_dev *hr_dev)
> +{
> + struct hns_roce_caps *caps = &hr_dev->caps;
> + u16 invalid_flag = 0;
> + u32 min, max;
> + u32 *res;
> + int i;
> +
> + for (i = 0; i < ARRAY_SIZE(bt_num_table); i++) {
> + if (!check_res_is_supported(hr_dev, &bt_num_table[i]))
> + continue;
> +
> + res = (u32 *)((void *)caps + bt_num_table[i].res_offset);
> + min = bt_num_table[i].min;
> + max = bt_num_table[i].max;
> + if (*res < min || *res > max) {
> + *res = *res < min ? min : max;
> + invalid_flag |= 1 << bt_num_table[i].invalid_flag;
> + }
> + }
> +
> + adjust_eqc_bt_num(caps, &invalid_flag);
> +
> + return invalid_flag;
> +}
> +
> static int load_func_res_caps(struct hns_roce_dev *hr_dev, bool is_vf)
> {
> struct hns_roce_cmq_desc desc[2];
> @@ -1730,11 +1821,19 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev)
> }
>
> ret = load_pf_timer_res_caps(hr_dev);
> - if (ret)
> + if (ret) {
> dev_err(dev, "failed to load pf timer resource, ret = %d.\n",
> ret);
> + return ret;
> + }
>
> - return ret;
> + ret = adjust_res_caps(hr_dev);
> + if (ret)
> + dev_warn(dev,
> + "invalid resource values have been adjusted, invalid_flag = 0x%x.\n",
> + ret);
> +
> + return 0;
> }
>
> static int hns_roce_query_vf_resource(struct hns_roce_dev *hr_dev)
> @@ -1743,10 +1842,18 @@ static int hns_roce_query_vf_resource(struct hns_roce_dev *hr_dev)
> int ret;
>
> ret = load_func_res_caps(hr_dev, true);
> - if (ret)
> + if (ret) {
> dev_err(dev, "failed to load vf res caps, ret = %d.\n", ret);
> + return ret;
> + }
>
> - return ret;
> + ret = adjust_res_caps(hr_dev);
> + if (ret)
> + dev_warn(dev,
> + "invalid resource values have been adjusted, invalid_flag = 0x%x.\n",
> + ret);
> +
> + return 0;
> }
>
> static int __hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev,
> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
> index d9693f6cc802..c2d46383c88c 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
> @@ -972,6 +972,43 @@ struct hns_roce_func_clear {
> #define CFG_GLOBAL_PARAM_1US_CYCLES CMQ_REQ_FIELD_LOC(9, 0)
> #define CFG_GLOBAL_PARAM_UDP_PORT CMQ_REQ_FIELD_LOC(31, 16)
>
> +enum hns_roce_res_invalid_flag {
> + QPC_BT_NUM_INVALID_FLAG,
> + SRQC_BT_NUM_INVALID_FLAG,
> + CQC_BT_NUM_INVALID_FLAG,
> + MPT_BT_NUM_INVALID_FLAG,
> + EQC_BT_NUM_INVALID_FLAG,
> + SMAC_BT_NUM_INVALID_FLAG,
> + SGID_BT_NUM_INVALID_FLAG,
> + QID_NUM_INVALID_FLAG,
> + SCCC_BT_NUM_INVALID_FLAG,
> + GMV_BT_NUM_INVALID_FLAG,
> + QPC_TIMER_BT_NUM_INVALID_FLAG,
> + CQC_TIMER_BT_NUM_INVALID_FLAG,
> +};
> +
> +enum hns_roce_res_revision {
> + RES_FOR_HIP08,
> + RES_FOR_HIP09,
> + RES_FOR_ALL,
> +};
> +
> +#define RES_OFFSET_IN_CAPS(res) \
> + (offsetof(struct hns_roce_caps, res))
> +
> +#define MAX_QPC_BT_NUM 2048
> +#define MAX_SRQC_BT_NUM 512
> +#define MAX_CQC_BT_NUM 512
> +#define MAX_MPT_BT_NUM 512
> +#define MAX_EQC_BT_NUM 512
> +#define MAX_SMAC_BT_NUM 256
> +#define MAX_SGID_BT_NUM 256
> +#define MAX_SL_NUM 8
> +#define MAX_SCCC_BT_NUM 512
> +#define MAX_GMV_BT_NUM 256
> +#define MAX_QPC_TIMER_BT_NUM 1728
> +#define MAX_CQC_TIMER_BT_NUM 1600
> +
> /*
> * Fields of HNS_ROCE_OPC_QUERY_PF_RES, HNS_ROCE_OPC_QUERY_VF_RES
> * and HNS_ROCE_OPC_ALLOC_VF_RES
> --
> 2.30.0
>