RE: [EXT] Re: [net-next PATCH v3 1/2] octeontx2-af: Add new mbox to support multicast/mirror offload

From: Suman Ghosh
Date: Sun Nov 26 2023 - 11:04:46 EST


>[...]
>> +static int nix_add_mce_list_entry(struct rvu *rvu,
>> + struct nix_hw *nix_hw,
>> + struct nix_mcast_grp_elem *elem,
>> + struct nix_mcast_grp_update_req *req) {
>> + struct mce *tmp_mce[NIX_MCE_ENTRY_MAX];
>
>If I read correctly the above struct is is 256 bytes wide. Do you really
>need to put so much data on the stack this deep?
[Suman] Ack, will update the logic in v4.
>
>
>> + u32 num_entry = req->num_mce_entry;
>> + struct nix_mce_list *mce_list;
>> + struct mce *mce;
>> + int i;
>> +
>> + mce_list = &elem->mcast_mce_list;
>> + for (i = 0; i < num_entry; i++) {
>> + mce = kzalloc(sizeof(*mce), GFP_KERNEL);
>> + if (!mce)
>> + goto free_mce;
>> +
>> + mce->pcifunc = req->pcifunc[i];
>> + mce->channel = req->channel[i];
>> + mce->rq_rss_index = req->rq_rss_index[i];
>> + mce->dest_type = req->dest_type[i];
>> + mce->is_active = 1;
>> + hlist_add_head(&mce->node, &mce_list->head);
>> + tmp_mce[i] = mce;
>> + mce_list->count++;
>> + }
>> +
>> + mce_list->max += num_entry;
>> +
>> + /* Dump the updated list to HW */
>> + if (elem->dir == NIX_MCAST_INGRESS)
>> + return nix_update_ingress_mce_list_hw(rvu, nix_hw, elem);
>> +
>> + nix_update_egress_mce_list_hw(rvu, nix_hw, elem);
>> + return 0;
>> +
>> +free_mce:
>> + while (i) {
>> + hlist_del(&tmp_mce[i-1]->node);
>> + kfree(tmp_mce[i-1]);
>
>Minor nit: checkpatch complains about the above: spaces are needed
>around '-'.
[Suman] This code should be updated in v4.
>
>> + mce_list->count--;
>> + i--;
>> + }
>> +
>> + return -ENOMEM;
>> +}
>> +
>> static int nix_update_mce_list_entry(struct nix_mce_list *mce_list,
>> u16 pcifunc, bool add)
>> {
>
>[...]
>> @@ -3237,13 +3473,30 @@ static int nix_setup_mcast(struct rvu *rvu,
>struct nix_hw *nix_hw, int blkaddr)
>> int err, size;
>>
>> size = (rvu_read64(rvu, blkaddr, NIX_AF_CONST3) >> 16) & 0x0F;
>> - size = (1ULL << size);
>> + size = BIT_ULL(size);
>> +
>> + /* Allocate bitmap for rx mce entries */
>> + mcast->mce_counter[NIX_MCAST_INGRESS].max = 256UL << MC_TBL_SIZE;
>> + err = rvu_alloc_bitmap(&mcast->mce_counter[NIX_MCAST_INGRESS]);
>> + if (err)
>> + return -ENOMEM;
>> +
>> + /* Allocate bitmap for tx mce entries */
>> + mcast->mce_counter[NIX_MCAST_EGRESS].max = MC_TX_MAX;
>> + err = rvu_alloc_bitmap(&mcast->mce_counter[NIX_MCAST_EGRESS]);
>> + if (err) {
>> + rvu_free_bitmap(&mcast->mce_counter[NIX_MCAST_INGRESS]);
>> + return -ENOMEM;
>> + }
>>
>> /* Alloc memory for multicast/mirror replication entries */
>> err = qmem_alloc(rvu->dev, &mcast->mce_ctx,
>> - (256UL << MC_TBL_SIZE), size);
>> - if (err)
>> + mcast->mce_counter[NIX_MCAST_INGRESS].max, size);
>> + if (err) {
>> + rvu_free_bitmap(&mcast->mce_counter[NIX_MCAST_INGRESS]);
>> + rvu_free_bitmap(&mcast->mce_counter[NIX_MCAST_EGRESS]);
>> return -ENOMEM;
>> + }
>>
>> rvu_write64(rvu, blkaddr, NIX_AF_RX_MCAST_BASE,
>> (u64)mcast->mce_ctx->iova);
>> @@ -3256,8 +3509,12 @@ static int nix_setup_mcast(struct rvu *rvu,
>struct nix_hw *nix_hw, int blkaddr)
>> size = rvu_read64(rvu, blkaddr, NIX_AF_MC_MIRROR_CONST) & 0xFFFF;
>> err = qmem_alloc(rvu->dev, &mcast->mcast_buf,
>> (8UL << MC_BUF_CNT), size);
>> - if (err)
>> + if (err) {
>> + rvu_free_bitmap(&mcast->mce_counter[NIX_MCAST_INGRESS]);
>> + rvu_free_bitmap(&mcast->mce_counter[NIX_MCAST_EGRESS]);
>> + qmem_free(rvu->dev, mcast->mce_ctx);
>
>AFAICS, the above lines frees struct that was already allocated prior to
>this patch. It looks like a fix possibly worthy a separate patch.
[Suman] Okay. I will push a separate patch for qmem_free()
>
>[...]
>> +static void nix_mcast_update_mce_entry(struct rvu *rvu, u16 pcifunc,
>> +u8 is_active) {
>> + struct nix_mcast_grp_elem *elem;
>> + struct nix_mcast_grp *mcast_grp;
>> + struct nix_hw *nix_hw;
>> + int blkaddr;
>> +
>> + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
>> + nix_hw = get_nix_hw(rvu->hw, blkaddr);
>> + if (!nix_hw)
>> + return;
>> +
>> + mcast_grp = &nix_hw->mcast_grp;
>> +
>> + mutex_lock(&mcast_grp->mcast_grp_lock);
>> + list_for_each_entry(elem, &mcast_grp->mcast_grp_head, list) {
>> + struct nix_mce_list *mce_list;
>> + struct mce *mce;
>> +
>> + /* Iterate the group elements and disable the element which
>> + * received the disable request.
>> + */
>> + mce_list = &elem->mcast_mce_list;
>> + hlist_for_each_entry(mce, &mce_list->head, node) {
>> + if (mce->pcifunc == pcifunc) {
>> + if (is_active)
>> + mce->is_active = 1;
>> + else
>> + mce->is_active = 0;
>
>just:
[Suman] ack, will update in v4
>
> mce->is_active = is_active;
>
>> +
>> + break;
>> + }
>> + }
>> +
>> + /* Dump the updated list to HW */
>> + if (elem->dir == NIX_MCAST_INGRESS)
>> + nix_update_ingress_mce_list_hw(rvu, nix_hw, elem);
>> + else
>> + nix_update_egress_mce_list_hw(rvu, nix_hw, elem);
>> +
>> + /* Update the multicast index in NPC rule */
>> + nix_mcast_update_action(rvu, elem);
>> + }
>> + mutex_unlock(&mcast_grp->mcast_grp_lock);
>> +}
>> +
>> int rvu_mbox_handler_nix_lf_start_rx(struct rvu *rvu, struct msg_req
>*req,
>> struct msg_rsp *rsp)
>> {
>
>[...]
>> @@ -5797,3 +6134,327 @@ int
>> rvu_mbox_handler_nix_bandprof_get_hwinfo(struct rvu *rvu, struct
>> msg_req *re
>>
>> return 0;
>> }
>> +
>> +static struct nix_mcast_grp_elem *rvu_nix_mcast_find_grp_elem(struct
>nix_mcast_grp *mcast_grp,
>> + u32 mcast_grp_idx)
>> +{
>> + struct nix_mcast_grp_elem *iter, *result_iter = NULL;
>> +
>> + mutex_lock(&mcast_grp->mcast_grp_lock);
>> + list_for_each_entry(iter, &mcast_grp->mcast_grp_head, list) {
>> + if (iter->mcast_grp_idx == mcast_grp_idx) {
>> + result_iter = iter;
>> + break;
>> + }
>> + }
>> + mutex_unlock(&mcast_grp->mcast_grp_lock);
>> +
>> + return result_iter;
>
>What prevents 'result_iter' from being freed at this point? (and causing
>a later UaF)
[Suman] I updated this as part of a review comment, but yes, it is not correct. Will revert to older version in v4.
>
>> +}
>> +
>> +int rvu_nix_mcast_get_mce_index(struct rvu *rvu, u16 pcifunc, u32
>> +mcast_grp_idx) {
>> + struct nix_mcast_grp_elem *elem;
>> + struct nix_mcast_grp *mcast_grp;
>> + struct nix_hw *nix_hw;
>> + int blkaddr;
>> +
>> + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
>> + nix_hw = get_nix_hw(rvu->hw, blkaddr);
>> + if (!nix_hw)
>> + return NIX_AF_ERR_INVALID_NIXBLK;
>> +
>> + mcast_grp = &nix_hw->mcast_grp;
>> + elem = rvu_nix_mcast_find_grp_elem(mcast_grp, mcast_grp_idx);
>> + if (!elem)
>> + return NIX_AF_ERR_INVALID_MCAST_GRP;
>> +
>> + return elem->mce_start_index;
>> +}
>> +
>> +void rvu_nix_mcast_flr_free_entries(struct rvu *rvu, u16 pcifunc) {
>> + struct nix_mcast_grp_destroy_req dreq = { 0 };
>> + struct nix_mcast_grp_update_req ureq = { 0 };
>> + struct nix_mcast_grp_update_rsp ursp = { 0 };
>> + struct nix_mcast_grp_elem *elem, *tmp;
>> + struct nix_mcast_grp *mcast_grp;
>> + struct nix_hw *nix_hw;
>> + int blkaddr;
>> +
>> + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc);
>> + nix_hw = get_nix_hw(rvu->hw, blkaddr);
>> + if (!nix_hw)
>> + return;
>> +
>> + mcast_grp = &nix_hw->mcast_grp;
>> +
>> + mutex_lock(&mcast_grp->mcast_grp_lock);
>> + list_for_each_entry_safe(elem, tmp, &mcast_grp->mcast_grp_head,
>list) {
>> + struct nix_mce_list *mce_list;
>> + struct hlist_node *tmp;
>> + struct mce *mce;
>> +
>> + /* If the pcifunc which created the multicast/mirror
>> + * group received an FLR, then delete the entire group.
>> + */
>> + if (elem->pcifunc == pcifunc) {
>> + mutex_unlock(&mcast_grp->mcast_grp_lock);
>> + /* Delete group */
>> + dreq.hdr.pcifunc = elem->pcifunc;
>> + dreq.mcast_grp_idx = elem->mcast_grp_idx;
>> + rvu_mbox_handler_nix_mcast_grp_destroy(rvu, &dreq, NULL);
>> + mutex_lock(&mcast_grp->mcast_grp_lock);
>
>What prevents 'tmp' from being removed and freed while the
>'mcast_grp_lock' is not held? there are other similar chunks later.
>
>I'm under the impression that all the multicast data is touched under
>the rtnl lock protection so the above lock does not matter much, but I
>really did not validate such impression/guess.
>
>Generally speaking the lock schema here looks complex and hard to
>follow: a more descriptive changelog will help.
[Suman] Got it. I will try to simplify the logic and push a v4.
>
>Cheers,
>
>Paolo