Re: [PATCH 6/6] scsi: hisi_sas: Expose multiple hw queues for v3 as experimental

From: Ming Lei
Date: Sun Oct 27 2019 - 04:20:15 EST


On Thu, Oct 24, 2019 at 10:21:21PM +0800, John Garry wrote:
> Since we're not yet ready to always expose multiple queues to the upper
> layer due to the CPU hotplug issue, add a new interim experimental command
> line option to support it.
>
> We still need to keep supporting auto_affine_msi_experimental, since
> people are now relying on the performance it provides, even though it is
> unsafe.
>
> If auto_affine_msi_experimental and expose_mq_experimental are both set,
> then auto_affine_msi_experimental takes precedence.
>
> Signed-off-by: John Garry <john.garry@xxxxxxxxxx>
> ---
> drivers/scsi/hisi_sas/hisi_sas.h | 2 +
> drivers/scsi/hisi_sas/hisi_sas_main.c | 55 ++++++++++++++++----------
> drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 51 +++++++++++++++++++++---
> 3 files changed, 83 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h
> index 4eb8f1c53f78..884f2426d753 100644
> --- a/drivers/scsi/hisi_sas/hisi_sas.h
> +++ b/drivers/scsi/hisi_sas/hisi_sas.h
> @@ -8,6 +8,8 @@
> #define _HISI_SAS_H_
>
> #include <linux/acpi.h>
> +#include <linux/blk-mq.h>
> +#include <linux/blk-mq-pci.h>
> #include <linux/clk.h>
> #include <linux/debugfs.h>
> #include <linux/dmapool.h>
> diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
> index 53802c1cc1d0..c8c96a46acfd 100644
> --- a/drivers/scsi/hisi_sas/hisi_sas_main.c
> +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
> @@ -389,9 +389,11 @@ static int hisi_sas_task_prep(struct sas_task *task,
> struct hisi_sas_slot *slot;
> struct hisi_sas_cmd_hdr *cmd_hdr_base;
> struct asd_sas_port *sas_port = device->port;
> + struct Scsi_Host *shost = hisi_hba->shost;
> struct device *dev = hisi_hba->dev;
> int dlvry_queue_slot, dlvry_queue, rc, slot_idx;
> int n_elem = 0, n_elem_dif = 0, n_elem_req = 0;
> + struct scsi_cmnd *scmd = NULL;
> struct hisi_sas_dq *dq;
> unsigned long flags;
> int wr_q_index;
> @@ -407,13 +409,38 @@ static int hisi_sas_task_prep(struct sas_task *task,
> return -ECOMM;
> }
>
> - if (hisi_hba->reply_map) {
> - int cpu = raw_smp_processor_id();
> - unsigned int dq_index = hisi_hba->reply_map[cpu];
> + if (task->uldd_task) {
> + struct ata_queued_cmd *qc;
>
> - *dq_pointer = dq = &hisi_hba->dq[dq_index];
> - } else {
> + if (dev_is_sata(device)) {
> + qc = task->uldd_task;
> + scmd = qc->scsicmd;
> + } else {
> + scmd = task->uldd_task;
> + }
> + }
> +
> + /* We have to move to just a single mode: expose multiple queues */
> + if (!hisi_hba->reply_map && !shost->nr_hw_queues) {
> *dq_pointer = dq = sas_dev->dq;
> + } else {
> + if (hisi_hba->reply_map) {
> + int cpu = raw_smp_processor_id();
> + unsigned int dq_index = hisi_hba->reply_map[cpu];
> +
> + *dq_pointer = dq = &hisi_hba->dq[dq_index];
> + } else {
> + if (scmd) {
> + unsigned int dq_index;
> + u32 blk_tag;
> +
> + blk_tag = blk_mq_unique_tag(scmd->request);
> + dq_index = blk_mq_unique_tag_to_hwq(blk_tag);
> + *dq_pointer = dq = &hisi_hba->dq[dq_index];
> + } else {
> + *dq_pointer = dq = sas_dev->dq;
> + }
> + }
> }
>
> port = to_hisi_sas_port(sas_port);
> @@ -438,22 +465,10 @@ static int hisi_sas_task_prep(struct sas_task *task,
> }
>
> if (hisi_hba->hw->slot_index_alloc)
> - rc = hisi_hba->hw->slot_index_alloc(hisi_hba, device, NULL);
> - else {
> - struct scsi_cmnd *scsi_cmnd = NULL;
> -
> - if (task->uldd_task) {
> - struct ata_queued_cmd *qc;
> + rc = hisi_hba->hw->slot_index_alloc(hisi_hba, device, scmd);
> + else
> + rc = hisi_sas_slot_index_alloc(hisi_hba, scmd);
>
> - if (dev_is_sata(device)) {
> - qc = task->uldd_task;
> - scsi_cmnd = qc->scsicmd;
> - } else {
> - scsi_cmnd = task->uldd_task;
> - }
> - }
> - rc = hisi_sas_slot_index_alloc(hisi_hba, scsi_cmnd);
> - }
> if (rc < 0)
> goto err_out_dif_dma_unmap;
>
> diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
> index 29119d0b27a7..03ba0416f910 100644
> --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
> +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
> @@ -512,6 +512,11 @@ module_param(auto_affine_msi_experimental, bool, 0444);
> MODULE_PARM_DESC(auto_affine_msi_experimental, "Enable auto-affinity of MSI IRQs as experimental:\n"
> "default is off");
>
> +static bool expose_mq_experimental;
> +module_param(expose_mq_experimental, bool, 0444);
> +MODULE_PARM_DESC(expose_mq_experimental, "Expose multiple hw queues to upper layer as experimental:\n"
> + "default is off");
> +
> static u32 hisi_sas_read32(struct hisi_hba *hisi_hba, u32 off)
> {
> void __iomem *regs = hisi_hba->regs + off;
> @@ -558,6 +563,11 @@ static u32 hisi_sas_phy_read32(struct hisi_hba *hisi_hba,
>
> static int bitmaps_alloc_v3_hw(struct hisi_hba *hisi_hba)
> {
> + if (expose_mq_experimental)
> + return sbitmap_init_node(&hisi_hba->slot_index_tags,
> + HISI_SAS_MAX_COMMANDS, -1,
> + GFP_KERNEL,
> + dev_to_node(hisi_hba->dev));
> return sbitmap_init_node(&hisi_hba->slot_index_tags,
> HISI_SAS_UNRESERVED_IPTT, -1,
> GFP_KERNEL, dev_to_node(hisi_hba->dev));
> @@ -570,6 +580,10 @@ static int slot_index_alloc_v3_hw(struct hisi_hba *hisi_hba,
> struct sbitmap *slot_index_tags = &hisi_hba->slot_index_tags;
> int index;
>
> + if (expose_mq_experimental)
> + return sbitmap_get(slot_index_tags,
> + hisi_hba->sbitmap_alloc_hint, false);
> +
> if (scmd)
> return scmd->request->tag;
>
> @@ -583,7 +597,10 @@ static void slot_index_free_v3_hw(struct hisi_hba *hisi_hba, int slot_idx)
> {
> struct sbitmap *slot_index_tags = &hisi_hba->slot_index_tags;
>
> - if (slot_idx >= HISI_SAS_UNRESERVED_IPTT)
> + if (expose_mq_experimental) {
> + sbitmap_clear_bit(slot_index_tags, slot_idx);
> + hisi_hba->sbitmap_alloc_hint = slot_idx;
> + } else if (slot_idx >= HISI_SAS_UNRESERVED_IPTT)
> sbitmap_clear_bit(slot_index_tags,
> slot_idx - HISI_SAS_UNRESERVED_IPTT);
> }
> @@ -2414,8 +2431,9 @@ static int interrupt_preinit_v3_hw(struct hisi_hba *hisi_hba)
> struct device *dev = hisi_hba->dev;
> int vectors;
> int max_msi = HISI_SAS_MSI_COUNT_V3_HW, min_msi;
> + struct Scsi_Host *shost = hisi_hba->shost;
>
> - if (auto_affine_msi_experimental) {
> + if (auto_affine_msi_experimental || expose_mq_experimental) {
> struct irq_affinity desc = {
> .pre_vectors = BASE_VECTORS_V3_HW,
> };
> @@ -2434,7 +2452,9 @@ static int interrupt_preinit_v3_hw(struct hisi_hba *hisi_hba)
> &desc);
> if (vectors < 0)
> return -ENOENT;
> - setup_reply_map_v3_hw(hisi_hba, vectors - BASE_VECTORS_V3_HW);
> + if (auto_affine_msi_experimental)
> + setup_reply_map_v3_hw(hisi_hba,
> + vectors - BASE_VECTORS_V3_HW);
> } else {
> min_msi = max_msi;
> vectors = pci_alloc_irq_vectors(hisi_hba->pci_dev, min_msi,
> @@ -2444,6 +2464,9 @@ static int interrupt_preinit_v3_hw(struct hisi_hba *hisi_hba)
> }
>
> hisi_hba->cq_nvecs = vectors - BASE_VECTORS_V3_HW;
> + if (expose_mq_experimental)
> + shost->nr_hw_queues = hisi_hba->cq_nvecs;
> +
> return 0;
> }
>
> @@ -3096,6 +3119,17 @@ static int debugfs_set_bist_v3_hw(struct hisi_hba *hisi_hba, bool enable)
> return 0;
> }
>
> +static int hisi_sas_map_queues(struct Scsi_Host *shost)
> +{
> + struct hisi_hba *hisi_hba = shost_priv(shost);
> + struct blk_mq_queue_map *qmap = &shost->tag_set.map[HCTX_TYPE_DEFAULT];
> +
> + if (expose_mq_experimental)
> + return blk_mq_pci_map_queues(qmap, hisi_hba->pci_dev,
> + BASE_VECTORS_V3_HW);
> + return blk_mq_map_queues(qmap);
> +}
> +
> static struct scsi_host_template sht_v3_hw = {
> .name = DRV_NAME,
> .module = THIS_MODULE,
> @@ -3104,6 +3138,7 @@ static struct scsi_host_template sht_v3_hw = {
> .slave_configure = hisi_sas_slave_configure,
> .scan_finished = hisi_sas_scan_finished,
> .scan_start = hisi_sas_scan_start,
> + .map_queues = hisi_sas_map_queues,
> .change_queue_depth = sas_change_queue_depth,
> .bios_param = sas_bios_param,
> .this_id = -1,
> @@ -3265,8 +3300,14 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> shost->max_lun = ~0;
> shost->max_channel = 1;
> shost->max_cmd_len = 16;
> - shost->can_queue = HISI_SAS_UNRESERVED_IPTT;
> - shost->cmd_per_lun = HISI_SAS_UNRESERVED_IPTT;
> +
> + if (expose_mq_experimental) {
> + shost->can_queue = HISI_SAS_MAX_COMMANDS;
> + shost->cmd_per_lun = HISI_SAS_MAX_COMMANDS;

The above contradicts the current meaning of 'nr_hw_queues': can_queue is
the depth of each hardware queue, not of the whole host. See the comment
on Scsi_Host.nr_hw_queues:


/*
* In scsi-mq mode, the number of hardware queues supported by the LLD.
*
* Note: it is assumed that each hardware queue has a queue depth of
* can_queue. In other words, the total queue depth per host
* is nr_hw_queues * can_queue.
*/

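Also, this implementation wastes too much memory: with can_queue set to
HISI_SAS_MAX_COMMANDS, every one of the nr_hw_queues hardware queues gets
its own set of tags/requests of that full depth.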


thanks,
Ming