[RFC PATCH 11/11] scsi: storvsc: Re-init stor_chns when a channel interrupt is re-assigned

From: Andrea Parri (Microsoft)
Date: Wed Mar 25 2020 - 18:56:54 EST


For each storvsc_device, storvsc keeps track of the channel target CPUs
associated to the device (alloced_cpus) and it uses this information to
fill a "cache" (stor_chns) mapping CPU->channel according to a certain
heuristic. Update the alloced_cpus mask and the stor_chns array when a
channel of the storvsc device is re-assigned to a different CPU.

Signed-off-by: Andrea Parri (Microsoft) <parri.andrea@xxxxxxxxx>
Cc: "James E.J. Bottomley" <jejb@xxxxxxxxxxxxx>
Cc: "Martin K. Petersen" <martin.petersen@xxxxxxxxxx>
Cc: <linux-scsi@xxxxxxxxxxxxxxx>
---
drivers/hv/vmbus_drv.c | 4 ++
drivers/scsi/storvsc_drv.c | 95 ++++++++++++++++++++++++++++++++++----
include/linux/hyperv.h | 3 ++
3 files changed, 94 insertions(+), 8 deletions(-)

diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 84d2f22c569aa..7199fee2b5869 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1721,6 +1721,10 @@ static ssize_t target_cpu_store(struct vmbus_channel *channel,
* in on a CPU that is different from the channel target_cpu value.
*/

+ if (channel->change_target_cpu_callback)
+ (*channel->change_target_cpu_callback)(channel,
+ channel->target_cpu, target_cpu);
+
channel->target_cpu = target_cpu;
channel->target_vp = hv_cpu_number_to_vp_number(target_cpu);
channel->numa_node = cpu_to_node(target_cpu);
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index fb41636519ee8..a680592b9d32a 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -621,6 +621,63 @@ static inline struct storvsc_device *get_in_stor_device(

}

+void storvsc_change_target_cpu(struct vmbus_channel *channel, u32 old, u32 new)
+{
+ struct storvsc_device *stor_device;
+ struct vmbus_channel *cur_chn;
+ bool old_is_alloced = false;
+ struct hv_device *device;
+ unsigned long flags;
+ int cpu;
+
+ device = channel->primary_channel ?
+ channel->primary_channel->device_obj
+ : channel->device_obj;
+ stor_device = get_out_stor_device(device);
+ if (!stor_device)
+ return;
+
+ /* See storvsc_do_io() -> get_og_chn(). */
+ spin_lock_irqsave(&device->channel->lock, flags);
+
+ /*
+ * Determines if the storvsc device has other channels assigned to
+ * the "old" CPU to update the alloced_cpus mask and the stor_chns
+ * array.
+ */
+ if (device->channel != channel && device->channel->target_cpu == old) {
+ cur_chn = device->channel;
+ old_is_alloced = true;
+ goto old_is_alloced;
+ }
+ list_for_each_entry(cur_chn, &device->channel->sc_list, sc_list) {
+ if (cur_chn == channel)
+ continue;
+ if (cur_chn->target_cpu == old) {
+ old_is_alloced = true;
+ goto old_is_alloced;
+ }
+ }
+
+old_is_alloced:
+ if (old_is_alloced)
+ WRITE_ONCE(stor_device->stor_chns[old], cur_chn);
+ else
+ cpumask_clear_cpu(old, &stor_device->alloced_cpus);
+
+ /* "Flush" the stor_chns array. */
+ for_each_possible_cpu(cpu) {
+ if (stor_device->stor_chns[cpu] && !cpumask_test_cpu(
+ cpu, &stor_device->alloced_cpus))
+ WRITE_ONCE(stor_device->stor_chns[cpu], NULL);
+ }
+
+ WRITE_ONCE(stor_device->stor_chns[new], channel);
+ cpumask_set_cpu(new, &stor_device->alloced_cpus);
+
+ spin_unlock_irqrestore(&device->channel->lock, flags);
+}
+
static void handle_sc_creation(struct vmbus_channel *new_sc)
{
struct hv_device *device = new_sc->primary_channel->device_obj;
@@ -648,6 +705,8 @@ static void handle_sc_creation(struct vmbus_channel *new_sc)
return;
}

+ new_sc->change_target_cpu_callback = storvsc_change_target_cpu;
+
/* Add the sub-channel to the array of available channels. */
stor_device->stor_chns[new_sc->target_cpu] = new_sc;
cpumask_set_cpu(new_sc->target_cpu, &stor_device->alloced_cpus);
@@ -876,6 +935,8 @@ static int storvsc_channel_init(struct hv_device *device, bool is_fc)
if (stor_device->stor_chns == NULL)
return -ENOMEM;

+ device->channel->change_target_cpu_callback = storvsc_change_target_cpu;
+
stor_device->stor_chns[device->channel->target_cpu] = device->channel;
cpumask_set_cpu(device->channel->target_cpu,
&stor_device->alloced_cpus);
@@ -1248,8 +1309,10 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
const struct cpumask *node_mask;
int num_channels, tgt_cpu;

- if (stor_device->num_sc == 0)
+ if (stor_device->num_sc == 0) {
+ stor_device->stor_chns[q_num] = stor_device->device->channel;
return stor_device->device->channel;
+ }

/*
* Our channel array is sparsley populated and we
@@ -1258,7 +1321,6 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
* The strategy is simple:
* I. Ensure NUMA locality
* II. Distribute evenly (best effort)
- * III. Mapping is persistent.
*/

node_mask = cpumask_of_node(cpu_to_node(q_num));
@@ -1268,8 +1330,10 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device,
if (cpumask_test_cpu(tgt_cpu, node_mask))
num_channels++;
}
- if (num_channels == 0)
+ if (num_channels == 0) {
+ stor_device->stor_chns[q_num] = stor_device->device->channel;
return stor_device->device->channel;
+ }

hash_qnum = q_num;
while (hash_qnum >= num_channels)
@@ -1295,6 +1359,7 @@ static int storvsc_do_io(struct hv_device *device,
struct storvsc_device *stor_device;
struct vstor_packet *vstor_packet;
struct vmbus_channel *outgoing_channel, *channel;
+ unsigned long flags;
int ret = 0;
const struct cpumask *node_mask;
int tgt_cpu;
@@ -1308,10 +1373,11 @@ static int storvsc_do_io(struct hv_device *device,

request->device = device;
/*
- * Select an an appropriate channel to send the request out.
+ * Select an appropriate channel to send the request out.
*/
- if (stor_device->stor_chns[q_num] != NULL) {
- outgoing_channel = stor_device->stor_chns[q_num];
+ /* See storvsc_change_target_cpu(). */
+ outgoing_channel = READ_ONCE(stor_device->stor_chns[q_num]);
+ if (outgoing_channel != NULL) {
if (outgoing_channel->target_cpu == q_num) {
/*
* Ideally, we want to pick a different channel if
@@ -1324,7 +1390,10 @@ static int storvsc_do_io(struct hv_device *device,
continue;
if (tgt_cpu == q_num)
continue;
- channel = stor_device->stor_chns[tgt_cpu];
+ channel = READ_ONCE(
+ stor_device->stor_chns[tgt_cpu]);
+ if (channel == NULL)
+ continue;
if (hv_get_avail_to_write_percent(
&channel->outbound)
> ring_avail_percent_lowater) {
@@ -1350,7 +1419,10 @@ static int storvsc_do_io(struct hv_device *device,
for_each_cpu(tgt_cpu, &stor_device->alloced_cpus) {
if (cpumask_test_cpu(tgt_cpu, node_mask))
continue;
- channel = stor_device->stor_chns[tgt_cpu];
+ channel = READ_ONCE(
+ stor_device->stor_chns[tgt_cpu]);
+ if (channel == NULL)
+ continue;
if (hv_get_avail_to_write_percent(
&channel->outbound)
> ring_avail_percent_lowater) {
@@ -1360,7 +1432,14 @@ static int storvsc_do_io(struct hv_device *device,
}
}
} else {
+ spin_lock_irqsave(&device->channel->lock, flags);
+ outgoing_channel = stor_device->stor_chns[q_num];
+ if (outgoing_channel != NULL) {
+ spin_unlock_irqrestore(&device->channel->lock, flags);
+ goto found_channel;
+ }
outgoing_channel = get_og_chn(stor_device, q_num);
+ spin_unlock_irqrestore(&device->channel->lock, flags);
}

found_channel:
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index edfcd42319ef3..9018b89614b78 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -773,6 +773,9 @@ struct vmbus_channel {
void (*onchannel_callback)(void *context);
void *channel_callback_context;

+ void (*change_target_cpu_callback)(struct vmbus_channel *channel,
+ u32 old, u32 new);
+
/*
* Synchronize channel scheduling and channel removal; see the inline
* comments in vmbus_chan_sched() and vmbus_reset_channel_cb().
--
2.24.0