Re: kexec reboot broken with ioatdma?

From: Dan Williams
Date: Fri Dec 18 2009 - 17:11:30 EST


Subject: ioat2,3: put channel hardware in known state at init

Put the ioat2 and ioat3 state machines in the halted state with all
errors cleared.

The ioat1 init path is not disturbed for stability, there are no
reported ioat1 initialization issues.

Reported-by: Roland Dreier <rdreier@xxxxxxxxx>
Not-Yet-Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx>
---
On Wed, 2009-12-16 at 16:23 -0700, Roland Dreier wrote:
> > I agree that is more robust if the init path copes with hardware
> > arriving in an unknown state. I'll look into adding a channel reset
> > in the init path (something that should probably have been there since
> > the beginning).
>
> Great, let me know and I'll try it on my system with kexec.
>

I was able to recover stuck channels with this patch, let me know if it
resolves the kexec issue.

Thanks,
Dan

drivers/dma/ioat/dma.c | 2 +
drivers/dma/ioat/dma.h | 18 +++++++++++
drivers/dma/ioat/dma_v2.c | 69 ++++++++++++++++++++++++++++++++----------
drivers/dma/ioat/dma_v2.h | 2 +
drivers/dma/ioat/dma_v3.c | 54 ++++++++++++++++++++++++---------
drivers/dma/ioat/registers.h | 1 +
6 files changed, 114 insertions(+), 32 deletions(-)


diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
index c524d36..dcc4ab7 100644
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -1032,7 +1032,7 @@ int __devinit ioat_probe(struct ioatdma_device *device)
dma->dev = &pdev->dev;

if (!dma->chancnt) {
- dev_err(dev, "zero channels detected\n");
+ dev_err(dev, "channel enumeration error\n");
goto err_setup_interrupts;
}

diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h
index 45edde9..bbc3e78 100644
--- a/drivers/dma/ioat/dma.h
+++ b/drivers/dma/ioat/dma.h
@@ -60,6 +60,7 @@
* @dca: direct cache access context
* @intr_quirk: interrupt setup quirk (for ioat_v1 devices)
* @enumerate_channels: hw version specific channel enumeration
+ * @reset_hw: hw version specific channel (re)initialization
* @cleanup_tasklet: select between the v2 and v3 cleanup routines
* @timer_fn: select between the v2 and v3 timer watchdog routines
* @self_test: hardware version specific self test for each supported op type
@@ -78,6 +79,7 @@ struct ioatdma_device {
struct dca_provider *dca;
void (*intr_quirk)(struct ioatdma_device *device);
int (*enumerate_channels)(struct ioatdma_device *device);
+ int (*reset_hw)(struct ioat_chan_common *chan);
void (*cleanup_tasklet)(unsigned long data);
void (*timer_fn)(unsigned long data);
int (*self_test)(struct ioatdma_device *device);
@@ -264,6 +266,22 @@ static inline void ioat_suspend(struct ioat_chan_common *chan)
writeb(IOAT_CHANCMD_SUSPEND, chan->reg_base + IOAT_CHANCMD_OFFSET(ver));
}

+static inline void ioat_reset(struct ioat_chan_common *chan)
+{
+ u8 ver = chan->device->version;
+
+ writeb(IOAT_CHANCMD_RESET, chan->reg_base + IOAT_CHANCMD_OFFSET(ver));
+}
+
+static inline bool ioat_reset_pending(struct ioat_chan_common *chan)
+{
+ u8 ver = chan->device->version;
+ u8 cmd;
+
+ cmd = readb(chan->reg_base + IOAT_CHANCMD_OFFSET(ver));
+ return (cmd & IOAT_CHANCMD_RESET) == IOAT_CHANCMD_RESET;
+}
+
static inline void ioat_set_chainaddr(struct ioat_dma_chan *ioat, u64 addr)
{
struct ioat_chan_common *chan = &ioat->base;
diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c
index 8f1f7f0..5f7a500 100644
--- a/drivers/dma/ioat/dma_v2.c
+++ b/drivers/dma/ioat/dma_v2.c
@@ -239,20 +239,50 @@ void __ioat2_restart_chan(struct ioat2_dma_chan *ioat)
__ioat2_start_null_desc(ioat);
}

-static void ioat2_restart_channel(struct ioat2_dma_chan *ioat)
+int ioat2_quiesce(struct ioat_chan_common *chan, unsigned long tmo)
{
- struct ioat_chan_common *chan = &ioat->base;
- unsigned long phys_complete;
+ unsigned long end = jiffies + tmo;
+ int err = 0;
u32 status;

status = ioat_chansts(chan);
if (is_ioat_active(status) || is_ioat_idle(status))
ioat_suspend(chan);
while (is_ioat_active(status) || is_ioat_idle(status)) {
+ if (end && time_after(jiffies, end)) {
+ err = -ETIMEDOUT;
+ break;
+ }
status = ioat_chansts(chan);
cpu_relax();
}

+ return err;
+}
+
+int ioat2_reset_sync(struct ioat_chan_common *chan, unsigned long tmo)
+{
+ unsigned long end = jiffies + tmo;
+ int err = 0;
+
+ ioat_reset(chan);
+ while (ioat_reset_pending(chan)) {
+ if (end && time_after(jiffies, end)) {
+ err = -ETIMEDOUT;
+ break;
+ }
+ cpu_relax();
+ }
+
+ return err;
+}
+
+static void ioat2_restart_channel(struct ioat2_dma_chan *ioat)
+{
+ struct ioat_chan_common *chan = &ioat->base;
+ unsigned long phys_complete;
+
+ ioat2_quiesce(chan, 0);
if (ioat_cleanup_preamble(chan, &phys_complete))
__cleanup(ioat, phys_complete);

@@ -318,6 +348,19 @@ void ioat2_timer_event(unsigned long data)
spin_unlock_bh(&chan->cleanup_lock);
}

+static int ioat2_reset_hw(struct ioat_chan_common *chan)
+{
+ /* throw away whatever the channel was doing and get it initialized */
+ u32 chanerr;
+
+ ioat2_quiesce(chan, msecs_to_jiffies(100));
+
+ chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+ writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
+
+ return ioat2_reset_sync(chan, msecs_to_jiffies(200));
+}
+
/**
* ioat2_enumerate_channels - find and initialize the device's channels
* @device: the device to be enumerated
@@ -360,6 +403,10 @@ int ioat2_enumerate_channels(struct ioatdma_device *device)
(unsigned long) ioat);
ioat->xfercap_log = xfercap_log;
spin_lock_init(&ioat->ring_lock);
+ if (device->reset_hw(&ioat->base)) {
+ i = 0;
+ break;
+ }
}
dma->chancnt = i;
return i;
@@ -467,7 +514,6 @@ int ioat2_alloc_chan_resources(struct dma_chan *c)
struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
struct ioat_chan_common *chan = &ioat->base;
struct ioat_ring_ent **ring;
- u32 chanerr;
int order;

/* have we already been set up? */
@@ -477,12 +523,6 @@ int ioat2_alloc_chan_resources(struct dma_chan *c)
/* Setup register to interrupt and write completion status on error */
writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET);

- chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
- if (chanerr) {
- dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr);
- writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
- }
-
/* allocate a completion writeback area */
/* doing 2 32bit writes to mmio since 1 64b write doesn't work */
chan->completion = pci_pool_alloc(chan->device->completion_pool,
@@ -746,13 +786,7 @@ void ioat2_free_chan_resources(struct dma_chan *c)
tasklet_disable(&chan->cleanup_task);
del_timer_sync(&chan->timer);
device->cleanup_tasklet((unsigned long) ioat);
-
- /* Delay 100ms after reset to allow internal DMA logic to quiesce
- * before removing DMA descriptor resources.
- */
- writeb(IOAT_CHANCMD_RESET,
- chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version));
- mdelay(100);
+ device->reset_hw(chan);

spin_lock_bh(&ioat->ring_lock);
descs = ioat2_ring_space(ioat);
@@ -839,6 +873,7 @@ int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca)
int err;

device->enumerate_channels = ioat2_enumerate_channels;
+ device->reset_hw = ioat2_reset_hw;
device->cleanup_tasklet = ioat2_cleanup_tasklet;
device->timer_fn = ioat2_timer_event;
device->self_test = ioat_dma_self_test;
diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h
index 1d849ef..3afad8d 100644
--- a/drivers/dma/ioat/dma_v2.h
+++ b/drivers/dma/ioat/dma_v2.h
@@ -185,6 +185,8 @@ bool reshape_ring(struct ioat2_dma_chan *ioat, int order);
void __ioat2_issue_pending(struct ioat2_dma_chan *ioat);
void ioat2_cleanup_tasklet(unsigned long data);
void ioat2_timer_event(unsigned long data);
+int ioat2_quiesce(struct ioat_chan_common *chan, unsigned long tmo);
+int ioat2_reset_sync(struct ioat_chan_common *chan, unsigned long tmo);
extern struct kobj_type ioat2_ktype;
extern struct kmem_cache *ioat2_cache;
#endif /* IOATDMA_V2_H */
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
index e58ecb2..9908c9e 100644
--- a/drivers/dma/ioat/dma_v3.c
+++ b/drivers/dma/ioat/dma_v3.c
@@ -1130,6 +1130,45 @@ static int __devinit ioat3_dma_self_test(struct ioatdma_device *device)
return 0;
}

+static int ioat3_reset_hw(struct ioat_chan_common *chan)
+{
+ /* throw away whatever the channel was doing and get it
+ * initialized, with ioat3 specific workarounds
+ */
+ struct ioatdma_device *device = chan->device;
+ struct pci_dev *pdev = device->pdev;
+ u32 chanerr;
+ u16 dev_id;
+ int err;
+
+ ioat2_quiesce(chan, msecs_to_jiffies(100));
+
+ chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
+ writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
+
+ /* -= IOAT ver.3 workarounds =- */
+ /* Write CHANERRMSK_INT with 3E07h to mask out the errors
+ * that can cause stability issues for IOAT ver.3, and clear any
+ * pending errors
+ */
+ pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07);
+ err = pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr);
+ if (err) {
+ dev_err(&pdev->dev, "channel error register unreachable\n");
+ return err;
+ }
+ pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr);
+
+ /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
+ * (workaround for spurious config parity error after restart)
+ */
+ pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id);
+ if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0)
+ pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10);
+
+ return ioat2_reset_sync(chan, msecs_to_jiffies(200));
+}
+
int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca)
{
struct pci_dev *pdev = device->pdev;
@@ -1139,10 +1178,10 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca)
struct ioat_chan_common *chan;
bool is_raid_device = false;
int err;
- u16 dev_id;
u32 cap;

device->enumerate_channels = ioat2_enumerate_channels;
+ device->reset_hw = ioat3_reset_hw;
device->self_test = ioat3_dma_self_test;
dma = &device->common;
dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock;
@@ -1218,19 +1257,6 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca)
dma->device_prep_dma_xor_val = NULL;
#endif

- /* -= IOAT ver.3 workarounds =- */
- /* Write CHANERRMSK_INT with 3E07h to mask out the errors
- * that can cause stability issues for IOAT ver.3
- */
- pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07);
-
- /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
- * (workaround for spurious config parity error after restart)
- */
- pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id);
- if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0)
- pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10);
-
err = ioat_probe(device);
if (err)
return err;
diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h
index f015ec1..e8ae63b 100644
--- a/drivers/dma/ioat/registers.h
+++ b/drivers/dma/ioat/registers.h
@@ -27,6 +27,7 @@

#define IOAT_PCI_DEVICE_ID_OFFSET 0x02
#define IOAT_PCI_DMAUNCERRSTS_OFFSET 0x148
+#define IOAT_PCI_CHANERR_INT_OFFSET 0x180
#define IOAT_PCI_CHANERRMASK_INT_OFFSET 0x184

/* MMIO Device Registers */


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/