Re: [PATCH] virtio: a new vcpu watchdog driver

From: Randy Dunlap
Date: Sun Jul 30 2023 - 22:23:22 EST


Hi--

On 7/30/23 18:25, zhanghao1 wrote:
> A new virtio pci driver is added for listening to vcpus
> inside guest. Each vcpu creates a corresponding thread to
> periodically send data to qemu's back-end watchdog device.
> If a vCPU is in the stall state, data cannot be sent to
> back-end virtio device. As a result, the back-end device
> can detect that the guest is in the stall state.
>
> The driver is mainly used with the back-end watchdog device of qemu.
>
> The qemu backend watchdog device is implemented as follow:
> https://lore.kernel.org/qemu-devel/20230705081813.411526-1-zhanghao1@xxxxxxxxxx/
>
> Signed-off-by: zhanghao1 <zhanghao1@xxxxxxxxxx>
> ---
> drivers/virtio/Kconfig | 9 +
> drivers/virtio/Makefile | 1 +
> drivers/virtio/virtio_vcpu_stall_detector.c | 299 ++++++++++++++++++++
> 3 files changed, 309 insertions(+)
> create mode 100644 drivers/virtio/virtio_vcpu_stall_detector.c
>
> diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
> index 0a53a61231c2..869323e345a1 100644
> --- a/drivers/virtio/Kconfig
> +++ b/drivers/virtio/Kconfig
> @@ -173,4 +173,13 @@ config VIRTIO_DMA_SHARED_BUFFER
> This option adds a flavor of dma buffers that are backed by
> virtio resources.
>
> +config VIRTIO_VCPU_WATCHDOG
> + tristate "Virtio vcpu watchdog driver"
> + depends on VIRTIO_PCI
> + help
> + When this driver is bound inside a KVM guest, it will
> + periodically "pet" an PCI virtio watchdog device from each vCPU

"a PCI" (not "an PCI")

> + and allow the host to detect vCPU stalls.
> +
> + If you do not intend to run this kernel as a guest, say N.

Kconfig help text should be indented with one tab plus two spaces,
according to Documentation/process/coding-style.rst.

> endif # VIRTIO_MENU
> diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
> index 8e98d24917cc..c7341f078a34 100644
> --- a/drivers/virtio/Makefile
> +++ b/drivers/virtio/Makefile
> @@ -12,3 +12,4 @@ obj-$(CONFIG_VIRTIO_INPUT) += virtio_input.o
> obj-$(CONFIG_VIRTIO_VDPA) += virtio_vdpa.o
> obj-$(CONFIG_VIRTIO_MEM) += virtio_mem.o
> obj-$(CONFIG_VIRTIO_DMA_SHARED_BUFFER) += virtio_dma_buf.o
> +obj-$(CONFIG_VIRTIO_VCPU_WATCHDOG) += virtio_vcpu_stall_detector.o
> diff --git a/drivers/virtio/virtio_vcpu_stall_detector.c b/drivers/virtio/virtio_vcpu_stall_detector.c
> new file mode 100644
> index 000000000000..58344ca528be
> --- /dev/null
> +++ b/drivers/virtio/virtio_vcpu_stall_detector.c
> @@ -0,0 +1,299 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +//
> +// VCPU stall detector.
> +// Copyright (C) Kylin Software, 2023
> +
> +#include <linux/cpu.h>
> +#include <linux/init.h>
> +#include <linux/io.h>
> +#include <linux/kernel.h>
> +
> +#include <linux/device.h>
> +#include <linux/interrupt.h>
> +#include <linux/module.h>
> +#include <linux/nmi.h>
> +#include <uapi/linux/virtio_ids.h>
> +#include <linux/virtio_config.h>
> +#include <linux/param.h>
> +#include <linux/percpu.h>
> +#include <linux/slab.h>
> +
> +#define VCPU_STALL_REG_STATUS (0x00)
> +#define VCPU_STALL_REG_LOAD_CNT (0x04)
> +#define VCPU_STALL_REG_CURRENT_CNT (0x08)
> +#define VCPU_STALL_REG_CLOCK_FREQ_HZ (0x0C)
> +#define VCPU_STALL_REG_LEN (0x10)
> +#define VCPU_STALL_REG_TIMEOUT_SEC (0x14)
> +
> +#define VCPU_STALL_DEFAULT_CLOCK_HZ (10)
> +#define VCPU_STALL_MAX_CLOCK_HZ (100)
> +#define VCPU_STALL_DEFAULT_TIMEOUT_SEC (8)
> +#define VCPU_STALL_MAX_TIMEOUT_SEC (600)
> +
> +struct vcpu_stall_detect_config {
> + u32 clock_freq_hz;
> + u32 stall_timeout_sec;
> +
> + enum cpuhp_state hp_online;
> +};
> +
> +struct vcpu_stall_priv {
> + struct hrtimer vcpu_hrtimer;
> + struct virtio_device *vdev;
> + u32 cpu_id;
> +};
> +
> +struct vcpu_stall {
> + struct vcpu_stall_priv *priv;
> + struct virtqueue *vq;
> + spinlock_t lock;
> + struct pet_event {
> + u32 cpu_id;
> + bool is_initialized;
> + u32 ticks;
> + } pet_event;
> +};
> +
> +static const struct virtio_device_id vcpu_stall_id_table[] = {
> + { VIRTIO_ID_WATCHDOG, VIRTIO_DEV_ANY_ID },
> + { 0, },
> +};
> +
> +/* The vcpu stall configuration structure which applies to all the CPUs */
> +static struct vcpu_stall_detect_config vcpu_stall_config;
> +static struct vcpu_stall *vcpu_stall;
> +
> +static struct vcpu_stall_priv __percpu *vcpu_stall_detectors;
> +
> +static enum hrtimer_restart
> +vcpu_stall_detect_timer_fn(struct hrtimer *hrtimer)

Put the return type and the function name on one line instead of the 2 lines above.

> +{

--
~Randy