Re: [PATCH v3 2/2] virtio_balloon: replace oom notifier with shrinker

From: Michael S. Tsirkin
Date: Fri Aug 03 2018 - 15:15:38 EST


On Fri, Aug 03, 2018 at 04:32:26PM +0800, Wei Wang wrote:
> The OOM notifier is being deprecated for the following reasons:
> - As a callout from the oom context, it is too subtle and easy to
> generate bugs and corner cases which are hard to track;
> - It is called too late (after the reclaiming has been performed).
> Drivers with a large amount of reclaimable memory are expected to
> release it at an early stage of memory pressure;
> - The notifier callback isn't aware of OOM constraints;
> Link: https://lkml.org/lkml/2018/7/12/314
>
> This patch replaces the virtio-balloon oom notifier with a shrinker
> to release balloon pages on memory pressure. The balloon pages are
> given back to mm adaptively by returning the number of pages that the
> reclaimer is asking for (i.e. sc->nr_to_scan).
>
> Currently the max possible value of sc->nr_to_scan passed to the balloon
> shrinker is SHRINK_BATCH, which is 128. This is smaller than the
> limitation that only VIRTIO_BALLOON_ARRAY_PFNS_MAX (256) pages can be
> returned via one invocation of leak_balloon. But this patch still
> considers the case that SHRINK_BATCH or shrinker->batch could be changed
> to a value larger than VIRTIO_BALLOON_ARRAY_PFNS_MAX, which will need to
> do multiple invocations of leak_balloon.
>
> Historically, the feature VIRTIO_BALLOON_F_DEFLATE_ON_OOM has been used
> to release balloon pages on OOM. We continue to use this feature bit for
> the shrinker, so the shrinker is only registered when this feature bit
> has been negotiated with host.
>
> Signed-off-by: Wei Wang <wei.w.wang@xxxxxxxxx>
> Cc: Michael S. Tsirkin <mst@xxxxxxxxxx>
> Cc: Michal Hocko <mhocko@xxxxxxxxxx>
> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>


Could you add data on how this was tested and how guest
behaviour changed? Which configurations see an improvement?

> ---
> drivers/virtio/virtio_balloon.c | 111 ++++++++++++++++++++++------------------
> 1 file changed, 60 insertions(+), 51 deletions(-)
>
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index 8100e77..612a359 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -27,7 +27,6 @@
> #include <linux/slab.h>
> #include <linux/module.h>
> #include <linux/balloon_compaction.h>
> -#include <linux/oom.h>
> #include <linux/wait.h>
> #include <linux/mm.h>
> #include <linux/mount.h>
> @@ -40,13 +39,8 @@
> */
> #define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT)
> #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
> -#define OOM_VBALLOON_DEFAULT_PAGES 256
> #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
>
> -static int oom_pages = OOM_VBALLOON_DEFAULT_PAGES;
> -module_param(oom_pages, int, S_IRUSR | S_IWUSR);
> -MODULE_PARM_DESC(oom_pages, "pages to free on OOM");
> -
> #ifdef CONFIG_BALLOON_COMPACTION
> static struct vfsmount *balloon_mnt;
> #endif
> @@ -86,8 +80,8 @@ struct virtio_balloon {
> /* Memory statistics */
> struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
>
> - /* To register callback in oom notifier call chain */
> - struct notifier_block nb;
> + /* To register a shrinker to shrink memory upon memory pressure */
> + struct shrinker shrinker;
> };
>
> static struct virtio_device_id id_table[] = {
> @@ -365,38 +359,6 @@ static void update_balloon_size(struct virtio_balloon *vb)
> &actual);
> }
>
> -/*
> - * virtballoon_oom_notify - release pages when system is under severe
> - * memory pressure (called from out_of_memory())
> - * @self : notifier block struct
> - * @dummy: not used
> - * @parm : returned - number of freed pages
> - *
> - * The balancing of memory by use of the virtio balloon should not cause
> - * the termination of processes while there are pages in the balloon.
> - * If virtio balloon manages to release some memory, it will make the
> - * system return and retry the allocation that forced the OOM killer
> - * to run.
> - */
> -static int virtballoon_oom_notify(struct notifier_block *self,
> - unsigned long dummy, void *parm)
> -{
> - struct virtio_balloon *vb;
> - unsigned long *freed;
> - unsigned num_freed_pages;
> -
> - vb = container_of(self, struct virtio_balloon, nb);
> - if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
> - return NOTIFY_OK;
> -
> - freed = parm;
> - num_freed_pages = leak_balloon(vb, oom_pages);
> - update_balloon_size(vb);
> - *freed += num_freed_pages;
> -
> - return NOTIFY_OK;
> -}
> -
> static void update_balloon_stats_func(struct work_struct *work)
> {
> struct virtio_balloon *vb;
> @@ -550,6 +512,53 @@ static struct file_system_type balloon_fs = {
>
> #endif /* CONFIG_BALLOON_COMPACTION */
>
> +static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker,
> + struct shrink_control *sc)
> +{
> + unsigned long pages_to_free, pages_freed = 0;
> + struct virtio_balloon *vb = container_of(shrinker,
> + struct virtio_balloon, shrinker);
> +
> + pages_to_free = sc->nr_to_scan * VIRTIO_BALLOON_PAGES_PER_PAGE;
> +
> + /*
> + * One invocation of leak_balloon can deflate at most
> + * VIRTIO_BALLOON_ARRAY_PFNS_MAX balloon pages, so we call it
> + * multiple times to deflate pages till reaching pages_to_free.
> + */
> + while (vb->num_pages && pages_to_free) {
> + pages_to_free -= pages_freed;
> + pages_freed += leak_balloon(vb, pages_to_free);
> + }
> + update_balloon_size(vb);
> +
> + return pages_freed / VIRTIO_BALLOON_PAGES_PER_PAGE;
> +}
> +
> +static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker,
> + struct shrink_control *sc)
> +{
> + struct virtio_balloon *vb = container_of(shrinker,
> + struct virtio_balloon, shrinker);
> +
> + return vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE;
> +}
> +
> +static void virtio_balloon_unregister_shrinker(struct virtio_balloon *vb)
> +{
> + unregister_shrinker(&vb->shrinker);
> +}
> +
> +static int virtio_balloon_register_shrinker(struct virtio_balloon *vb)
> +{
> + vb->shrinker.scan_objects = virtio_balloon_shrinker_scan;
> + vb->shrinker.count_objects = virtio_balloon_shrinker_count;
> + vb->shrinker.batch = 0;
> + vb->shrinker.seeks = DEFAULT_SEEKS;
> +
> + return register_shrinker(&vb->shrinker);
> +}
> +
> static int virtballoon_probe(struct virtio_device *vdev)
> {
> struct virtio_balloon *vb;
> @@ -582,17 +591,10 @@ static int virtballoon_probe(struct virtio_device *vdev)
> if (err)
> goto out_free_vb;
>
> - vb->nb.notifier_call = virtballoon_oom_notify;
> - vb->nb.priority = VIRTBALLOON_OOM_NOTIFY_PRIORITY;
> - err = register_oom_notifier(&vb->nb);
> - if (err < 0)
> - goto out_del_vqs;
> -
> #ifdef CONFIG_BALLOON_COMPACTION
> balloon_mnt = kern_mount(&balloon_fs);
> if (IS_ERR(balloon_mnt)) {
> err = PTR_ERR(balloon_mnt);
> - unregister_oom_notifier(&vb->nb);
> goto out_del_vqs;
> }
>
> @@ -601,13 +603,20 @@ static int virtballoon_probe(struct virtio_device *vdev)
> if (IS_ERR(vb->vb_dev_info.inode)) {
> err = PTR_ERR(vb->vb_dev_info.inode);
> kern_unmount(balloon_mnt);
> - unregister_oom_notifier(&vb->nb);
> vb->vb_dev_info.inode = NULL;
> goto out_del_vqs;
> }
> vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops;
> #endif
> -
> + /*
> + * We continue to use VIRTIO_BALLOON_F_DEFLATE_ON_OOM to decide if a
> + * shrinker needs to be registered to relieve memory pressure.
> + */
> + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) {
> + err = virtio_balloon_register_shrinker(vb);
> + if (err)
> + goto out_del_vqs;
> + }
> virtio_device_ready(vdev);
>
> if (towards_target(vb))
> @@ -639,8 +648,8 @@ static void virtballoon_remove(struct virtio_device *vdev)
> {
> struct virtio_balloon *vb = vdev->priv;
>
> - unregister_oom_notifier(&vb->nb);
> -
> + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM))
> + virtio_balloon_unregister_shrinker(vb);
> spin_lock_irq(&vb->stop_update_lock);
> vb->stop_update = true;
> spin_unlock_irq(&vb->stop_update_lock);
> --
> 2.7.4