Re: [PATCH v3 1/5] hugetlb: add demote hugetlb page sysfs interfaces

From: Oscar Salvador
Date: Tue Oct 05 2021 - 04:24:05 EST


On 2021-10-01 19:52, Mike Kravetz wrote:
+static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
+	__must_hold(&hugetlb_lock)
+{
+	int rc = 0;
+
+	lockdep_assert_held(&hugetlb_lock);
+
+	/* We should never get here if no demote order */
+	if (!h->demote_order)
+		return rc;

Probably add a warning here? If we should never reach this but we do, then
something got screwed up along the way and we might want to know.
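Something along these lines, just as a sketch (WARN_ON_ONCE() so a buggy
caller does not spam the log):

	/* We should never get here if no demote order */
	if (WARN_ON_ONCE(!h->demote_order))
		return rc;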

+static ssize_t demote_store(struct kobject *kobj,
+	       struct kobj_attribute *attr, const char *buf, size_t len)
+{
+	unsigned long nr_demote;
+	unsigned long nr_available;
+	nodemask_t nodes_allowed, *n_mask;
+	struct hstate *h;
+	int err;
+	int nid;
+
+	err = kstrtoul(buf, 10, &nr_demote);
+	if (err)
+		return err;
+	h = kobj_to_hstate(kobj, &nid);
+
+	/* Synchronize with other sysfs operations modifying huge pages */
+	mutex_lock(&h->resize_lock);
+
+	spin_lock_irq(&hugetlb_lock);
+	if (nid != NUMA_NO_NODE) {
+		init_nodemask_of_node(&nodes_allowed, nid);
+		n_mask = &nodes_allowed;
+	} else {
+		n_mask = &node_states[N_MEMORY];
+	}

Can't the n_mask dance be done outside the locks? That does not need to be
protected, right?
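I.e., just moving the existing code up, something like:

	if (nid != NUMA_NO_NODE) {
		init_nodemask_of_node(&nodes_allowed, nid);
		n_mask = &nodes_allowed;
	} else {
		n_mask = &node_states[N_MEMORY];
	}

	/* Synchronize with other sysfs operations modifying huge pages */
	mutex_lock(&h->resize_lock);
	spin_lock_irq(&hugetlb_lock);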

+
+	while (nr_demote) {
+		/*
+		 * Check for available pages to demote each time thorough the
+		 * loop as demote_pool_huge_page will drop hugetlb_lock.
+		 *
+		 * NOTE: demote_pool_huge_page does not yet drop hugetlb_lock
+		 * but will when full demote functionality is added in a later
+		 * patch.
+		 */
+		if (nid != NUMA_NO_NODE)
+			nr_available = h->free_huge_pages_node[nid];
+		else
+			nr_available = h->free_huge_pages;
+		nr_available -= h->resv_huge_pages;
+		if (!nr_available)
+			break;
+
+		if (!demote_pool_huge_page(h, n_mask))
+			break;

Is it possible that h->resv_huge_pages changes when demote_pool_huge_page()
drops the lock? Or can that only happen under h->resize_lock?

+
+		nr_demote--;
+	}
+
+	spin_unlock_irq(&hugetlb_lock);
+	mutex_unlock(&h->resize_lock);
+
+	return len;
+}
+HSTATE_ATTR_WO(demote);
+
+static ssize_t demote_size_show(struct kobject *kobj,
+					struct kobj_attribute *attr, char *buf)
+{
+	struct hstate *h;
+	unsigned long demote_size;
+	int nid;
+
+	h = kobj_to_hstate(kobj, &nid);
+	demote_size = h->demote_order;

This has already been pointed out: demote_size is assigned here but never
used below.
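I.e., either drop the local or actually use it in the sysfs_emit() below,
something like:

	demote_size = (unsigned long)(PAGE_SIZE << h->demote_order) / SZ_1K;

	return sysfs_emit(buf, "%lukB\n", demote_size);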

+
+	return sysfs_emit(buf, "%lukB\n",
+			(unsigned long)(PAGE_SIZE << h->demote_order) / SZ_1K);
+}
+
+static ssize_t demote_size_store(struct kobject *kobj,
+					struct kobj_attribute *attr,
+					const char *buf, size_t count)
+{
+	struct hstate *h, *t_hstate;
+	unsigned long demote_size;
+	unsigned int demote_order;
+	int nid;

This is just a nit-pick, but what is t_hstate? demote_hstate might be more descriptive.
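I.e. (untested, just the rename):

	struct hstate *h, *demote_hstate;
	...
	demote_hstate = size_to_hstate(demote_size);
	if (!demote_hstate)
		return -EINVAL;
	demote_order = demote_hstate->order;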

+
+	demote_size = (unsigned long)memparse(buf, NULL);
+
+	t_hstate = size_to_hstate(demote_size);
+	if (!t_hstate)
+		return -EINVAL;
+	demote_order = t_hstate->order;
+
+	/* demote order must be smaller hstate order */

"must be smaller than"

--
Oscar Salvador
SUSE Labs