Re: [REVIEW][PATCH] Making poll generally useful for sysctls

From: Eric W. Biederman
Date: Sat Mar 24 2012 - 03:55:36 EST


Lucas De Marchi <lucas.demarchi@xxxxxxxxxxxxxx> writes:

>> Â/* A sysctl table is an array of struct ctl_table: */
>> Âstruct ctl_table
>> Â{
>> Â Â Â Âconst char *procname; Â Â Â Â Â /* Text ID for /proc/sys, or zero */
>> Â Â Â Âvoid *data;
>> + Â Â Â atomic_t event;
>> Â Â Â Âint maxlen;
>> Â Â Â Âumode_t mode;
>> Â Â Â Âstruct ctl_table *child; Â Â Â Â/* Deprecated */
>> Â Â Â Âproc_handler *proc_handler; Â Â /* Callback for text formatting */
>> - Â Â Â struct ctl_table_poll *poll;
>> Â Â Â Âvoid *extra1;
>> Â Â Â Âvoid *extra2;
>> Â};
>> @@ -1042,6 +1025,7 @@ struct ctl_table_header
>> Â Â Â Â Â Â Â Â};
>> Â Â Â Â Â Â Â Âstruct rcu_head rcu;
>> Â Â Â Â};
>> + Â Â Â wait_queue_head_t wait;
>
> If you have a waitqueue per table instead of per entry you will get
> spurious notifications when other entries change. The easiest way to
> test this is to poll /proc/sys/kernel/hostname and change your
> domainname.

You will get spurious wakeups but not spurious notifications to
userspace since event is still per entry.

For my money that seemed a nice compromise of code simplicity, and
generality. We could of course do something much closer to what
sysfs does and allocate and refcount something similar to your
ctl_table_poll when we have a ctl_table opened. But that just looked
like a pain.

Of course we already have spurious notifications for hostname and
domainname when multiple uts namespaces are involved, but that
is a different problem.

> I couldn't apply this patch to any tree (linus/master + my previous
> patch, your master, 3.3 + my previous patch), so I couldn't test. On
> top of your tree:

How odd. It should have applied cleanly to my tree and it applies
with just a two line offset top of Linus's latest with my tree merged
in. Those two lines of offset coming from the two extra includes
that came in through the merge.

patch -p1 --dry-run < ~/tmp/sysctl-poll-test.patch
patching file fs/proc/proc_sysctl.c
Hunk #1 succeeded at 18 (offset 2 lines).
Hunk #2 succeeded at 173 (offset 2 lines).
Hunk #3 succeeded at 245 (offset 2 lines).
Hunk #4 succeeded at 512 (offset 2 lines).
Hunk #5 succeeded at 542 (offset 2 lines).
Hunk #6 succeeded at 561 (offset 2 lines).
patching file include/linux/sysctl.h
patching file kernel/utsname_sysctl.c

> [lucas@vader kernel]$ git am /tmp/a.patch
> Applying: Making poll generally useful for sysctls
> error: patch failed: fs/proc/proc_sysctl.c:16
> error: fs/proc/proc_sysctl.c: patch does not apply
> error: patch failed: include/linux/sysctl.h:992
> error: include/linux/sysctl.h: patch does not apply
> Patch failed at 0001 Making poll generally useful for sysctls

Here is rebased version of the patch just in case that helps.

---

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 21d836f..739615c 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -18,13 +18,15 @@ static const struct inode_operations proc_sys_inode_operations;
static const struct file_operations proc_sys_dir_file_operations;
static const struct inode_operations proc_sys_dir_operations;

-void proc_sys_poll_notify(struct ctl_table_poll *poll)
+static inline void *proc_sys_poll_event(struct ctl_table *table)
{
- if (!poll)
- return;
+ return (void *)(unsigned long)atomic_read(&table->event);
+}

- atomic_inc(&poll->event);
- wake_up_interruptible(&poll->wait);
+void proc_sys_poll_notify(struct ctl_table_header *head, struct ctl_table *table)
+{
+ atomic_inc(&table->event);
+ wake_up_interruptible(&head->wait);
}

static struct ctl_table root_table[] = {
@@ -171,6 +173,7 @@ static void init_header(struct ctl_table_header *head,
for (entry = table; entry->procname; entry++, node++) {
rb_init_node(&node->node);
node->header = head;
+ atomic_set(&entry->event, 1);
}
}
}
@@ -242,6 +245,8 @@ static void start_unregistering(struct ctl_table_header *p)
/* anything non-NULL; we'll never dereference it */
p->unregistering = ERR_PTR(-EINVAL);
}
+ /* Don't let poll sleep forever on deleted entries */
+ wake_up_interruptible(&p->wait);
/*
* do not remove from the list until nobody holds it; walking the
* list in do_sysctl() relies on that.
@@ -507,6 +512,9 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
error = table->proc_handler(table, write, buf, &res, ppos);
if (!error)
error = res;
+
+ if (write)
+ proc_sys_poll_notify(head, table);
out:
sysctl_head_finish(head);

@@ -534,8 +542,7 @@ static int proc_sys_open(struct inode *inode, struct file *filp)
if (IS_ERR(head))
return PTR_ERR(head);

- if (table->poll)
- filp->private_data = proc_sys_poll_event(table->poll);
+ filp->private_data = proc_sys_poll_event(table);

sysctl_head_finish(head);

@@ -554,21 +561,14 @@ static unsigned int proc_sys_poll(struct file *filp, poll_table *wait)
if (IS_ERR(head))
return POLLERR | POLLHUP;

- if (!table->proc_handler)
- goto out;
-
- if (!table->poll)
- goto out;
-
event = (unsigned long)filp->private_data;
- poll_wait(filp, &table->poll->wait, wait);
+ poll_wait(filp, &head->wait, wait);

- if (event != atomic_read(&table->poll->event)) {
- filp->private_data = proc_sys_poll_event(table->poll);
+ if (event != atomic_read(&table->event)) {
+ filp->private_data = proc_sys_poll_event(table);
ret = POLLIN | POLLRDNORM | POLLERR | POLLPRI;
}

-out:
sysctl_head_finish(head);

return ret;
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index c34b4c8..8647ee0 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -992,34 +992,17 @@ extern int proc_do_large_bitmap(struct ctl_table *, int,
* cover common cases.
*/

-/* Support for userspace poll() to watch for changes */
-struct ctl_table_poll {
- atomic_t event;
- wait_queue_head_t wait;
-};
-
-static inline void *proc_sys_poll_event(struct ctl_table_poll *poll)
-{
- return (void *)(unsigned long)atomic_read(&poll->event);
-}
-
-#define __CTL_TABLE_POLL_INITIALIZER(name) { \
- .event = ATOMIC_INIT(0), \
- .wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.wait) }
-
-#define DEFINE_CTL_TABLE_POLL(name) \
- struct ctl_table_poll name = __CTL_TABLE_POLL_INITIALIZER(name)

/* A sysctl table is an array of struct ctl_table: */
struct ctl_table
{
const char *procname; /* Text ID for /proc/sys, or zero */
void *data;
+ atomic_t event;
int maxlen;
umode_t mode;
struct ctl_table *child; /* Deprecated */
proc_handler *proc_handler; /* Callback for text formatting */
- struct ctl_table_poll *poll;
void *extra1;
void *extra2;
};
@@ -1042,6 +1025,7 @@ struct ctl_table_header
};
struct rcu_head rcu;
};
+ wait_queue_head_t wait;
struct completion *unregistering;
struct ctl_table *ctl_table_arg;
struct ctl_table_root *root;
@@ -1076,7 +1060,7 @@ struct ctl_path {

#ifdef CONFIG_SYSCTL

-void proc_sys_poll_notify(struct ctl_table_poll *poll);
+void proc_sys_poll_notify(struct ctl_table_header *head, struct ctl_table *table);

extern void setup_sysctl_set(struct ctl_table_set *p,
struct ctl_table_root *root,
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c
index 63da38c..3e91a50 100644
--- a/kernel/utsname_sysctl.c
+++ b/kernel/utsname_sysctl.c
@@ -53,18 +53,12 @@ static int proc_do_uts_string(ctl_table *table, int write,
r = proc_dostring(&uts_table,write,buffer,lenp, ppos);
put_uts(table, write, uts_table.data);

- if (write)
- proc_sys_poll_notify(table->poll);
-
return r;
}
#else
#define proc_do_uts_string NULL
#endif

-static DEFINE_CTL_TABLE_POLL(hostname_poll);
-static DEFINE_CTL_TABLE_POLL(domainname_poll);
-
static struct ctl_table uts_kern_table[] = {
{
.procname = "ostype",
@@ -93,7 +87,6 @@ static struct ctl_table uts_kern_table[] = {
.maxlen = sizeof(init_uts_ns.name.nodename),
.mode = 0644,
.proc_handler = proc_do_uts_string,
- .poll = &hostname_poll,
},
{
.procname = "domainname",
@@ -101,7 +94,6 @@ static struct ctl_table uts_kern_table[] = {
.maxlen = sizeof(init_uts_ns.name.domainname),
.mode = 0644,
.proc_handler = proc_do_uts_string,
- .poll = &domainname_poll,
},
{}
};
@@ -115,6 +107,8 @@ static struct ctl_table uts_root_table[] = {
{}
};

+static struct ctl_table_header *uts_header;
+
#ifdef CONFIG_PROC_SYSCTL
/*
* Notify userspace about a change in a certain entry of uts_kern_table,
@@ -124,13 +118,13 @@ void uts_proc_notify(enum uts_proc proc)
{
struct ctl_table *table = &uts_kern_table[proc];

- proc_sys_poll_notify(table->poll);
+ proc_sys_poll_notify(uts_header, table);
}
#endif

static int __init utsname_sysctl_init(void)
{
- register_sysctl_table(uts_root_table);
+ uts_header = register_sysctl_table(uts_root_table);
return 0;
}

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/