Re: fuse uring / wake_up on the same core

From: Bernd Schubert
Date: Mon May 01 2023 - 17:44:57 EST


On 4/28/23 23:54, Bernd Schubert wrote:
> On 4/28/23 03:44, Hillf Danton wrote:
>>    restart:
>>       for (;;) {
>>           spin_lock(&fiq->lock);
>> --- a/include/linux/sched.h
>> +++ b/include/linux/sched.h
>> @@ -953,6 +953,7 @@ struct task_struct {
>>       /* delay due to memory thrashing */
>>       unsigned                        in_thrashing:1;
>>   #endif
>> +    unsigned             seesaw:1;
>>       unsigned long            atomic_flags; /* Flags requiring atomic
>> access. */
>> --- a/kernel/sched/fair.c
>> +++ b/kernel/sched/fair.c
>> @@ -7424,6 +7424,8 @@ select_task_rq_fair(struct task_struct *
>>       if (wake_flags & WF_TTWU) {
>>           record_wakee(p);
>> +        if (p->seesaw && current->seesaw)
>> +            return cpu;
>>           if (sched_energy_enabled()) {
>>               new_cpu = find_energy_efficient_cpu(p, prev_cpu);
>>               if (new_cpu >= 0)
>
>
> Hmm, WF_CURRENT_CPU works rather similarly, except that it tests if cpu is
> in cpus_ptr?  The combination of both patches results in
>
>         if (p->seesaw && current->seesaw)
>             return cpu;
>
>         if ((wake_flags & WF_CURRENT_CPU) &&
>             cpumask_test_cpu(cpu, p->cpus_ptr))
>             return cpu;
>
>
>
> While writing the mail kernel compilation is ready, but it got late,
> will test in the morning.

This works wonders! The fuse-uring part is this

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index cd7aa679c3ee..ec5853ca9646 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -373,6 +373,9 @@ static void request_wait_answer(struct fuse_req *req)
int err;
int prev_cpu = task_cpu(current);

+ if (fc->ring.per_core_queue)
+ current->seesaw = 1;
+
if (!fc->no_interrupt) {
/* Any signal may interrupt this */
err = wait_event_interruptible(req->waitq,
diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
index 7d327699b4c5..715741ed58bf 100644
--- a/fs/fuse/dev_uring.c
+++ b/fs/fuse/dev_uring.c
@@ -1312,6 +1312,13 @@ int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
/* XXX error injection or test with malicious daemon */
}

+ /* In combination with requesting process (application) seesaw
+ * setting (see request_wait_answer), the application will
+ * stay on the same core.
+ */
+ if (fc->ring.per_core_queue)
+ current->seesaw = 1;
+
ret = fuse_uring_fetch(ring_ent, cmd);
break;
case FUSE_URING_REQ_COMMIT_AND_FETCH:




I'm not familiar at all with scheduler code, but
given that this works perfectly, it suggests the same function is also
called without an explicit waitq, when the scheduler preempts a task?

I think there might be side effects - what if multiple
applications are on one core and another core would be available?
With this flag they would stay on the same core? Maybe better to use two flags?

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 63d242164b1a..07783ddaec5c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -953,6 +953,8 @@ struct task_struct {
/* delay due to memory thrashing */
unsigned in_thrashing:1;
#endif
+ unsigned seesaw_req:1;
+ unsigned seesaw_io:1;

unsigned long atomic_flags; /* Flags requiring atomic access. */

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b9d6ed7585c6..474bf3657ef0 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7605,6 +7605,13 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
if (wake_flags & WF_TTWU) {
record_wakee(p);

+ /* current is handling requests on behalf of the waking process,
+ * both want to run on the same core in a seesaw manner.
+ */
+ if (p->seesaw_req && current->seesaw_io &&
+ cpumask_test_cpu(cpu, p->cpus_ptr))
+ return cpu;
+
if ((wake_flags & WF_CURRENT_CPU) &&
cpumask_test_cpu(cpu, p->cpus_ptr))
return cpu;

(not tested yet)


Thanks,
Bernd