Re: GCC section alignment, and GCC-4.9 being a weird one

From: Peter Zijlstra
Date: Wed Oct 21 2020 - 09:45:03 EST


On Wed, Oct 21, 2020 at 03:18:06PM +0200, Jakub Jelinek wrote:

> As the aligned attribute is just on the type and not on the actual
> variables, I don't consider it a bug; GCC only guarantees the alignment
> when the variable itself has user-specified alignment.
> Otherwise, the compiler can choose to align large variables more for
> performance reasons (e.g. to help vectorization when the variable is
> copied around). Plus, the ABI sometimes requires bigger alignment too
> (e.g. on x86_64 the psABI says that array variables larger than 15
> bytes must have an alignment of at least 16 bytes).
>
> So, if you tweak the above testcase to have either
> struct sched_class a __attribute__((aligned(32)));
> or even remove the attribute from the sched_class struct as well, you
> should get the behavior you want even from GCC 4.9 and other GCC versions.
>
> And if, e.g., 32-byte alignment is not what you really need, but the
> natural alignment of struct sched_class would be sufficient, you could
> use __attribute__((aligned (alignof (struct sched_class)))) on the
> variables instead (perhaps wrapped in some macro).
>
> BTW, the 32- vs. 64- vs. whatever-else-byte alignment is heavily
> architecture and GCC version dependent; it is not the case that larger
> structures will always be 32-byte aligned on all arches no matter what.


Ah, thanks!
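
To make sure I've got it, here's a stand-alone userspace sketch of the
distinction (illustrative names; whether the first variable actually
gets over-aligned depends on the arch and GCC version):

#include <stdio.h>

/* Stand-in for struct sched_class; the type alignment is only a minimum. */
struct big {
	long pad[8];	/* 64 bytes, "large" by the above criteria */
} __attribute__((aligned(32)));

struct big plain;	/* GCC may choose to align this to more than 32 */
struct big pinned __attribute__((aligned(32)));	/* user-specified, taken as-is */

int main(void)
{
	printf("plain:  %p\n", (void *)&plain);
	printf("pinned: %p\n", (void *)&pinned);
	return 0;
}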

In that case something like the below ought to make it good.

I'll go feed it to the robots, see if anything falls over.
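
(Sanity check for anyone following along: on a built vmlinux, something
like "nm vmlinux | grep sched_class" should show the five classes packed
back to back between __begin_sched_classes and __end_sched_classes,
with no holes.)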

---
 kernel/sched/deadline.c  | 4 +++-
 kernel/sched/fair.c      | 4 +++-
 kernel/sched/idle.c      | 4 +++-
 kernel/sched/rt.c        | 4 +++-
 kernel/sched/sched.h     | 3 +--
 kernel/sched/stop_task.c | 3 ++-
 6 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 6d93f4518734..f4855203143d 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2504,7 +2504,9 @@ static void prio_changed_dl(struct rq *rq, struct task_struct *p,
 }
 
 const struct sched_class dl_sched_class
-	__attribute__((section("__dl_sched_class"))) = {
+	__attribute__((section("__dl_sched_class")))
+	__attribute__((aligned(__alignof__(struct sched_class)))) = {
+
 	.enqueue_task		= enqueue_task_dl,
 	.dequeue_task		= dequeue_task_dl,
 	.yield_task		= yield_task_dl,
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index aa4c6227cd6d..9bfa9f545b9a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -11159,7 +11159,9 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task
  * All the scheduling class methods:
  */
 const struct sched_class fair_sched_class
-	__attribute__((section("__fair_sched_class"))) = {
+	__attribute__((section("__fair_sched_class")))
+	__attribute__((aligned(__alignof__(struct sched_class)))) = {
+
 	.enqueue_task		= enqueue_task_fair,
 	.dequeue_task		= dequeue_task_fair,
 	.yield_task		= yield_task_fair,
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index f324dc36fc43..c74ded2cabd2 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -458,7 +458,9 @@ static void update_curr_idle(struct rq *rq)
  * Simple, special scheduling class for the per-CPU idle tasks:
  */
 const struct sched_class idle_sched_class
-	__attribute__((section("__idle_sched_class"))) = {
+	__attribute__((section("__idle_sched_class")))
+	__attribute__((aligned(__alignof__(struct sched_class)))) = {
+
 	/* no enqueue/yield_task for idle tasks */
 
 	/* dequeue is not valid, we print a debug message there: */
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index f215eea6a966..002cdbfa5308 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2430,7 +2430,9 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
 }
 
 const struct sched_class rt_sched_class
-	__attribute__((section("__rt_sched_class"))) = {
+	__attribute__((section("__rt_sched_class")))
+	__attribute__((aligned(__alignof__(struct sched_class)))) = {
+
 	.enqueue_task		= enqueue_task_rt,
 	.dequeue_task		= dequeue_task_rt,
 	.yield_task		= yield_task_rt,
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 28709f6b0975..42cf1e0cbf16 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -67,7 +67,6 @@
 #include <linux/tsacct_kern.h>
 
 #include <asm/tlb.h>
-#include <asm-generic/vmlinux.lds.h>
 
 #ifdef CONFIG_PARAVIRT
 # include <asm/paravirt.h>
@@ -1826,7 +1825,7 @@ struct sched_class {
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	void (*task_change_group)(struct task_struct *p, int type);
 #endif
-} __aligned(STRUCT_ALIGNMENT); /* STRUCT_ALIGN(), vmlinux.lds.h */
+};
 
 static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
 {
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index 394bc8126a1e..7bac6e0a9212 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -110,7 +110,8 @@ static void update_curr_stop(struct rq *rq)
  * Simple, special scheduling class for the per-CPU stop tasks:
  */
 const struct sched_class stop_sched_class
-	__attribute__((section("__stop_sched_class"))) = {
+	__attribute__((section("__stop_sched_class")))
+	__attribute__((aligned(__alignof__(struct sched_class)))) = {
 
 	.enqueue_task		= enqueue_task_stop,
 	.dequeue_task		= dequeue_task_stop,
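
(Aside: Jakub's "perhaps wrapped in some macro" could look something
like the below; hypothetical spelling, not part of the patch above.)

/* Hypothetical helper so each site doesn't repeat the attribute pair: */
#define DEFINE_SCHED_CLASS(name)					\
	const struct sched_class name##_sched_class			\
	__attribute__((section("__" #name "_sched_class")))		\
	__attribute__((aligned(__alignof__(struct sched_class))))

A definition site would then read: DEFINE_SCHED_CLASS(dl) = { ... };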