Packable data structures found by pahole

From: Arnaldo Carvalho de Melo
Date: Wed Feb 11 2009 - 08:51:18 EST


Em Wed, Feb 11, 2009 at 01:22:36PM +0100, Ingo Molnar escreveu:
> Is there anything packable in core kernel structures like task struct?

I still haven't added a heuristic to avoid reporting members with
explicit __alignment attributes, as these are not encoded in DWARF. I'll
work on that soon, but until then we can use this as a starting point.

Columns: struct name, current size, size after --reorganize, savings (all in bytes)

$ pahole --packable ../build/blkftrace/vmlinux | sort -k4 -nr
vc_data 432 176 256
is this exported to userspace?

rcu_ctrlblk 128 64 64
has ____cacheline_internodealigned_in_smp

timex 208 152 56
syscall interface

hh_cache 128 72 56
has ____cacheline_aligned_in_smp

cpu_workqueue_struct 128 72 56
is ____cacheline_aligned

rchan_buf 256 216 40
is ____cacheline_aligned

tty_struct 1328 1296 32
this one doesn't have any annotation, looks ripe for --reorganize

task_struct 6008 5976 32

Printing this one here; the rest of the possibly packable data
structures are listed after it:

struct task_struct {
volatile long int state; /* 0 8 */
void * stack; /* 8 8 */
atomic_t usage; /* 16 4 */
unsigned int flags; /* 20 4 */
unsigned int ptrace; /* 24 4 */
int lock_depth; /* 28 4 */
int prio; /* 32 4 */
int static_prio; /* 36 4 */
int normal_prio; /* 40 4 */
unsigned int rt_priority; /* 44 4 */
const struct sched_class * sched_class; /* 48 8 */
struct sched_entity se; /* 56 368 */
/* --- cacheline 6 boundary (384 bytes) was 40 bytes ago --- */
struct sched_rt_entity rt; /* 424 64 */
/* --- cacheline 7 boundary (448 bytes) was 40 bytes ago --- */
unsigned char fpu_counter; /* 488 1 */
s8 oomkilladj; /* 489 1 */

/* XXX 2 bytes hole, try to pack */

unsigned int btrace_seq; /* 492 4 */
unsigned int policy; /* 496 4 */

/* XXX 4 bytes hole, try to pack */

cpumask_t cpus_allowed; /* 504 8 */
/* --- cacheline 8 boundary (512 bytes) --- */
struct sched_info sched_info; /* 512 40 */

/* XXX last struct has 4 bytes of padding */

struct list_head tasks; /* 552 16 */
struct plist_node pushable_tasks; /* 568 40 */
/* --- cacheline 9 boundary (576 bytes) was 32 bytes ago --- */
struct mm_struct * mm; /* 608 8 */
struct mm_struct * active_mm; /* 616 8 */
struct linux_binfmt * binfmt; /* 624 8 */
int exit_state; /* 632 4 */
int exit_code; /* 636 4 */
/* --- cacheline 10 boundary (640 bytes) --- */
int exit_signal; /* 640 4 */
int pdeath_signal; /* 644 4 */
unsigned int personality; /* 648 4 */
unsigned int did_exec:1; /* 652:31 4 */

/* XXX 31 bits hole, try to pack */

pid_t pid; /* 656 4 */
pid_t tgid; /* 660 4 */
long unsigned int stack_canary; /* 664 8 */
struct task_struct * real_parent; /* 672 8 */
struct task_struct * parent; /* 680 8 */
struct list_head children; /* 688 16 */
/* --- cacheline 11 boundary (704 bytes) --- */
struct list_head sibling; /* 704 16 */
struct task_struct * group_leader; /* 720 8 */
struct list_head ptraced; /* 728 16 */
struct list_head ptrace_entry; /* 744 16 */
struct bts_tracer * bts; /* 760 8 */
/* --- cacheline 12 boundary (768 bytes) --- */
void * bts_buffer; /* 768 8 */
size_t bts_size; /* 776 8 */
struct pid_link pids[3]; /* 784 72 */
/* --- cacheline 13 boundary (832 bytes) was 24 bytes ago --- */
struct list_head thread_group; /* 856 16 */
struct completion * vfork_done; /* 872 8 */
int * set_child_tid; /* 880 8 */
int * clear_child_tid; /* 888 8 */
/* --- cacheline 14 boundary (896 bytes) --- */
cputime_t utime; /* 896 8 */
cputime_t stime; /* 904 8 */
cputime_t utimescaled; /* 912 8 */
cputime_t stimescaled; /* 920 8 */
cputime_t gtime; /* 928 8 */
cputime_t prev_utime; /* 936 8 */
cputime_t prev_stime; /* 944 8 */
long unsigned int nvcsw; /* 952 8 */
/* --- cacheline 15 boundary (960 bytes) --- */
long unsigned int nivcsw; /* 960 8 */
struct timespec start_time; /* 968 16 */
struct timespec real_start_time; /* 984 16 */
long unsigned int min_flt; /* 1000 8 */
long unsigned int maj_flt; /* 1008 8 */
struct task_cputime cputime_expires; /* 1016 24 */
/* --- cacheline 16 boundary (1024 bytes) was 16 bytes ago --- */
struct list_head cpu_timers[3]; /* 1040 48 */
/* --- cacheline 17 boundary (1088 bytes) --- */
const struct cred * real_cred; /* 1088 8 */
const struct cred * cred; /* 1096 8 */
struct mutex cred_exec_mutex; /* 1104 32 */
char comm[16]; /* 1136 16 */
/* --- cacheline 18 boundary (1152 bytes) --- */
int link_count; /* 1152 4 */
int total_link_count; /* 1156 4 */
struct sysv_sem sysvsem; /* 1160 8 */
long unsigned int last_switch_count; /* 1168 8 */
struct thread_struct thread; /* 1176 208 */

/* XXX last struct has 4 bytes of padding */

/* --- cacheline 21 boundary (1344 bytes) was 40 bytes ago --- */
struct fs_struct * fs; /* 1384 8 */
struct files_struct * files; /* 1392 8 */
struct nsproxy * nsproxy; /* 1400 8 */
/* --- cacheline 22 boundary (1408 bytes) --- */
struct signal_struct * signal; /* 1408 8 */
struct sighand_struct * sighand; /* 1416 8 */
sigset_t blocked; /* 1424 8 */
sigset_t real_blocked; /* 1432 8 */
sigset_t saved_sigmask; /* 1440 8 */
struct sigpending pending; /* 1448 24 */
/* --- cacheline 23 boundary (1472 bytes) --- */
long unsigned int sas_ss_sp; /* 1472 8 */
size_t sas_ss_size; /* 1480 8 */
int (*notifier)(void *); /* 1488 8 */
void * notifier_data; /* 1496 8 */
sigset_t * notifier_mask; /* 1504 8 */
struct audit_context * audit_context; /* 1512 8 */
uid_t loginuid; /* 1520 4 */
unsigned int sessionid; /* 1524 4 */
seccomp_t seccomp; /* 1528 4 */
u32 parent_exec_id; /* 1532 4 */
/* --- cacheline 24 boundary (1536 bytes) --- */
u32 self_exec_id; /* 1536 4 */
spinlock_t alloc_lock; /* 1540 4 */
spinlock_t pi_lock; /* 1544 4 */

/* XXX 4 bytes hole, try to pack */

struct plist_head pi_waiters; /* 1552 32 */
struct rt_mutex_waiter * pi_blocked_on; /* 1584 8 */
unsigned int irq_events; /* 1592 4 */
int hardirqs_enabled; /* 1596 4 */
/* --- cacheline 25 boundary (1600 bytes) --- */
long unsigned int hardirq_enable_ip; /* 1600 8 */
unsigned int hardirq_enable_event; /* 1608 4 */

/* XXX 4 bytes hole, try to pack */

long unsigned int hardirq_disable_ip; /* 1616 8 */
unsigned int hardirq_disable_event; /* 1624 4 */
int softirqs_enabled; /* 1628 4 */
long unsigned int softirq_disable_ip; /* 1632 8 */
unsigned int softirq_disable_event; /* 1640 4 */

/* XXX 4 bytes hole, try to pack */

long unsigned int softirq_enable_ip; /* 1648 8 */
unsigned int softirq_enable_event; /* 1656 4 */
int hardirq_context; /* 1660 4 */
/* --- cacheline 26 boundary (1664 bytes) --- */
int softirq_context; /* 1664 4 */

/* XXX 4 bytes hole, try to pack */

void * journal_info; /* 1672 8 */
struct bio * bio_list; /* 1680 8 */
struct bio * * bio_tail; /* 1688 8 */
struct reclaim_state * reclaim_state; /* 1696 8 */
struct backing_dev_info * backing_dev_info; /* 1704 8 */
struct io_context * io_context; /* 1712 8 */
long unsigned int ptrace_message; /* 1720 8 */
/* --- cacheline 27 boundary (1728 bytes) --- */
siginfo_t * last_siginfo; /* 1728 8 */
struct task_io_accounting ioac; /* 1736 56 */
/* --- cacheline 28 boundary (1792 bytes) --- */
u64 acct_rss_mem1; /* 1792 8 */
u64 acct_vm_mem1; /* 1800 8 */
cputime_t acct_timexpd; /* 1808 8 */
nodemask_t mems_allowed; /* 1816 64 */
/* --- cacheline 29 boundary (1856 bytes) was 24 bytes ago --- */
int cpuset_mems_generation; /* 1880 4 */
int cpuset_mem_spread_rotor; /* 1884 4 */
struct css_set * cgroups; /* 1888 8 */
struct list_head cg_list; /* 1896 16 */
struct robust_list_head * robust_list; /* 1912 8 */
/* --- cacheline 30 boundary (1920 bytes) --- */
struct compat_robust_list_head * compat_robust_list; /* 1920 8 */
struct list_head pi_state_list; /* 1928 16 */
struct futex_pi_state * pi_state_cache; /* 1944 8 */
struct perf_counter_context perf_counter_ctx; /* 1952 80 */
/* --- cacheline 31 boundary (1984 bytes) was 48 bytes ago --- */
struct mempolicy * mempolicy; /* 2032 8 */
short int il_next; /* 2040 2 */

/* XXX 2 bytes hole, try to pack */

atomic_t fs_excl; /* 2044 4 */
/* --- cacheline 32 boundary (2048 bytes) --- */
struct rcu_head rcu; /* 2048 16 */
struct pipe_inode_info * splice_pipe; /* 2064 8 */
struct task_delay_info * delays; /* 2072 8 */
struct prop_local_single dirties; /* 2080 24 */
int latency_record_count; /* 2104 4 */

/* XXX 4 bytes hole, try to pack */

/* --- cacheline 33 boundary (2112 bytes) --- */
struct latency_record latency_record[32]; /* 2112 3840 */
/* --- cacheline 93 boundary (5952 bytes) --- */
long unsigned int timer_slack_ns; /* 5952 8 */
long unsigned int default_timer_slack_ns; /* 5960 8 */
struct list_head * scm_work_list; /* 5968 8 */
int curr_ret_stack; /* 5976 4 */

/* XXX 4 bytes hole, try to pack */

struct ftrace_ret_stack * ret_stack; /* 5984 8 */
atomic_t trace_overrun; /* 5992 4 */
atomic_t tracing_graph_pause; /* 5996 4 */
long unsigned int trace; /* 6000 8 */

/* size: 6008, cachelines: 94, members: 148 */
/* sum members: 5976, holes: 9, sum holes: 32 */
/* bit holes: 1, sum bit holes: 31 bits */
/* paddings: 2, sum paddings: 8 */
/* last cacheline: 56 bytes */
}; /* definitions: 742 */

If we ask pahole to reorganize it, it would perform these steps:

$ pahole -C task_struct --reorganize --show_reorg_steps kernel/sched.o|grep ^\/
/* Demoting bitfield ('did_exec' ... 'did_exec') from 'unsigned int' to
* 'unsigned char' */

/* Moving bitfield('did_exec' ... 'did_exec') from after 'personality'
* to after 'oomkilladj' */

/* Moving 'personality' from after 'pdeath_signal' to after 'policy' */

/* Moving 'hardirq_enable_event' from after 'hardirq_enable_ip' to after
* 'pi_lock' */

/* Moving 'softirq_context' from after 'hardirq_context' to after
* 'softirq_disable_event' */

/* Moving 'curr_ret_stack' from after 'scm_work_list' to after
* 'latency_record_count' */

And the new stats would be:

/* size: 5976, cachelines: 94, members: 148 */
/* sum members: 5973, holes: 2, sum holes: 3 */
/* bit holes: 1, sum bit holes: 7 bits */
/* paddings: 2, sum paddings: 8 */
/* last cacheline: 24 bytes */
}; /* saved 32 bytes! */

It would still have these holes/paddings:

<SNIP>

s8 oomkilladj; /* 489 1 */
unsigned char did_exec:1; /* 490: 7 1 */

/* XXX 7 bits hole, try to pack */
/* XXX 1 byte hole, try to pack */

unsigned int btrace_seq; /* 492 4 */

<SNIP>

/* --- cacheline 8 boundary (512 bytes) --- */
struct sched_info sched_info; /* 512 40 */

/* XXX last struct has 4 bytes of padding */

struct list_head tasks; /* 552 16 */

<SNIP>

long unsigned int last_switch_count; /* 1160 8 */
struct thread_struct thread; /* 1168 208 */

/* XXX last struct has 4 bytes of padding */

/* --- cacheline 21 boundary (1344 bytes) was 32 bytes ago --- */

<SNIP>

/* --- cacheline 31 boundary (1984 bytes) was 24 bytes ago --- */
struct mempolicy * mempolicy; /* 2008 8 */
short int il_next; /* 2016 2 */

/* XXX 2 bytes hole, try to pack */

atomic_t fs_excl; /* 2020 4 */

<SNIP>

I put the pahole vmlinux output at
http://fedorapeople.org/~acme/pahole/vmlinux.pahole.c

zone 1536 1512 24
super_block 768 744 24
Scsi_Host 1384 1360 24
scsi_device 1312 1288 24
rq 2456 2432 24
request_queue 2272 2248 24
net_device 1600 1576 24
cp_private 1344 1320 24
clocksource 192 168 24
ata_port 11184 11160 24
taskstats 328 312 16
sock 544 528 16
rtl8139_private 448 432 16
rtentry 120 104 16
pci_dev 1624 1608 16
packet_sock 760 744 16
mtd_info 352 336 16
mousedev 784 768 16
module 512 496 16
mm_struct 808 792 16
loop_device 400 384 16
journal_s 568 552 16
gendisk 720 704 16
floppy_drive_params 128 112 16
files_struct 704 688 16
dio 856 840 16
block_device 248 232 16
audit_context 1968 1952 16
xfrm_state 632 624 8
writeback_control 64 56 8
vt_spawn_console 24 16 8
vmap_block_queue 48 40 8
vfsmount 224 216 8
user_struct 96 88 8
unix_skb_parms 32 24 8
unity_map_entry 48 40 8
uart_port 200 192 8
tty_ldisc_ops 144 136 8
tty_bufhead 152 144 8
tty_audit_buf 72 64 8
transaction_s 168 160 8
tick_sched 248 240 8
thread_struct 208 200 8
sysfs_dirent 80 72 8
sk_buff 192 184 8
signal_struct 944 936 8
sighand_struct 2088 2080 8
sg_io_hdr 88 80 8
serio 704 696 8
semid_ds 88 80 8
scsi_target 616 608 8
scsi_pointer 64 56 8
scm_cookie 40 32 8
rt_rq 1760 1752 8
rtc_device 744 736 8
root_domain 1704 1696 8
ring_buffer_per_cpu 112 104 8
ring_buffer 72 64 8
request 368 360 8
rchan 376 368 8
psmouse_protocol 48 40 8
proto 336 328 8
protection_domain 48 40 8
prop_local_percpu 64 56 8
proc_dir_entry 160 152 8
power_supply 112 104 8
pnp_card 632 624 8
platform_device 520 512 8
pid_namespace 2112 2104 8
pglist_data 80576 80568 8
perf_counter_context 80 72 8
perf_counter 4408 4400 8
pci_root_info 40 32 8
old_serial_port 40 32 8
net 592 584 8
neigh_table 472 464 8
neighbour 240 232 8
ncp_mount_data_v4 80 72 8
mtd_oob_ops 64 56 8
msghdr 56 48 8
mnt_namespace 64 56 8
ml_device 888 880 8
loop_info 168 160 8
kprobe 128 120 8
kparam_array 48 40 8
kmem_cache 4352 4344 8
irq_desc 192 184 8
ip_sf_list 40 32 8
ip_mc_list 168 160 8
ipc_namespace 296 288 8
input_dev 2352 2344 8
inode 560 552 8
inet_timewait_death_row 568 560 8
inet6_ifaddr 184 176 8
in_device 376 368 8
i387_soft_struct 136 128 8
hrtimer_cpu_base 160 152 8
hid_field 112 104 8
hid_device 7144 7136 8
gen_estimator 112 104 8
fs_quota_stat 80 72 8
floppy_write_errors 40 32 8
floppy_fdc_state 40 32 8
flock 32 24 8
fb_info 712 704 8
ext3_sb_info 440 432 8
ext3_inode_info 768 760 8
dquot 232 224 8
cpuinfo_x86 192 184 8
clock_event_device 128 120 8
cdrom_generic_command 64 56 8
cache_detail 224 216 8
bsg_device 160 152 8
bsg_class_device 48 40 8
blk_user_trace_setup 72 64 8
blk_trace 96 88 8
blkcipher_walk 112 104 8
audit_watch 72 64 8
atkbd 1488 1480 8
ata_queued_cmd 224 216 8
ata_host 72 64 8
ata_device 1168 1160 8
as_io_context 104 96 8
amd_iommu 120 112 8
agp_kern_info 80 72 8
agp_bridge_data 200 192 8
acpi_thermal 1472 1464 8
acpi_pscope_state 56 48 8
acpi_prt_entry 48 40 8
acpi_processor_power 2112 2104 8
acpi_processor_performance 112 104 8
acpi_processor_cx 136 128 8
acpi_blacklist_item 56 48 8
tty_port 136 132 4
scsi_host_cmd_pool 48 44 4
rtentry32 84 80 4
msqid_ds 104 100 4
inotify_watch 64 60 4
in6_rtmsg 80 76 4
fown_struct 32 28 4
fib_iter_state 56 52 4
entropy_store 56 52 4
compat_ncp_mount_data 56 52 4
compat_loop_info 140 136 4
compat_floppy_fdc_state 32 28 4
compat_floppy_drive_params 88 84 4
agp_allocate 24 20 4
acpi_parse_obj_named 72 68 4
fb_monspecs 144 141 3
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/