[RFC 4/4] BloodTest: task

From: Hui Zhu
Date: Fri Oct 13 2017 - 04:57:46 EST


This patch adds a function that collects information about the system
resources each task uses, for example CPU time, read_bytes and write_bytes.
The interface is in "/sys/kernel/debug/bloodtest/task".
"on" is the switch. When it is set to 1, accessing "test" will record task
information.
After recording, accessing "str" returns the recorded data as a string.
Accessing "bin" returns the recorded data in binary; its layout is
described in "bin_format".

Signed-off-by: Hui Zhu <zhuhui@xxxxxxxxxx>
---
include/linux/bloodtest.h | 10 +
kernel/bloodtest/Makefile | 2 +-
kernel/bloodtest/core.c | 21 +++
kernel/bloodtest/internal.h | 13 ++
kernel/bloodtest/perf.c | 33 +---
kernel/bloodtest/task.c | 447 ++++++++++++++++++++++++++++++++++++++++++++
kernel/exit.c | 4 +
7 files changed, 505 insertions(+), 25 deletions(-)
create mode 100644 include/linux/bloodtest.h
create mode 100644 kernel/bloodtest/task.c

diff --git a/include/linux/bloodtest.h b/include/linux/bloodtest.h
new file mode 100644
index 0000000..55f4ebc
--- /dev/null
+++ b/include/linux/bloodtest.h
@@ -0,0 +1,10 @@
+#ifndef __LINUX_BLOODTEST_H
+#define __LINUX_BLOODTEST_H
+
+/*
+ * Forward declaration so this header can be included on its own; only a
+ * pointer to the task is needed by the prototypes below.
+ */
+struct task_struct;
+
+#ifdef CONFIG_BLOODTEST
+/* Record the exit of task @p; called from do_exit() in kernel/exit.c. */
+extern void bt_task_exit_record(struct task_struct *p);
+#else
+/* No-op stub used when CONFIG_BLOODTEST is not set. */
+static inline void bt_task_exit_record(struct task_struct *p) { }
+#endif
+
+#endif /* __LINUX_BLOODTEST_H */
diff --git a/kernel/bloodtest/Makefile b/kernel/bloodtest/Makefile
index 79b7ea0..a6f1a7a 100644
--- a/kernel/bloodtest/Makefile
+++ b/kernel/bloodtest/Makefile
@@ -1,3 +1,3 @@
-obj-y = core.o pages.o kernel_stat.o
+obj-y = core.o pages.o kernel_stat.o task.o

obj-$(CONFIG_PERF_EVENTS) += perf.o
diff --git a/kernel/bloodtest/core.c b/kernel/bloodtest/core.c
index 5ba800c..6cfcdf2 100644
--- a/kernel/bloodtest/core.c
+++ b/kernel/bloodtest/core.c
@@ -16,6 +16,7 @@
/* This function must be called under the protection of bt_lock. */
static void bt_insert(void)
{
+ bt_insert_task();
bt_insert_perf();
bt_insert_kernel_stat();
}
@@ -25,6 +26,7 @@ static void bt_pullout(void)
{
bt_pullout_kernel_stat();
bt_pullout_perf();
+ bt_pullout_task();
}

/* This function must be called under the protection of bt_lock. */
@@ -99,13 +101,32 @@ static int __init bt_init(void)
bt_ktime = ktime_set(1, 0);

ret = bt_perf_init(d);
+ if (ret < 0)
+ goto out;
+
+ ret = bt_task_init(d);

out:
if (ret != 0) {
debugfs_remove(t);
debugfs_remove(d);
+ pr_err("bloodtest: init get error %d\n", ret);
}
return ret;
}

core_initcall(bt_init);
+
+/*
+ * Shared debugfs getter for unsigned int values.
+ * @data: pointer to the unsigned int to report.
+ * @val:  receives the value widened to u64.
+ * The value is sampled with bt_lock held for reading.  Always returns 0.
+ */
+int bt_number_get(void *data, u64 *val)
+{
+	const unsigned int *src = data;
+	u64 snapshot;
+
+	down_read(&bt_lock);
+	snapshot = (u64)*src;
+	up_read(&bt_lock);
+
+	*val = snapshot;
+	return 0;
+}
+
diff --git a/kernel/bloodtest/internal.h b/kernel/bloodtest/internal.h
index f6befc4..5aacf37 100644
--- a/kernel/bloodtest/internal.h
+++ b/kernel/bloodtest/internal.h
@@ -3,6 +3,13 @@

#include <linux/seq_file.h>

+/*
+ * Emit one "bin_format" line describing member @entry of type @s: its
+ * name, its C type, the @sign string, its byte offset and its byte size.
+ * offsetof() yields a size_t, so both numbers are cast to unsigned long
+ * to match the %lu conversions.
+ */
+#define SHOW_FORMAT_1(p, s, entry, type, sign, size) \
+	seq_printf(p, "%s format:%s %s offset:%lu size:%lu\n", \
+		   #entry, #type, sign, \
+		   (unsigned long)offsetof(s, entry), \
+		   (unsigned long)(size))
+/* Same as SHOW_FORMAT_1() with the size taken from sizeof(@type). */
+#define SHOW_FORMAT(p, s, entry, type, sign) \
+	SHOW_FORMAT_1(p, s, entry, type, sign, sizeof(type))
+
extern struct rw_semaphore bt_lock;

struct bt_pages {
@@ -45,4 +52,10 @@ static inline void bt_task_pullout_perf(void) { }
static inline int bt_perf_init(struct dentry *d) { return 0; }
#endif

+/* Task recorder entry points, implemented in task.c. */
+extern void bt_insert_task(void);
+extern void bt_pullout_task(void);
+extern int bt_task_init(struct dentry *d);
+
+/* Shared debugfs getter for unsigned int values, implemented in core.c. */
+extern int bt_number_get(void *data, u64 *val);
+
#endif /* _KERNEL_BLOODTEST_INTERNAL_H */
diff --git a/kernel/bloodtest/perf.c b/kernel/bloodtest/perf.c
index cf23844..d495258 100644
--- a/kernel/bloodtest/perf.c
+++ b/kernel/bloodtest/perf.c
@@ -40,20 +40,7 @@ struct perf_rec {
struct dentry *perf_dir;
struct dentry *perf_str_dir;

-static int perf_number_get(void *data, u64 *val)
-{
- unsigned int *number_point = data;
-
- down_read(&bt_lock);
-
- *val = (u64)*number_point;
-
- up_read(&bt_lock);
-
- return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(perf_number_fops, perf_number_get, NULL, "%llu\n");
+DEFINE_SIMPLE_ATTRIBUTE(perf_number_fops, bt_number_get, NULL, "%llu\n");

static void perf_overflow_handler(struct perf_event *event,
struct perf_sample_data *data,
@@ -402,7 +389,7 @@ static int perf_event_set(void *data, u64 val)
}

DEFINE_SIMPLE_ATTRIBUTE(perf_event_fops,
- perf_number_get,
+ bt_number_get,
perf_event_set, "%llu\n");

static int perf_bin_format_show(struct seq_file *p, void *unused)
@@ -412,16 +399,14 @@ static int perf_bin_format_show(struct seq_file *p, void *unused)
#else
seq_puts(p, "little-endian\n");
#endif
+ seq_printf(p, "page_size:%lu\n", PAGE_SIZE);
seq_printf(p, "size:%lu\n", sizeof(struct perf_entry));

- seq_printf(p, "pc format:u64 unsigned offset:%lu size:%lu\n",
- offsetof(struct perf_entry, pc), sizeof(u64));
- seq_printf(p, "is_user format:u8 unsigned offset:%lu size:%lu\n",
- offsetof(struct perf_entry, is_user), sizeof(u8));
- seq_printf(p, "oom_score_adj format:s16 signed offset:%lu size:%lu\n",
- offsetof(struct perf_entry, oom_score_adj), sizeof(s16));
- seq_printf(p, "comm format:char[] signed offset:%lu size:%d\n",
- offsetof(struct perf_entry, comm), TASK_COMM_LEN);
+ SHOW_FORMAT(p, struct perf_entry, pc, u64, "unsigned");
+ SHOW_FORMAT(p, struct perf_entry, is_user, u8, "unsigned");
+ SHOW_FORMAT(p, struct perf_entry, oom_score_adj, s16, "signed");
+ SHOW_FORMAT_1(p, struct perf_entry, comm, char[], "signed",
+ TASK_COMM_LEN);

return 0;
}
@@ -465,7 +450,7 @@ static int rec_max_set(void *data, u64 val)
}

DEFINE_SIMPLE_ATTRIBUTE(rec_max_fops,
- perf_number_get,
+ bt_number_get,
rec_max_set, "%llu\n");

void
diff --git a/kernel/bloodtest/task.c b/kernel/bloodtest/task.c
new file mode 100644
index 0000000..b44c892
--- /dev/null
+++ b/kernel/bloodtest/task.c
@@ -0,0 +1,447 @@
+#include <linux/debugfs.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/cputime.h>
+
+#include "internal.h"
+
+/* Bit flags stored in process_entry.status. */
+/* lock_task_sighand() failed, so the counters of the entry are all zero. */
+#define PROCESS_ERROR 1
+/* Entry was recorded by bt_insert_task(). */
+#define PROCESS_INSERT 2
+/* Entry was recorded by bt_pullout_task(). */
+#define PROCESS_PULLOUT 4
+/* Entry was recorded by bt_task_exit_record(). */
+#define PROCESS_EXIT 8
+
+/*
+ * One record as stored in rec_pages.  The layout is reported to user
+ * space by task_bin_format_show().
+ */
+struct process_entry {
+ /* Combination of the PROCESS_* flags. */
+ u8 status;
+ /* Copied from task_struct.pid and task_struct.comm. */
+ pid_t pid;
+ char comm[TASK_COMM_LEN];
+
+ /* Filled in by thread_group_cputime_adjusted(). */
+ u64 utime;
+ u64 stime;
+
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+ /* Sums of the task's ioac counters, accumulated in task_record_1(). */
+ u64 read_bytes;
+ u64 write_bytes;
+ u64 cancelled_write_bytes;
+#endif
+};
+
+/* True while the recorder is switched on through the "on" file. */
+static bool rec_on;
+/* Entry limit handed to bt_pages_setup(); writable through "rec_max". */
+static unsigned int rec_max = 1000;
+/* Taken around every recording pass and around rec_running updates. */
+static DEFINE_SPINLOCK(rec_lock);
+/* Set by bt_insert_task(), cleared by bt_pullout_task(). */
+static bool rec_running;
+/* Storage for the recorded process_entry items. */
+static struct bt_pages rec_pages;
+/* Number of records lost because bt_pages_alloc_entry() returned NULL. */
+static unsigned int rec_drop;
+
+/* debugfs directory "task" and the files created by task_alloc(). */
+static struct dentry *task_dir;
+static struct dentry *bin_dir;
+static struct dentry *str_dir;
+static struct dentry *number_dir;
+static struct dentry *page_dir;
+static struct dentry *drop_dir;
+
+/*
+ * seq_file show callback for "bin_format": describe the binary layout of
+ * struct process_entry (byte order, page size, record size and one line
+ * per member).  Always returns 0.
+ */
+static int task_bin_format_show(struct seq_file *p, void *unused)
+{
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	seq_puts(p, "big-endian\n");
+#else
+	seq_puts(p, "little-endian\n");
+#endif
+	seq_printf(p, "page_size:%lu\n", PAGE_SIZE);
+	/* sizeof yields a size_t, which needs %zu rather than %lu. */
+	seq_printf(p, "size:%zu\n", sizeof(struct process_entry));
+
+	SHOW_FORMAT(p, struct process_entry, status, u8, "unsigned");
+	SHOW_FORMAT(p, struct process_entry, pid, pid_t, "signed");
+	SHOW_FORMAT_1(p, struct process_entry, comm, char[], "signed",
+		      TASK_COMM_LEN);
+
+	SHOW_FORMAT(p, struct process_entry, utime, u64, "unsigned");
+	SHOW_FORMAT(p, struct process_entry, stime, u64, "unsigned");
+
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+	SHOW_FORMAT(p, struct process_entry, read_bytes, u64, "unsigned");
+	SHOW_FORMAT(p, struct process_entry, write_bytes, u64, "unsigned");
+	SHOW_FORMAT(p, struct process_entry, cancelled_write_bytes, u64,
+		    "unsigned");
+#endif
+
+	return 0;
+}
+
+/* open() handler for "bin_format": single-record seq_file, no private data. */
+static int task_bin_format_open(struct inode *inode, struct file *file)
+{
+	int err = single_open(file, task_bin_format_show, NULL);
+
+	return err;
+}
+
+/* File operations of the read-only "bin_format" file. */
+static const struct file_operations task_bin_format_fops = {
+	.release = single_release,
+	.llseek = seq_lseek,
+	.read = seq_read,
+	.open = task_bin_format_open,
+};
+
+/*
+ * seq_file show callback for "str": walk every entry stored in rec_pages
+ * and print it as text.  Always returns 0.
+ */
+static int task_str_show(struct seq_file *p, void *unused)
+{
+	struct process_entry *entry = NULL;
+	unsigned int index = 0;
+
+	/* bt_pages_get_entry() returns NULL once all entries were visited. */
+	while ((entry = bt_pages_get_entry(&rec_pages, &index, entry))) {
+		seq_printf(p, "comm:%s pid:%d\n", entry->comm, entry->pid);
+		seq_puts(p, "status:");
+		if (entry->status & PROCESS_ERROR)
+			seq_puts(p, "PROCESS_ERROR | ");
+		if (entry->status & PROCESS_INSERT)
+			seq_puts(p, "PROCESS_INSERT");
+		if (entry->status & PROCESS_PULLOUT)
+			seq_puts(p, "PROCESS_PULLOUT");
+		if (entry->status & PROCESS_EXIT)
+			seq_puts(p, "PROCESS_EXIT");
+		seq_puts(p, "\n");
+
+		/* The counters are unsigned, so print them with %llu. */
+		seq_printf(p, "utime:%llu stime:%llu\n",
+			   (unsigned long long)entry->utime,
+			   (unsigned long long)entry->stime);
+
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+		seq_printf(p, "read_bytes:%llu",
+			   (unsigned long long)entry->read_bytes);
+		seq_printf(p, " write_bytes:%llu",
+			   (unsigned long long)entry->write_bytes);
+		seq_printf(p, " cancelled_write_bytes:%llu\n",
+			   (unsigned long long)entry->cancelled_write_bytes);
+#endif
+		seq_puts(p, "\n");
+	}
+
+	return 0;
+}
+
+/*
+ * open() handler for "str".  bt_lock is taken for reading here and is
+ * dropped again in task_str_release(), so the records cannot change
+ * while the file is open.
+ * Returns 0 on success or the error reported by single_open().
+ */
+static int task_str_open(struct inode *inode, struct file *file)
+{
+	int ret;
+
+	down_read(&bt_lock);
+
+	ret = single_open(file, task_str_show, NULL);
+	/* ->release is not called after a failed open, so unlock here. */
+	if (ret)
+		up_read(&bt_lock);
+
+	return ret;
+}
+
+/*
+ * release() handler for "str": tear down the seq_file, then drop the
+ * read lock on bt_lock that task_str_open() took.
+ */
+static int task_str_release(struct inode *inode, struct file *file)
+{
+	int err;
+
+	err = single_release(inode, file);
+	up_read(&bt_lock);
+
+	return err;
+}
+
+/* File operations of the read-only "str" file. */
+static const struct file_operations task_str_fops = {
+	.release = task_str_release,
+	.llseek = seq_lseek,
+	.read = seq_read,
+	.open = task_str_open,
+};
+
+/* Read-only attribute behind the "number" file (see task_alloc()). */
+DEFINE_SIMPLE_ATTRIBUTE(process_entry_num_fops, bt_pages_entry_num_get, NULL,
+ "%llu\n");
+
+/* Read-only attribute behind the "page" file (see task_alloc()). */
+DEFINE_SIMPLE_ATTRIBUTE(process_page_num_fops, bt_pages_page_num_get, NULL,
+ "%llu\n");
+
+/* Read-only unsigned int attribute; used for the "drop" file. */
+DEFINE_SIMPLE_ATTRIBUTE(rec_number_fops,
+ bt_number_get,
+ NULL, "%llu\n");
+
+/*
+ * Set up rec_pages for process_entry records, limited by rec_max.
+ * Returns whatever bt_pages_setup() returns.
+ */
+static int task_pages_alloc(void)
+{
+	const size_t entry_size = sizeof(struct process_entry);
+
+	return bt_pages_setup(&rec_pages, entry_size, rec_max, -1);
+}
+
+/* Hand the record storage in rec_pages back via bt_pages_release(). */
+static void task_pages_release(void)
+{
+	struct bt_pages *pages = &rec_pages;
+
+	bt_pages_release(pages);
+}
+
+/*
+ * Allocate the record storage and create the per-recording debugfs files
+ * ("bin", "str", "number", "page", "drop") below task_dir.
+ *
+ * Returns 0 on success, the error from task_pages_alloc(), or -ENOMEM if
+ * a file could not be created.  Nothing is undone here on failure; the
+ * caller is expected to call task_release().
+ */
+static int task_alloc(void)
+{
+	int err = task_pages_alloc();
+
+	if (err)
+		return err;
+
+	bin_dir = debugfs_create_file("bin", S_IRUSR, task_dir,
+				      &rec_pages, &bt_pages_bin_fops);
+	if (!bin_dir)
+		return -ENOMEM;
+
+	str_dir = debugfs_create_file("str", S_IRUSR, task_dir,
+				      NULL, &task_str_fops);
+	if (!str_dir)
+		return -ENOMEM;
+
+	number_dir = debugfs_create_file("number", S_IRUSR, task_dir,
+					 &rec_pages, &process_entry_num_fops);
+	if (!number_dir)
+		return -ENOMEM;
+
+	page_dir = debugfs_create_file("page", S_IRUSR, task_dir,
+				       &rec_pages, &process_page_num_fops);
+	if (!page_dir)
+		return -ENOMEM;
+
+	drop_dir = debugfs_create_file("drop", S_IRUSR, task_dir,
+				       &rec_drop, &rec_number_fops);
+	if (!drop_dir)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/*
+ * Remove the debugfs files created by task_alloc() and release the record
+ * storage.
+ *
+ * Each dentry pointer is reset to NULL after removal.  This function is
+ * also used as the error path of task_alloc(), which may fail before it
+ * has re-created every file; without the reset a later call would hand
+ * an already removed dentry to debugfs_remove() again.
+ */
+static void task_release(void)
+{
+	debugfs_remove(bin_dir);
+	bin_dir = NULL;
+	debugfs_remove(str_dir);
+	str_dir = NULL;
+	debugfs_remove(number_dir);
+	number_dir = NULL;
+	debugfs_remove(page_dir);
+	page_dir = NULL;
+	debugfs_remove(drop_dir);
+	drop_dir = NULL;
+
+	task_pages_release();
+}
+
+/*
+ * Setter for the "on" file.  A non-zero @val switches the recorder on
+ * (allocating storage and files), zero switches it off; writing the
+ * current state again is a no-op.  Runs with bt_lock held for writing.
+ * Returns 0 or the error from task_alloc().
+ */
+static int task_on_set(void *data, u64 val)
+{
+	int err = 0;
+
+	down_write(&bt_lock);
+
+	if (val && !rec_on) {
+		err = task_alloc();
+		if (err)
+			task_release();	/* undo a partial setup */
+		else
+			rec_on = true;
+	} else if (!val && rec_on) {
+		task_release();
+		rec_on = false;
+	}
+
+	up_write(&bt_lock);
+	return err;
+}
+
+/*
+ * Getter for the "on" file: stores 1 in @val when the recorder is on and
+ * 0 otherwise, sampled with bt_lock held for reading.  Always returns 0.
+ */
+static int task_on_get(void *data, u64 *val)
+{
+	down_read(&bt_lock);
+	*val = rec_on ? 1 : 0;
+	up_read(&bt_lock);
+
+	return 0;
+}
+
+/* Read/write attribute behind the "on" file. */
+DEFINE_SIMPLE_ATTRIBUTE(task_on_fops, task_on_get, task_on_set, "%llu\n");
+
+/*
+ * Setter for the "rec_max" file.
+ *
+ * Stores @val in rec_max and, if the recorder is on, re-creates the
+ * record storage with the new limit.  If that re-allocation fails the
+ * recorder is torn down and switched off.
+ *
+ * Returns 0 on success, -EINVAL if @val does not fit in the unsigned int
+ * rec_max, or the error from task_pages_alloc().
+ */
+static int rec_max_set(void *data, u64 val)
+{
+	int ret = 0;
+
+	/* Refuse values that would be silently truncated below. */
+	if (val != (unsigned int)val)
+		return -EINVAL;
+
+	down_write(&bt_lock);
+	if (rec_max == val)
+		goto out;
+
+	rec_max = val;
+
+	if (!rec_on)
+		goto out;
+
+	task_pages_release();
+	ret = task_pages_alloc();
+	if (ret) {
+		task_release();
+		rec_on = false;
+		goto out;
+	}
+
+out:
+	up_write(&bt_lock);
+	return ret;
+}
+
+/* Read/write attribute behind the "rec_max" file. */
+DEFINE_SIMPLE_ATTRIBUTE(rec_max_fops,
+			bt_number_get,
+			rec_max_set, "%llu\n");
+
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+/* Add the three byte counters of @io onto the matching fields of @entry. */
+static void
+entry_io_accounting_add(struct process_entry *entry,
+			struct task_io_accounting *io)
+{
+	entry->read_bytes = entry->read_bytes + io->read_bytes;
+	entry->write_bytes = entry->write_bytes + io->write_bytes;
+	entry->cancelled_write_bytes =
+		entry->cancelled_write_bytes + io->cancelled_write_bytes;
+}
+#endif
+
+/*
+ * Append one record describing thread group @p to rec_pages.
+ * @status: PROCESS_* flag naming the reason for the record.
+ * @p:      task to describe.
+ *
+ * If no entry can be allocated the record is dropped and rec_drop is
+ * incremented.  If the sighand lock of @p cannot be taken, the entry is
+ * flagged PROCESS_ERROR and all of its counters stay zero.
+ */
+static void
+task_record_1(u8 status, struct task_struct *p)
+{
+	struct process_entry *entry;
+	unsigned long flags;
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+	struct task_struct *t = p;
+#endif
+
+	entry = bt_pages_alloc_entry(&rec_pages);
+	if (!entry) {
+		rec_drop++;
+		return;
+	}
+
+	/*
+	 * Start from an all-zero record: this covers the counters on the
+	 * error path and the padding bytes, which would otherwise reach
+	 * user space through the binary export.
+	 */
+	memset(entry, 0, sizeof(*entry));
+	entry->status = status;
+	entry->pid = p->pid;
+	/* Copy at most size - 1 bytes so the zeroed last byte terminates. */
+	strncpy(entry->comm, p->comm, sizeof(entry->comm) - 1);
+
+	if (!lock_task_sighand(p, &flags)) {
+		entry->status |= PROCESS_ERROR;
+		return;
+	}
+
+	thread_group_cputime_adjusted(p, &entry->utime, &entry->stime);
+#ifdef CONFIG_TASK_IO_ACCOUNTING
+	entry_io_accounting_add(entry, &p->ioac);
+	entry_io_accounting_add(entry, &p->signal->ioac);
+	/*
+	 * Add each other thread's own counters.  signal->ioac is shared by
+	 * the whole group and was already added once above.
+	 */
+	while_each_thread(p, t)
+		entry_io_accounting_add(entry, &t->ioac);
+#endif
+	unlock_task_sighand(p, &flags);
+}
+
+/*
+ * Record every process with the given @status flag.  The process list is
+ * walked inside an RCU read-side critical section.
+ */
+static void
+task_record(u8 status)
+{
+	struct task_struct *proc;
+
+	rcu_read_lock();
+	for_each_process(proc) {
+		task_record_1(status, proc);
+	}
+	rcu_read_unlock();
+}
+
+/*
+ * Start of a test: discard old records, reset the drop counter, mark the
+ * recorder as running and snapshot all processes as PROCESS_INSERT.
+ * Does nothing while the recorder is switched off.
+ */
+void bt_insert_task(void)
+{
+	unsigned long irq_state;
+
+	if (rec_on) {
+		spin_lock_irqsave(&rec_lock, irq_state);
+
+		bt_pages_clear(&rec_pages);
+		rec_drop = 0;
+		rec_running = true;
+		task_record(PROCESS_INSERT);
+
+		spin_unlock_irqrestore(&rec_lock, irq_state);
+	}
+}
+
+/*
+ * End of a test: snapshot all processes as PROCESS_PULLOUT and mark the
+ * recorder as no longer running.  Does nothing while the recorder is
+ * switched off.
+ */
+void bt_pullout_task(void)
+{
+	unsigned long irq_state;
+
+	if (rec_on) {
+		spin_lock_irqsave(&rec_lock, irq_state);
+
+		task_record(PROCESS_PULLOUT);
+		rec_running = false;
+
+		spin_unlock_irqrestore(&rec_lock, irq_state);
+	}
+}
+
+/*
+ * Record the exit of the thread group that @p belongs to.
+ * @p: the exiting task, passed in by do_exit().
+ *
+ * Only records while a test is running (rec_running).  The record is
+ * always made for the thread-group leader, which is derived from @p
+ * itself rather than from current, so the argument is honoured.
+ */
+void bt_task_exit_record(struct task_struct *p)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&rec_lock, flags);
+
+	if (rec_running) {
+		rcu_read_lock();
+		/* For a group leader, group_leader points back to itself. */
+		task_record_1(PROCESS_EXIT, p->group_leader);
+		rcu_read_unlock();
+	}
+
+	spin_unlock_irqrestore(&rec_lock, flags);
+}
+
+/*
+ * Create the "task" debugfs directory below @f together with its
+ * permanent control files "on", "bin_format" and "rec_max".
+ *
+ * Returns 0 on success or -ENOMEM if any entry could not be created, in
+ * which case everything created so far is removed again.
+ */
+int __init bt_task_init(struct dentry *f)
+{
+	int ret = -ENOMEM;
+	struct dentry *on = NULL, *format = NULL, *max = NULL;
+
+	memset(&rec_pages, 0, sizeof(struct bt_pages));
+
+	task_dir = debugfs_create_dir("task", f);
+	if (!task_dir)
+		goto out;
+
+	on = debugfs_create_file("on", S_IRUSR | S_IWUSR, task_dir, NULL,
+				 &task_on_fops);
+	if (!on)
+		goto out;
+
+	format = debugfs_create_file("bin_format", S_IRUSR,
+				     task_dir, NULL,
+				     &task_bin_format_fops);
+	/* Take the common error path so "on" and the directory are removed. */
+	if (!format)
+		goto out;
+
+	max = debugfs_create_file("rec_max", S_IRUSR | S_IWUSR, task_dir,
+				  &rec_max,
+				  &rec_max_fops);
+	if (!max)
+		goto out;
+
+	ret = 0;
+out:
+	if (ret) {
+		/* debugfs_remove() ignores NULL, so no per-step labels needed. */
+		debugfs_remove(on);
+		debugfs_remove(format);
+		debugfs_remove(max);
+		debugfs_remove(task_dir);
+	}
+	return ret;
+}
diff --git a/kernel/exit.c b/kernel/exit.c
index f2cd53e..513de91 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -62,6 +62,7 @@
#include <linux/random.h>
#include <linux/rcuwait.h>
#include <linux/compat.h>
+#include <linux/bloodtest.h>

#include <linux/uaccess.h>
#include <asm/unistd.h>
@@ -855,6 +856,9 @@ void __noreturn do_exit(long code)
acct_process();
trace_sched_process_exit(tsk);

+ if (group_dead)
+ bt_task_exit_record(tsk);
+
exit_sem(tsk);
exit_shm(tsk);
exit_files(tsk);
--
1.9.1