[RFC v2 11/83] Add timing and I/O statistics for performance analysis and profiling.

From: Andiry Xu
Date: Sat Mar 10 2018 - 13:43:22 EST


From: Andiry Xu <jix024@xxxxxxxxxxx>

Signed-off-by: Andiry Xu <jix024@xxxxxxxxxxx>
---
fs/nova/Makefile | 2 +-
fs/nova/nova.h | 12 +++
fs/nova/stats.c | 263 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
fs/nova/stats.h | 178 +++++++++++++++++++++++++++++++++++++
fs/nova/super.c | 6 ++
5 files changed, 460 insertions(+), 1 deletion(-)
create mode 100644 fs/nova/stats.c
create mode 100644 fs/nova/stats.h

diff --git a/fs/nova/Makefile b/fs/nova/Makefile
index eb19646..886356a 100644
--- a/fs/nova/Makefile
+++ b/fs/nova/Makefile
@@ -4,4 +4,4 @@

obj-$(CONFIG_NOVA_FS) += nova.o

-nova-y := bbuild.o inode.o rebuild.o super.o
+nova-y := bbuild.o inode.o rebuild.o stats.o super.o
diff --git a/fs/nova/nova.h b/fs/nova/nova.h
index ded9fe8..ba7ffca 100644
--- a/fs/nova/nova.h
+++ b/fs/nova/nova.h
@@ -48,6 +48,7 @@
#include <linux/pagevec.h>

#include "nova_def.h"
+#include "stats.h"

#define PAGE_SHIFT_2M 21
#define PAGE_SHIFT_1G 30
@@ -135,6 +136,10 @@ extern unsigned int nova_dbgmask;
#define ANY_CPU (65536)
#define FREE_BATCH (16)

+
+extern int measure_timing;
+
+
extern unsigned int blk_type_to_shift[NOVA_BLOCK_TYPE_MAX];
extern unsigned int blk_type_to_size[NOVA_BLOCK_TYPE_MAX];

@@ -306,4 +311,11 @@ static inline u64 nova_get_epoch_id(struct super_block *sb)
int nova_rebuild_inode(struct super_block *sb, struct nova_inode_info *si,
u64 ino, u64 pi_addr, int rebuild_dir);

+/* stats.c */
+void nova_get_timing_stats(void);
+void nova_get_IO_stats(void);
+void nova_print_timing_stats(struct super_block *sb);
+void nova_clear_stats(struct super_block *sb);
+void nova_print_inode(struct nova_inode *pi);
+
#endif /* __NOVA_H */
diff --git a/fs/nova/stats.c b/fs/nova/stats.c
new file mode 100644
index 0000000..4b7c317
--- /dev/null
+++ b/fs/nova/stats.c
@@ -0,0 +1,263 @@
+/*
+ * NOVA File System statistics
+ *
+ * Copyright 2015-2016 Regents of the University of California,
+ * UCSD Non-Volatile Systems Lab, Andiry Xu <jix024@xxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include "nova.h"
+
+const char *Timingstring[TIMING_NUM] = {
+ /* Init */
+ "================ Initialization ================",
+ "init",
+ "mount",
+ "ioremap",
+ "new_init",
+ "recovery",
+
+ /* Namei operations */
+ "============= Directory operations =============",
+ "create",
+ "lookup",
+ "link",
+ "unlink",
+ "symlink",
+ "mkdir",
+ "rmdir",
+ "mknod",
+ "rename",
+ "readdir",
+ "add_dentry",
+ "remove_dentry",
+ "setattr",
+ "setsize",
+
+ /* I/O operations */
+ "================ I/O operations ================",
+ "dax_read",
+ "cow_write",
+ "inplace_write",
+ "copy_to_nvmm",
+ "dax_get_block",
+ "read_iter",
+ "write_iter",
+
+ /* Memory operations */
+ "============== Memory operations ===============",
+ "memcpy_read_nvmm",
+ "memcpy_write_nvmm",
+ "memcpy_write_back_to_nvmm",
+ "handle_partial_block",
+
+ /* Memory management */
+ "============== Memory management ===============",
+ "alloc_blocks",
+ "new_data_blocks",
+ "new_log_blocks",
+ "free_blocks",
+ "free_data_blocks",
+ "free_log_blocks",
+
+ /* Transaction */
+ "================= Transaction ==================",
+ "transaction_new_inode",
+ "transaction_link_change",
+ "update_tail",
+
+ /* Logging */
+ "============= Logging operations ===============",
+ "append_dir_entry",
+ "append_file_entry",
+ "append_link_change",
+ "append_setattr",
+ "inplace_update_entry",
+
+ /* Tree */
+ "=============== Tree operations ================",
+ "checking_entry",
+ "assign_blocks",
+
+ /* GC */
+ "============= Garbage collection ===============",
+ "log_fast_gc",
+ "log_thorough_gc",
+ "check_invalid_log",
+
+ /* Others */
+ "================ Miscellaneous =================",
+ "find_cache_page",
+ "fsync",
+ "write_pages",
+ "fallocate",
+ "direct_IO",
+ "free_old_entry",
+ "delete_file_tree",
+ "delete_dir_tree",
+ "new_vfs_inode",
+ "new_nova_inode",
+ "free_inode",
+ "free_inode_log",
+ "evict_inode",
+
+ /* Mmap */
+ "=============== MMap operations ================",
+ "mmap_page_fault",
+ "mmap_pmd_fault",
+ "mmap_pfn_mkwrite",
+
+ /* Rebuild */
+ "=================== Rebuild ====================",
+ "rebuild_dir",
+ "rebuild_file",
+};
+
+u64 Timingstats[TIMING_NUM];
+DEFINE_PER_CPU(u64[TIMING_NUM], Timingstats_percpu);
+u64 Countstats[TIMING_NUM];
+DEFINE_PER_CPU(u64[TIMING_NUM], Countstats_percpu);
+u64 IOstats[STATS_NUM];
+DEFINE_PER_CPU(u64[STATS_NUM], IOstats_percpu);
+
+static void nova_print_IO_stats(struct super_block *sb)
+{
+ nova_info("=========== NOVA I/O stats ===========\n");
+ nova_info("Read %llu, bytes %llu, average %llu\n",
+ Countstats[dax_read_t], IOstats[read_bytes],
+ Countstats[dax_read_t] ?
+ IOstats[read_bytes] / Countstats[dax_read_t] : 0);
+ nova_info("COW write %llu, bytes %llu, average %llu, write breaks %llu, average %llu\n",
+ Countstats[cow_write_t], IOstats[cow_write_bytes],
+ Countstats[cow_write_t] ?
+ IOstats[cow_write_bytes] / Countstats[cow_write_t] : 0,
+ IOstats[cow_write_breaks], Countstats[cow_write_t] ?
+ IOstats[cow_write_breaks] / Countstats[cow_write_t]
+ : 0);
+ nova_info("Inplace write %llu, bytes %llu, average %llu, write breaks %llu, average %llu\n",
+ Countstats[inplace_write_t], IOstats[inplace_write_bytes],
+ Countstats[inplace_write_t] ?
+ IOstats[inplace_write_bytes] /
+ Countstats[inplace_write_t] : 0,
+ IOstats[inplace_write_breaks], Countstats[inplace_write_t] ?
+ IOstats[inplace_write_breaks] /
+ Countstats[inplace_write_t] : 0);
+}
+
+void nova_get_timing_stats(void)
+{
+ int i;
+ int cpu;
+
+ for (i = 0; i < TIMING_NUM; i++) {
+ Timingstats[i] = 0;
+ Countstats[i] = 0;
+ for_each_possible_cpu(cpu) {
+ Timingstats[i] += per_cpu(Timingstats_percpu[i], cpu);
+ Countstats[i] += per_cpu(Countstats_percpu[i], cpu);
+ }
+ }
+}
+
+void nova_get_IO_stats(void)
+{
+ int i;
+ int cpu;
+
+ for (i = 0; i < STATS_NUM; i++) {
+ IOstats[i] = 0;
+ for_each_possible_cpu(cpu)
+ IOstats[i] += per_cpu(IOstats_percpu[i], cpu);
+ }
+}
+
+void nova_print_timing_stats(struct super_block *sb)
+{
+ int i;
+
+ nova_get_timing_stats();
+ nova_get_IO_stats();
+
+ nova_info("=========== NOVA kernel timing stats ============\n");
+ for (i = 0; i < TIMING_NUM; i++) {
+ /* Title */
+ if (Timingstring[i][0] == '=') {
+ nova_info("\n%s\n\n", Timingstring[i]);
+ continue;
+ }
+
+ if (measure_timing || Timingstats[i]) {
+ nova_info("%s: count %llu, timing %llu, average %llu\n",
+ Timingstring[i],
+ Countstats[i],
+ Timingstats[i],
+ Countstats[i] ?
+ Timingstats[i] / Countstats[i] : 0);
+ } else {
+ nova_info("%s: count %llu\n",
+ Timingstring[i],
+ Countstats[i]);
+ }
+ }
+
+ nova_info("\n");
+ nova_print_IO_stats(sb);
+}
+
+static void nova_clear_timing_stats(void)
+{
+ int i;
+ int cpu;
+
+ for (i = 0; i < TIMING_NUM; i++) {
+ Countstats[i] = 0;
+ Timingstats[i] = 0;
+ for_each_possible_cpu(cpu) {
+ per_cpu(Timingstats_percpu[i], cpu) = 0;
+ per_cpu(Countstats_percpu[i], cpu) = 0;
+ }
+ }
+}
+
+static void nova_clear_IO_stats(struct super_block *sb)
+{
+ int i;
+ int cpu;
+
+ for (i = 0; i < STATS_NUM; i++) {
+ IOstats[i] = 0;
+ for_each_possible_cpu(cpu)
+ per_cpu(IOstats_percpu[i], cpu) = 0;
+ }
+}
+
+void nova_clear_stats(struct super_block *sb)
+{
+ nova_clear_timing_stats();
+ nova_clear_IO_stats(sb);
+}
+
+void nova_print_inode(struct nova_inode *pi)
+{
+ nova_dbg("%s: NOVA inode %llu\n", __func__, pi->nova_ino);
+ nova_dbg("valid %u, deleted %u, blk type %u, flags %u\n",
+ pi->valid, pi->deleted, pi->i_blk_type, pi->i_flags);
+ nova_dbg("size %llu, ctime %u, mtime %u, atime %u\n",
+ pi->i_size, pi->i_ctime, pi->i_mtime, pi->i_atime);
+ nova_dbg("mode %u, links %u, xattr 0x%llx\n",
+ pi->i_mode, pi->i_links_count, pi->i_xattr);
+ nova_dbg("uid %u, gid %u, gen %u, create time %u\n",
+ pi->i_uid, pi->i_gid, pi->i_generation, pi->i_create_time);
+ nova_dbg("head 0x%llx, tail 0x%llx\n",
+ pi->log_head, pi->log_tail);
+ nova_dbg("create epoch id %llu, delete epoch id %llu\n",
+ pi->create_epoch_id, pi->delete_epoch_id);
+}
diff --git a/fs/nova/stats.h b/fs/nova/stats.h
new file mode 100644
index 0000000..8dbd02d
--- /dev/null
+++ b/fs/nova/stats.h
@@ -0,0 +1,178 @@
+/*
+ * NOVA File System statistics
+ *
+ * Copyright 2015-2016 Regents of the University of California,
+ * UCSD Non-Volatile Systems Lab, Andiry Xu <jix024@xxxxxxxxxxx>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __STATS_H
+#define __STATS_H
+
+
+/* ======================= Timing ========================= */
+enum timing_category {
+ /* Init */
+ init_title_t,
+ init_t,
+ mount_t,
+ ioremap_t,
+ new_init_t,
+ recovery_t,
+
+ /* Namei operations */
+ namei_title_t,
+ create_t,
+ lookup_t,
+ link_t,
+ unlink_t,
+ symlink_t,
+ mkdir_t,
+ rmdir_t,
+ mknod_t,
+ rename_t,
+ readdir_t,
+ add_dentry_t,
+ remove_dentry_t,
+ setattr_t,
+ setsize_t,
+
+ /* I/O operations */
+ io_title_t,
+ dax_read_t,
+ cow_write_t,
+ inplace_write_t,
+ copy_to_nvmm_t,
+ dax_get_block_t,
+ read_iter_t,
+ write_iter_t,
+
+ /* Memory operations */
+ memory_title_t,
+ memcpy_r_nvmm_t,
+ memcpy_w_nvmm_t,
+ memcpy_w_wb_t,
+ partial_block_t,
+
+ /* Memory management */
+ mm_title_t,
+ new_blocks_t,
+ new_data_blocks_t,
+ new_log_blocks_t,
+ free_blocks_t,
+ free_data_t,
+ free_log_t,
+
+ /* Transaction */
+ trans_title_t,
+ create_trans_t,
+ link_trans_t,
+ update_tail_t,
+
+ /* Logging */
+ logging_title_t,
+ append_dir_entry_t,
+ append_file_entry_t,
+ append_link_change_t,
+ append_setattr_t,
+ update_entry_t,
+
+ /* Tree */
+ tree_title_t,
+ check_entry_t,
+ assign_t,
+
+ /* GC */
+ gc_title_t,
+ fast_gc_t,
+ thorough_gc_t,
+ check_invalid_t,
+
+ /* Others */
+ others_title_t,
+ find_cache_t,
+ fsync_t,
+ write_pages_t,
+ fallocate_t,
+ direct_IO_t,
+ free_old_t,
+ delete_file_tree_t,
+ delete_dir_tree_t,
+ new_vfs_inode_t,
+ new_nova_inode_t,
+ free_inode_t,
+ free_inode_log_t,
+ evict_inode_t,
+
+ /* Mmap */
+ mmap_title_t,
+ mmap_fault_t,
+ pmd_fault_t,
+ pfn_mkwrite_t,
+
+ /* Rebuild */
+ rebuild_title_t,
+ rebuild_dir_t,
+ rebuild_file_t,
+
+ /* Sentinel */
+ TIMING_NUM,
+};
+
+enum stats_category {
+ alloc_steps,
+ cow_write_breaks,
+ inplace_write_breaks,
+ read_bytes,
+ cow_write_bytes,
+ inplace_write_bytes,
+ fast_checked_pages,
+ thorough_checked_pages,
+ fast_gc_pages,
+ thorough_gc_pages,
+ dax_new_blocks,
+ inplace_new_blocks,
+ fdatasync,
+
+ /* Sentinel */
+ STATS_NUM,
+};
+
+extern const char *Timingstring[TIMING_NUM];
+extern u64 Timingstats[TIMING_NUM];
+DECLARE_PER_CPU(u64[TIMING_NUM], Timingstats_percpu);
+extern u64 Countstats[TIMING_NUM];
+DECLARE_PER_CPU(u64[TIMING_NUM], Countstats_percpu);
+extern u64 IOstats[STATS_NUM];
+DECLARE_PER_CPU(u64[STATS_NUM], IOstats_percpu);
+
+typedef struct timespec timing_t;
+
+#define NOVA_START_TIMING(name, start) \
+ {if (measure_timing) getrawmonotonic(&start); }
+
+#define NOVA_END_TIMING(name, start) \
+ {if (measure_timing) { \
+ timing_t end; \
+ getrawmonotonic(&end); \
+ __this_cpu_add(Timingstats_percpu[name], \
+ (end.tv_sec - start.tv_sec) * 1000000000 + \
+ (end.tv_nsec - start.tv_nsec)); \
+ } \
+ __this_cpu_add(Countstats_percpu[name], 1); \
+ }
+
+#define NOVA_STATS_ADD(name, value) \
+ {__this_cpu_add(IOstats_percpu[name], value); }
+
+
+
+#endif
diff --git a/fs/nova/super.c b/fs/nova/super.c
index e0e38ab..9295d23 100644
--- a/fs/nova/super.c
+++ b/fs/nova/super.c
@@ -40,8 +40,12 @@
#include "nova.h"
#include "super.h"

+int measure_timing;
int support_clwb;

+module_param(measure_timing, int, 0444);
+MODULE_PARM_DESC(measure_timing, "Timing measurement");
+
module_param(nova_dbgmask, int, 0444);
MODULE_PARM_DESC(nova_dbgmask, "Control debugging output");

@@ -500,6 +504,8 @@ static int nova_fill_super(struct super_block *sb, void *data, int silent)
goto out;
}

+ nova_dbg("measure timing %d\n", measure_timing);
+
get_random_bytes(&random, sizeof(u32));
atomic_set(&sbi->next_generation, random);

--
2.7.4