[patch] implement smarter atime updates support

From: Ingo Molnar
Date: Sun Aug 05 2007 - 15:23:25 EST



* Ingo Molnar <mingo@xxxxxxx> wrote:

> tested it by moving the date forward:
>
> # date
> Sun Aug 5 22:55:14 CEST 2007
> # date -s "Tue Aug 7 22:55:14 CEST 2007"
> Tue Aug 7 22:55:14 CEST 2007
>
> access to a file did not generate disk IO before the date was set, and
> it generated exactly one IO after the date was set.
>
> ( should i perhaps reduce the number of boot options and only use a
> single "norelatime_default" boot option to turn this off? )

ok, cleaned it up some more: only a single, consistent boot option and
all the switches (be that config, boot or sysctl) are now called
"default_relatime". Also, got rid of that #ifdef ugliness in namespace.c
via a cleaner Kconfig solution (suggested by Peter Zijlstra).

Ingo

---------------------------->
Subject: [patch] implement smarter atime updates support
From: Ingo Molnar <mingo@xxxxxxx>

change relatime updates to be performed once per day. This makes
relatime a compatible solution for HSM, mailer-notification and
tmpwatch applications too.

also add the CONFIG_DEFAULT_RELATIME kernel option, which makes
"norelatime" the default for all mounts without an extra kernel
boot option.

add the "default_relatime=0" boot option to turn this off.

also add the /proc/sys/kernel/default_relatime flag which can be changed
runtime to modify the behavior of subsequent new mounts.

tested by moving the date forward:

# date
Sun Aug 5 22:55:14 CEST 2007
# date -s "Tue Aug 7 22:55:14 CEST 2007"
Tue Aug 7 22:55:14 CEST 2007

access to a file did not generate disk IO before the date was set, and
it generated exactly one IO after the date was set.

Signed-off-by: Ingo Molnar <mingo@xxxxxxx>
---
Documentation/kernel-parameters.txt | 4 +++
fs/Kconfig | 22 ++++++++++++++++
fs/inode.c | 48 ++++++++++++++++++++++++++----------
fs/namespace.c | 25 ++++++++++++++++++
include/linux/mount.h | 2 +
kernel/sysctl.c | 9 ++++++
6 files changed, 97 insertions(+), 13 deletions(-)

Index: linux/Documentation/kernel-parameters.txt
===================================================================
--- linux.orig/Documentation/kernel-parameters.txt
+++ linux/Documentation/kernel-parameters.txt
@@ -525,6 +525,10 @@ and is between 256 and 4096 characters.
This is a 16-member array composed of values
ranging from 0-255.

+ default_relatime=
+ [FS] mount all filesystems with relative atime
+ updates by default.
+
default_utf8= [VT]
Format=<0|1>
Set system-wide default UTF-8 mode for all tty's.
Index: linux/fs/Kconfig
===================================================================
--- linux.orig/fs/Kconfig
+++ linux/fs/Kconfig
@@ -2060,6 +2060,28 @@ config 9P_FS

endmenu

+config DEFAULT_RELATIME
+ bool "Mount all filesystems with relatime by default"
+ default y
+ help
+ If you say Y here, all your filesystems will be mounted
+ with the "relatime" mount option. This eliminates many atime
+ ('file last accessed' timestamp) updates (which otherwise
+ is performed on every file access and generates a write
+ IO to the inode) and thus speeds up IO. Atime is still updated,
+ but only once per day.
+
+ The mtime ('file last modified') and ctime ('file created')
+ timestamp are unaffected by this change.
+
+ Use the "norelatime" kernel boot option to turn off this
+ feature.
+
+config DEFAULT_RELATIME_VAL
+ int
+ default "1" if DEFAULT_RELATIME
+ default "0"
+
if BLOCK
menu "Partition Types"

Index: linux/fs/inode.c
===================================================================
--- linux.orig/fs/inode.c
+++ linux/fs/inode.c
@@ -1162,6 +1162,36 @@ sector_t bmap(struct inode * inode, sect
}
EXPORT_SYMBOL(bmap);

+/*
+ * With relative atime, only update atime if the
+ * previous atime is earlier than either the ctime or
+ * mtime.
+ */
+static int relatime_need_update(struct inode *inode, struct timespec now)
+{
+ /*
+ * Is mtime younger than atime? If yes, update atime:
+ */
+ if (timespec_compare(&inode->i_mtime, &inode->i_atime) >= 0)
+ return 1;
+ /*
+ * Is ctime younger than atime? If yes, update atime:
+ */
+ if (timespec_compare(&inode->i_ctime, &inode->i_atime) >= 0)
+ return 1;
+
+ /*
+ * Is the previous atime value older than a day? If yes,
+ * update atime:
+ */
+ if ((long)(now.tv_sec - inode->i_atime.tv_sec) >= 24*60*60)
+ return 1;
+ /*
+ * Good, we can skip the atime update:
+ */
+ return 0;
+}
+
/**
* touch_atime - update the access time
* @mnt: mount the inode is accessed on
@@ -1191,22 +1221,14 @@ void touch_atime(struct vfsmount *mnt, s
return;
if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))
return;
-
- if (mnt->mnt_flags & MNT_RELATIME) {
- /*
- * With relative atime, only update atime if the
- * previous atime is earlier than either the ctime or
- * mtime.
- */
- if (timespec_compare(&inode->i_mtime,
- &inode->i_atime) < 0 &&
- timespec_compare(&inode->i_ctime,
- &inode->i_atime) < 0)
+ }
+ now = current_fs_time(inode->i_sb);
+ if (mnt) {
+ if (mnt->mnt_flags & MNT_RELATIME)
+ if (!relatime_need_update(inode, now))
return;
- }
}

- now = current_fs_time(inode->i_sb);
if (timespec_equal(&inode->i_atime, &now))
return;

Index: linux/fs/namespace.c
===================================================================
--- linux.orig/fs/namespace.c
+++ linux/fs/namespace.c
@@ -1107,6 +1107,8 @@ int do_add_mount(struct vfsmount *newmnt
goto unlock;

newmnt->mnt_flags = mnt_flags;
+ WARN_ON_ONCE(newmnt->mnt_flags & MNT_RELATIME);
+
if ((err = graft_tree(newmnt, nd)))
goto unlock;

@@ -1362,6 +1364,24 @@ int copy_mount_options(const void __user
}

/*
+ * Allow users to disable (or enable) atime updates via a .config
+ * option or via the boot line, or via /proc/sys/fs/default_relatime:
+ */
+int default_relatime __read_mostly = CONFIG_DEFAULT_RELATIME_VAL;
+
+static int __init set_default_relatime(char *str)
+{
+ get_option(&str, &default_relatime);
+
+ printk(KERN_INFO "Mount all filesystems with"
+ "default relative atime updates: %s.\n",
+ default_relatime ? "enabled" : "disabled");
+
+ return 1;
+}
+__setup("default_relatime=", set_default_relatime);
+
+/*
* Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
* be given to the mount() call (ie: read-only, no-dev, no-suid etc).
*
@@ -1409,6 +1429,11 @@ long do_mount(char *dev_name, char *dir_
mnt_flags |= MNT_NODIRATIME;
if (flags & MS_RELATIME)
mnt_flags |= MNT_RELATIME;
+ else if (default_relatime &&
+ !(flags & (MNT_NOATIME | MNT_NODIRATIME))) {
+ mnt_flags |= MNT_RELATIME;
+ flags |= MS_RELATIME;
+ }

flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
MS_NOATIME | MS_NODIRATIME | MS_RELATIME);
Index: linux/include/linux/mount.h
===================================================================
--- linux.orig/include/linux/mount.h
+++ linux/include/linux/mount.h
@@ -103,5 +103,7 @@ extern void shrink_submounts(struct vfsm
extern spinlock_t vfsmount_lock;
extern dev_t name_to_dev_t(char *name);

+extern int default_relatime;
+
#endif
#endif /* _LINUX_MOUNT_H */
Index: linux/kernel/sysctl.c
===================================================================
--- linux.orig/kernel/sysctl.c
+++ linux/kernel/sysctl.c
@@ -30,6 +30,7 @@
#include <linux/capability.h>
#include <linux/smp_lock.h>
#include <linux/fs.h>
+#include <linux/mount.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kobject.h>
@@ -1206,6 +1207,14 @@ static ctl_table fs_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "default_relatime",
+ .data = &default_relatime,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
{
.ctl_name = CTL_UNNUMBERED,
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/