[PATCH 2/4] exec: Assign unshared files after there is no way back

From: Kirill Tkhai
Date: Thu Jan 11 2018 - 10:53:09 EST


The patch makes unshared files_struct to be assigned
after exec or coredump are known to be successeful.
Since previous patch converted all load_binary methods
to use linux_binprm::new_files instead of current->files,
now we are able to assign unshared_files after load_binary
call.

This change makes task->files more predictable and since
that we may analyse the only thread's files pointer
to determ either the task contains a fd or not, and not
be afraid of race with failing exec. Next patch will
use this feature to skip thread group iteration in __do_SAK(),
and also this predictability may be useful for something else.

Also note, that without the patch we skip failing exec,
when we are doing the iterations in __do_SAK(), and
these tasks remain alive. The patch also fixes this rare issue.

Signed-off-by: Kirill Tkhai <ktkhai@xxxxxxxxxxxxx>
---
fs/binfmt_misc.c | 2 --
fs/coredump.c | 9 +++++----
fs/exec.c | 15 ++++++++-------
fs/file.c | 2 +-
include/linux/fdtable.h | 4 ++--
kernel/fork.c | 19 +++++++------------
6 files changed, 23 insertions(+), 28 deletions(-)

diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 13c1fae45717..7568456895c7 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -242,8 +242,6 @@ static int load_misc_binary(struct linux_binprm *bprm)
dput(fmt->dentry);
return retval;
error:
- if (fd_binary > 0)
- sys_close(fd_binary);
bprm->interp_flags = 0;
bprm->interp_data = 0;
goto ret;
diff --git a/fs/coredump.c b/fs/coredump.c
index 1e2c87acac9b..fe5a4504d975 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -546,7 +546,7 @@ void do_coredump(const siginfo_t *siginfo)
struct cred *cred;
int retval = 0;
int ispipe;
- struct files_struct *displaced;
+ struct files_struct *files;
/* require nonrelative corefile path and be extra careful */
bool need_suid_safe = false;
bool core_dumped = false;
@@ -747,11 +747,12 @@ void do_coredump(const siginfo_t *siginfo)
}

/* get us an unshared descriptor table; almost always a no-op */
- retval = unshare_files(&displaced);
+ files = unshare_files();
+ retval = PTR_ERR_OR_ZERO(files);
if (retval)
goto close_fail;
- if (displaced)
- put_files_struct(displaced);
+ if (files != current->files)
+ assign_files_struct(files);
if (!dump_interrupted()) {
file_start_write(cprm.file);
core_dumped = binfmt->core_dump(&cprm);
diff --git a/fs/exec.c b/fs/exec.c
index 4c3b924ae103..ced4dc09444a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1699,7 +1699,7 @@ static int do_execveat_common(int fd, struct filename *filename,
char *pathbuf = NULL;
struct linux_binprm *bprm;
struct file *file;
- struct files_struct *displaced;
+ struct files_struct *files;
int retval;

if (IS_ERR(filename))
@@ -1721,7 +1721,8 @@ static int do_execveat_common(int fd, struct filename *filename,
* further execve() calls fail. */
current->flags &= ~PF_NPROC_EXCEEDED;

- retval = unshare_files(&displaced);
+ files = unshare_files();
+ retval = PTR_ERR_OR_ZERO(files);
if (retval)
goto out_ret;

@@ -1729,7 +1730,7 @@ static int do_execveat_common(int fd, struct filename *filename,
bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
if (!bprm)
goto out_files;
- bprm->new_files = current->files;
+ bprm->new_files = files;

retval = prepare_bprm_creds(bprm);
if (retval)
@@ -1813,8 +1814,8 @@ static int do_execveat_common(int fd, struct filename *filename,
free_bprm(bprm);
kfree(pathbuf);
putname(filename);
- if (displaced)
- put_files_struct(displaced);
+ if (files != current->files)
+ assign_files_struct(files);
return retval;

out:
@@ -1832,8 +1833,8 @@ static int do_execveat_common(int fd, struct filename *filename,
kfree(pathbuf);

out_files:
- if (displaced)
- reset_files_struct(displaced);
+ if (files != current->files)
+ put_files_struct(files);
out_ret:
putname(filename);
return retval;
diff --git a/fs/file.c b/fs/file.c
index e578e5537c32..7ed519c65d0b 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -427,7 +427,7 @@ void put_files_struct(struct files_struct *files)
}
}

-void reset_files_struct(struct files_struct *files)
+void assign_files_struct(struct files_struct *files)
{
struct task_struct *tsk = current;
struct files_struct *old;
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index 1c65817673db..00db7eadf895 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -104,8 +104,8 @@ struct task_struct;

struct files_struct *get_files_struct(struct task_struct *);
void put_files_struct(struct files_struct *fs);
-void reset_files_struct(struct files_struct *);
-int unshare_files(struct files_struct **);
+void assign_files_struct(struct files_struct *);
+struct files_struct *unshare_files(void);
struct files_struct *dup_fd(struct files_struct *, int *) __latent_entropy;
void do_close_on_exec(struct files_struct *);
int iterate_fd(struct files_struct *, unsigned,
diff --git a/kernel/fork.c b/kernel/fork.c
index 2295fc69717f..7690734eb354 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2434,22 +2434,17 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
* the exec layer of the kernel.
*/

-int unshare_files(struct files_struct **displaced)
+struct files_struct *unshare_files(void)
{
struct task_struct *task = current;
- struct files_struct *copy = NULL;
+ struct files_struct *files = task->files;
int error;

- error = unshare_fd(CLONE_FILES, &copy);
- if (error || !copy) {
- *displaced = NULL;
- return error;
- }
- *displaced = task->files;
- task_lock(task);
- task->files = copy;
- task_unlock(task);
- return 0;
+ error = unshare_fd(CLONE_FILES, &files);
+ if (error)
+ return ERR_PTR(error);
+
+ return files;
}

int sysctl_max_threads(struct ctl_table *table, int write,