ANDROID: vfs/ext4,f2fs: finish umount(2) in time with filesystem work

This patch changes the umount(2) flow to wait for delayed fput/mntput work.
Even so, we can still see unclosed namespaces, which can trigger a filesystem
panic once the device has been released, as illustrated below (e.g., ext4
with errors=panic).

So, it introduces fs->umount_end() to let the filesystem change its behavior
at the end of umount(2), e.g. switching ext4 to errors=remount-ro.

WARN: DO NOT upstream!

This is only related to the Android reboot procedure, and resolves the issue
below, where a kernel panic happens when a live filesystem tries to access a
dead block device after device_shutdown() has been done by kernel_restart().

Term: namespace(mnt_get_count())

1. create_new_namespaces() creates ns1 and ns2,

  /data(1)    ns1(1)    ns2(1)
    |          |          |
     ---------------------
               |
        sb->s_active = 3

2. after binder_proc_clear_zombies() for ns2 and ns1 triggers
  - delayed_fput()
    - delayed_mntput_work(ns2)

  /data(1)    ns1(1)
    |          |
     ----------
          |
    sb->s_active = 2

3. umount() for /data succeeds.

  ns1(1)
    |
 sb->s_active = 1

4. device_shutdown() by init

5.  - delayed_mntput_work(ns1)
     - put_super(), since sb->s_active = 0
       - -EIO

Bug: 63981945
Bug: 65481582
Bug: 72236603
Change-Id: I7db02f480cc839bf9c245e078164a8168ea0d88b
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
This commit is contained in:
Jaegeuk Kim
2017-09-12 17:56:31 -07:00
committed by Bruno Martins
parent b5bdb49f03
commit afd4669f0a
6 changed files with 80 additions and 0 deletions

View File

@@ -72,6 +72,7 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
static void ext4_clear_journal_err(struct super_block *sb,
struct ext4_super_block *es);
static int ext4_sync_fs(struct super_block *sb, int wait);
static void ext4_umount_end(struct super_block *sb, int flags);
static int ext4_remount(struct super_block *sb, int *flags, char *data);
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
static int ext4_unfreeze(struct super_block *sb);
@@ -1275,6 +1276,7 @@ static const struct super_operations ext4_sops = {
.freeze_fs = ext4_freeze,
.unfreeze_fs = ext4_unfreeze,
.statfs = ext4_statfs,
.umount_end = ext4_umount_end,
.remount_fs = ext4_remount,
.show_options = ext4_show_options,
#ifdef CONFIG_QUOTA
@@ -4976,6 +4978,25 @@ struct ext4_mount_options {
#endif
};
/*
 * ext4_umount_end - ->umount_end() hook, invoked at the end of umount(2).
 * @sb:    super block of the filesystem being unmounted
 * @flags: umount(2) flags; only MNT_FORCE is inspected here
 *
 * When the unmount was forced, or s_active is still above one (an unclosed
 * namespace keeps the super block alive), ext4 will not go through
 * put_super(), which would trigger fsck on the next boot.  In that case
 * replace the ERRORS_PANIC mount option with ERRORS_RO and, unless the
 * filesystem is read-only, commit the super block.
 */
static void ext4_umount_end(struct super_block *sb, int flags)
{
	/* Plain unmount with no extra active reference: nothing to do. */
	if (!(flags & MNT_FORCE) && atomic_read(&sb->s_active) <= 1)
		return;

	ext4_msg(sb, KERN_ERR,
		 "errors=remount-ro for active namespaces on umount %x",
		 flags);

	/* Swap the error policy: drop panic, select remount-ro. */
	clear_opt(sb, ERRORS_PANIC);
	set_opt(sb, ERRORS_RO);

	if (sb->s_flags & MS_RDONLY)
		return;

	/* to write the latest s_kbytes_written */
	ext4_commit_super(sb, 1);
}
static int ext4_remount(struct super_block *sb, int *flags, char *data)
{
struct ext4_super_block *es;

View File

@@ -1040,6 +1040,24 @@ static void destroy_device_list(struct f2fs_sb_info *sbi)
kvfree(sbi->devs);
}
/*
 * f2fs_umount_end - ->umount_end() hook, invoked at the end of umount(2).
 * @sb:    super block of the filesystem being unmounted
 * @flags: umount(2) flags; only MNT_FORCE is inspected here
 *
 * When the unmount was forced, or s_active is still above one (an unclosed
 * namespace keeps the super block alive), f2fs will not go through
 * put_super(), which would trigger fsck on the next boot.  In that case,
 * unless the filesystem is read-only, write a CP_UMOUNT checkpoint.
 */
static void f2fs_umount_end(struct super_block *sb, int flags)
{
	struct cp_control cpc = {
		.reason = CP_UMOUNT,
	};

	/* Plain unmount with no extra active reference: nothing to do. */
	if (!(flags & MNT_FORCE) && atomic_read(&sb->s_active) <= 1)
		return;

	if (sb->s_flags & MS_RDONLY)
		return;

	/* to write the latest kbytes_written */
	f2fs_write_checkpoint(F2FS_SB(sb), &cpc);
}
static void f2fs_put_super(struct super_block *sb)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -2218,6 +2236,7 @@ static const struct super_operations f2fs_sops = {
#endif
.evict_inode = f2fs_evict_inode,
.put_super = f2fs_put_super,
.umount_end = f2fs_umount_end,
.sync_fs = f2fs_sync_fs,
.freeze_fs = f2fs_freeze,
.unfreeze_fs = f2fs_unfreeze,

View File

@@ -261,6 +261,12 @@ void flush_delayed_fput(void)
static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);
/*
 * flush_delayed_fput_wait - flush delayed fput and wait for its work item.
 *
 * Calls delayed_fput() directly in the caller's context, then calls
 * flush_delayed_work() on delayed_fput_work.  umount(2) uses this to flush
 * delayed_fput so that mnt_count references are put.
 */
void flush_delayed_fput_wait(void)
{
/* delayed_fput() is also the handler of delayed_fput_work; the argument
 * is passed as NULL here (NOTE(review): presumably unused by the handler
 * - confirm against its definition). */
delayed_fput(NULL);
/* Flush the delayed work item itself as well; see the workqueue API for
 * the exact waiting semantics of flush_delayed_work(). */
flush_delayed_work(&delayed_fput_work);
}
void fput_many(struct file *file, unsigned int refs)
{
if (atomic_long_sub_and_test(refs, &file->f_count)) {

View File

@@ -20,6 +20,7 @@
#include <linux/fs_struct.h> /* get_fs_root et.al. */
#include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */
#include <linux/uaccess.h>
#include <linux/file.h>
#include <linux/proc_ns.h>
#include <linux/magic.h>
#include <linux/bootmem.h>
@@ -1163,6 +1164,12 @@ static void delayed_mntput(struct work_struct *unused)
}
static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
/*
 * flush_delayed_mntput_wait - flush delayed mntput and wait for its work item.
 *
 * Calls delayed_mntput() directly in the caller's context, then calls
 * flush_delayed_work() on delayed_mntput_work.  umount(2) uses this to
 * flush delayed_mntput_work so that sb->s_active is put.
 *
 * Kept static: no header declares this helper and its only caller is the
 * umount(2) syscall in this file, so it should not have external linkage.
 */
static void flush_delayed_mntput_wait(void)
{
	/*
	 * delayed_mntput() is also the handler of delayed_mntput_work; its
	 * work_struct parameter is named "unused", so NULL is passed.
	 */
	delayed_mntput(NULL);
	/* Flush the delayed work item itself as well. */
	flush_delayed_work(&delayed_mntput_work);
}
static void mntput_no_expire(struct mount *mnt)
{
rcu_read_lock();
@@ -1718,6 +1725,7 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
struct mount *mnt;
int retval;
int lookup_flags = 0;
bool user_request = !(current->flags & PF_KTHREAD);
if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
return -EINVAL;
@@ -1743,11 +1751,35 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
goto dput_and_out;
/* flush delayed_fput to put mnt_count */
if (user_request)
flush_delayed_fput_wait();
retval = do_umount(mnt, flags);
dput_and_out:
/* we mustn't call path_put() as that would clear mnt_expiry_mark */
dput(path.dentry);
mntput_no_expire(mnt);
if (!user_request)
goto out;
if (!retval) {
/*
* If the last delayed_fput() is called during do_umount()
* and makes mnt_count zero, we need to guarantee to register
* delayed_mntput by waiting for delayed_fput work again.
*/
flush_delayed_fput_wait();
/* flush delayed_mntput_work to put sb->s_active */
flush_delayed_mntput_wait();
}
if (!retval || (flags & MNT_FORCE)) {
/* filesystem needs to handle unclosed namespaces */
if (mnt->mnt.mnt_sb->s_op->umount_end)
mnt->mnt.mnt_sb->s_op->umount_end(mnt->mnt.mnt_sb, flags);
}
out:
return retval;
}

View File

@@ -86,6 +86,7 @@ extern void put_unused_fd(unsigned int fd);
extern void fd_install(unsigned int fd, struct file *file);
extern void flush_delayed_fput(void);
extern void flush_delayed_fput_wait(void);
extern void __fput_sync(struct file *);
#endif /* __LINUX_FILE_H */

View File

@@ -1850,6 +1850,7 @@ struct super_operations {
void *(*clone_mnt_data) (void *);
void (*copy_mnt_data) (void *, void *);
void (*umount_begin) (struct super_block *);
void (*umount_end) (struct super_block *, int);
int (*show_options)(struct seq_file *, struct dentry *);
int (*show_options2)(struct vfsmount *,struct seq_file *, struct dentry *);