btrfs: propagate last_unlink_trans earlier when doing a rmdir
[ Upstream commit c466e33e729a0ee017d10d919cba18f503853c60 ] In case the removed directory had a snapshot that was deleted, we are propagating its inode's last_unlink_trans to the parent directory after we removed the entry from the parent directory. This leaves a small race window where someone can log the parent directory after we removed the entry and before we updated last_unlink_trans, and as a result if we ever try to replay such a log tree, we will fail since we will attempt to remove a snapshot during log replay, which is currently not possible and causes the log replay (and mount) to fail. This is the type of failure described in commit 1ec9a1ae1e ("Btrfs: fix unreplayable log after snapshot delete + parent dir fsync"). So fix this by propagating the last_unlink_trans to the parent directory before we remove the entry from it. Fixes: 44f714dae5 ("Btrfs: improve performance on fsync against new inode after rename/unlink") Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.com> Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
committed by
Greg Kroah-Hartman
parent
db5f19a438
commit
b0d162c82b
@@ -4150,7 +4150,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
|
||||
int err = 0;
|
||||
struct btrfs_root *root = BTRFS_I(dir)->root;
|
||||
struct btrfs_trans_handle *trans;
|
||||
u64 last_unlink_trans;
|
||||
|
||||
if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
|
||||
return -ENOTEMPTY;
|
||||
@@ -4161,6 +4160,23 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
|
||||
if (IS_ERR(trans))
|
||||
return PTR_ERR(trans);
|
||||
|
||||
/*
|
||||
* Propagate the last_unlink_trans value of the deleted dir to its
|
||||
* parent directory. This is to prevent an unrecoverable log tree in the
|
||||
* case we do something like this:
|
||||
* 1) create dir foo
|
||||
* 2) create snapshot under dir foo
|
||||
* 3) delete the snapshot
|
||||
* 4) rmdir foo
|
||||
* 5) mkdir foo
|
||||
* 6) fsync foo or some file inside foo
|
||||
*
|
||||
* This is because we can't unlink other roots when replaying the dir
|
||||
* deletes for directory foo.
|
||||
*/
|
||||
if (BTRFS_I(inode)->last_unlink_trans >= trans->transid)
|
||||
BTRFS_I(dir)->last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
|
||||
|
||||
if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
|
||||
err = btrfs_unlink_subvol(trans, dir, dentry);
|
||||
goto out;
|
||||
@@ -4170,28 +4186,12 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
|
||||
|
||||
/* now the directory is empty */
|
||||
err = btrfs_unlink_inode(trans, root, BTRFS_I(dir),
|
||||
BTRFS_I(d_inode(dentry)), dentry->d_name.name,
|
||||
dentry->d_name.len);
|
||||
if (!err) {
|
||||
if (!err)
|
||||
btrfs_i_size_write(BTRFS_I(inode), 0);
|
||||
/*
|
||||
* Propagate the last_unlink_trans value of the deleted dir to
|
||||
* its parent directory. This is to prevent an unrecoverable
|
||||
* log tree in the case we do something like this:
|
||||
* 1) create dir foo
|
||||
* 2) create snapshot under dir foo
|
||||
* 3) delete the snapshot
|
||||
* 4) rmdir foo
|
||||
* 5) mkdir foo
|
||||
* 6) fsync foo or some file inside foo
|
||||
*/
|
||||
if (last_unlink_trans >= trans->transid)
|
||||
BTRFS_I(dir)->last_unlink_trans = last_unlink_trans;
|
||||
}
|
||||
out:
|
||||
btrfs_end_transaction(trans);
|
||||
btrfs_btree_balance_dirty(root->fs_info);
|
||||
|
||||
Reference in New Issue
Block a user