f2fs/fscrypt: catch up to v4.12

Applied f2fs-related patches only, and tried to exclude other layers' changes.

- fs/f2fs

c6b1e36c8fa0 Merge branch 'for-4.13/block' of git://git.kernel.dk/linux-block
81e3e044897b Merge tag 'uuid-for-4.13' of git://git.infradead.org/users/hch/uuid
fdd050b5b3c9 Merge branch 'uuid-types' of bombadil.infradead.org:public_git/uuid into nvme-base
4e4cbee93d56 block: switch bios to blk_status_t
d41519a69b35 crypto: Work around deallocated stack frame reference gcc bug on sparc.
85787090a21e fs: switch ->s_uuid to uuid_t
bf5f89463f5b Merge branch 'akpm' (patches from Andrew)
48fbfe50f1d5 fs: f2fs: use ktime_get_real_seconds for sit_info times
a7c3e901a46f mm: introduce kv[mz]alloc helpers
70ef8f0d3757 Merge tag 'for-f2fs-4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
...

- fs/crypto/

4e4cbee93d56 block: switch bios to blk_status_t
17159420a6c1 fscrypt: introduce helper function for filename matching
6b06cdee81d6 fscrypt: avoid collisions when presenting long encrypted filenames
272f98f68462 fscrypt: fix context consistency check when key(s) unavailable
9c8268def612 fscrypt: Move key structure and constants to uapi
cd39e4bac111 fscrypt: remove unnecessary checks for NULL operations
a643f9054c21 Merge tag 'fscrypt-for-linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/fscrypt
...

- fs/ext4/
: Applied fscrypt-related changes only.

Signed-off-by: Jaegeuk Kim <jaegeuk@google.com>
This commit is contained in:
Jaegeuk Kim
2017-07-10 12:55:09 -07:00
parent 4d7931d727
commit e6b120d4d0
45 changed files with 5431 additions and 2880 deletions

View File

@@ -1,6 +1,5 @@
config FS_ENCRYPTION
tristate "FS Encryption (Per-file encryption)"
depends on BLOCK
select CRYPTO
select CRYPTO_AES
select CRYPTO_CBC
@@ -8,9 +7,7 @@ config FS_ENCRYPTION
select CRYPTO_XTS
select CRYPTO_CTS
select CRYPTO_CTR
select CRYPTO_SHA256
select KEYS
select ENCRYPTED_KEYS
help
Enable encryption of files and directories. This
feature is similar to ecryptfs, but it is more memory

View File

@@ -1,3 +1,4 @@
obj-$(CONFIG_FS_ENCRYPTION) += fscrypto.o
fscrypto-y := crypto.o fname.o policy.o keyinfo.o
fscrypto-$(CONFIG_BLOCK) += bio.o

145
fs/crypto/bio.c Normal file
View File

@@ -0,0 +1,145 @@
/*
* This contains encryption functions for per-file encryption.
*
* Copyright (C) 2015, Google, Inc.
* Copyright (C) 2015, Motorola Mobility
*
* Written by Michael Halcrow, 2014.
*
* Filename encryption additions
* Uday Savagaonkar, 2014
* Encryption policy handling additions
* Ildar Muslukhov, 2014
* Add fscrypt_pullback_bio_page()
* Jaegeuk Kim, 2015.
*
* This has not yet undergone a rigorous security audit.
*
* The usage of AES-XTS should conform to recommendations in NIST
* Special Publication 800-38E and IEEE P1619/D16.
*/
#include <linux/pagemap.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/namei.h>
#include "fscrypt_private.h"
/*
* Call fscrypt_decrypt_page on every single page, reusing the encryption
* context.
*/
static void completion_pages(struct work_struct *work)
{
struct fscrypt_ctx *ctx =
container_of(work, struct fscrypt_ctx, r.work);
struct bio *bio = ctx->r.bio;
struct bio_vec *bv;
int i;
bio_for_each_segment_all(bv, bio, i) {
struct page *page = bv->bv_page;
int ret = fscrypt_decrypt_page(page->mapping->host, page,
PAGE_SIZE, 0, page->index);
if (ret) {
WARN_ON_ONCE(1);
SetPageError(page);
} else {
SetPageUptodate(page);
}
unlock_page(page);
}
fscrypt_release_ctx(ctx);
bio_put(bio);
}
void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *ctx, struct bio *bio)
{
INIT_WORK(&ctx->r.work, completion_pages);
ctx->r.bio = bio;
queue_work(fscrypt_read_workqueue, &ctx->r.work);
}
EXPORT_SYMBOL(fscrypt_decrypt_bio_pages);
void fscrypt_pullback_bio_page(struct page **page, bool restore)
{
struct fscrypt_ctx *ctx;
struct page *bounce_page;
/* The bounce data pages are unmapped. */
if ((*page)->mapping)
return;
/* The bounce data page is unmapped. */
bounce_page = *page;
ctx = (struct fscrypt_ctx *)page_private(bounce_page);
/* restore control page */
*page = ctx->w.control_page;
if (restore)
fscrypt_restore_control_page(bounce_page);
}
EXPORT_SYMBOL(fscrypt_pullback_bio_page);
int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
sector_t pblk, unsigned int len)
{
struct fscrypt_ctx *ctx;
struct page *ciphertext_page = NULL;
struct bio *bio;
int ret, err = 0;
BUG_ON(inode->i_sb->s_blocksize != PAGE_SIZE);
ctx = fscrypt_get_ctx(inode, GFP_NOFS);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
ciphertext_page = fscrypt_alloc_bounce_page(ctx, GFP_NOWAIT);
if (IS_ERR(ciphertext_page)) {
err = PTR_ERR(ciphertext_page);
goto errout;
}
while (len--) {
err = fscrypt_do_page_crypto(inode, FS_ENCRYPT, lblk,
ZERO_PAGE(0), ciphertext_page,
PAGE_SIZE, 0, GFP_NOFS);
if (err)
goto errout;
bio = bio_alloc(GFP_NOWAIT, 1);
if (!bio) {
err = -ENOMEM;
goto errout;
}
bio->bi_bdev = inode->i_sb->s_bdev;
bio->bi_iter.bi_sector =
pblk << (inode->i_sb->s_blocksize_bits - 9);
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
ret = bio_add_page(bio, ciphertext_page,
inode->i_sb->s_blocksize, 0);
if (ret != inode->i_sb->s_blocksize) {
/* should never happen! */
WARN_ON(1);
bio_put(bio);
err = -EIO;
goto errout;
}
err = submit_bio_wait(bio);
if (err == 0 && bio->bi_error)
err = -EIO;
bio_put(bio);
if (err)
goto errout;
lblk++;
pblk++;
}
err = 0;
errout:
fscrypt_release_ctx(ctx);
return err;
}
EXPORT_SYMBOL(fscrypt_zeroout_range);

View File

@@ -24,10 +24,9 @@
#include <linux/module.h>
#include <linux/scatterlist.h>
#include <linux/ratelimit.h>
#include <linux/bio.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/fscrypto.h>
#include "fscrypt_private.h"
static unsigned int num_prealloc_crypto_pages = 32;
static unsigned int num_prealloc_crypto_ctxs = 128;
@@ -44,7 +43,7 @@ static mempool_t *fscrypt_bounce_page_pool = NULL;
static LIST_HEAD(fscrypt_free_ctxs);
static DEFINE_SPINLOCK(fscrypt_ctx_lock);
static struct workqueue_struct *fscrypt_read_workqueue;
struct workqueue_struct *fscrypt_read_workqueue;
static DEFINE_MUTEX(fscrypt_init_mutex);
static struct kmem_cache *fscrypt_ctx_cachep;
@@ -63,7 +62,7 @@ void fscrypt_release_ctx(struct fscrypt_ctx *ctx)
{
unsigned long flags;
if (ctx->flags & FS_WRITE_PATH_FL && ctx->w.bounce_page) {
if (ctx->flags & FS_CTX_HAS_BOUNCE_BUFFER_FL && ctx->w.bounce_page) {
mempool_free(ctx->w.bounce_page, fscrypt_bounce_page_pool);
ctx->w.bounce_page = NULL;
}
@@ -88,7 +87,7 @@ EXPORT_SYMBOL(fscrypt_release_ctx);
* Return: An allocated and initialized encryption context on success; error
* value or NULL otherwise.
*/
struct fscrypt_ctx *fscrypt_get_ctx(struct inode *inode, gfp_t gfp_flags)
struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode, gfp_t gfp_flags)
{
struct fscrypt_ctx *ctx = NULL;
struct fscrypt_info *ci = inode->i_crypt_info;
@@ -121,7 +120,7 @@ struct fscrypt_ctx *fscrypt_get_ctx(struct inode *inode, gfp_t gfp_flags)
} else {
ctx->flags &= ~FS_CTX_REQUIRES_FREE_ENCRYPT_FL;
}
ctx->flags &= ~FS_WRITE_PATH_FL;
ctx->flags &= ~FS_CTX_HAS_BOUNCE_BUFFER_FL;
return ctx;
}
EXPORT_SYMBOL(fscrypt_get_ctx);
@@ -141,15 +140,10 @@ static void page_crypt_complete(struct crypto_async_request *req, int res)
complete(&ecr->completion);
}
typedef enum {
FS_DECRYPT = 0,
FS_ENCRYPT,
} fscrypt_direction_t;
static int do_page_crypto(struct inode *inode,
fscrypt_direction_t rw, pgoff_t index,
struct page *src_page, struct page *dest_page,
gfp_t gfp_flags)
int fscrypt_do_page_crypto(const struct inode *inode, fscrypt_direction_t rw,
u64 lblk_num, struct page *src_page,
struct page *dest_page, unsigned int len,
unsigned int offs, gfp_t gfp_flags)
{
struct {
__le64 index;
@@ -162,6 +156,8 @@ static int do_page_crypto(struct inode *inode,
struct crypto_skcipher *tfm = ci->ci_ctfm;
int res = 0;
BUG_ON(len == 0);
req = skcipher_request_alloc(tfm, gfp_flags);
if (!req) {
printk_ratelimited(KERN_ERR
@@ -175,14 +171,14 @@ static int do_page_crypto(struct inode *inode,
page_crypt_complete, &ecr);
BUILD_BUG_ON(sizeof(xts_tweak) != FS_XTS_TWEAK_SIZE);
xts_tweak.index = cpu_to_le64(index);
xts_tweak.index = cpu_to_le64(lblk_num);
memset(xts_tweak.padding, 0, sizeof(xts_tweak.padding));
sg_init_table(&dst, 1);
sg_set_page(&dst, dest_page, PAGE_SIZE, 0);
sg_set_page(&dst, dest_page, len, offs);
sg_init_table(&src, 1);
sg_set_page(&src, src_page, PAGE_SIZE, 0);
skcipher_request_set_crypt(req, &src, &dst, PAGE_SIZE, &xts_tweak);
sg_set_page(&src, src_page, len, offs);
skcipher_request_set_crypt(req, &src, &dst, len, &xts_tweak);
if (rw == FS_DECRYPT)
res = crypto_skcipher_decrypt(req);
else
@@ -202,53 +198,86 @@ static int do_page_crypto(struct inode *inode,
return 0;
}
static struct page *alloc_bounce_page(struct fscrypt_ctx *ctx, gfp_t gfp_flags)
struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx,
gfp_t gfp_flags)
{
ctx->w.bounce_page = mempool_alloc(fscrypt_bounce_page_pool, gfp_flags);
if (ctx->w.bounce_page == NULL)
return ERR_PTR(-ENOMEM);
ctx->flags |= FS_WRITE_PATH_FL;
ctx->flags |= FS_CTX_HAS_BOUNCE_BUFFER_FL;
return ctx->w.bounce_page;
}
/**
* fscypt_encrypt_page() - Encrypts a page
* @inode: The inode for which the encryption should take place
* @plaintext_page: The page to encrypt. Must be locked.
* @gfp_flags: The gfp flag for memory allocation
* @inode: The inode for which the encryption should take place
* @page: The page to encrypt. Must be locked for bounce-page
* encryption.
* @len: Length of data to encrypt in @page and encrypted
* data in returned page.
* @offs: Offset of data within @page and returned
* page holding encrypted data.
* @lblk_num: Logical block number. This must be unique for multiple
* calls with same inode, except when overwriting
* previously written data.
* @gfp_flags: The gfp flag for memory allocation
*
* Allocates a ciphertext page and encrypts plaintext_page into it using the ctx
* encryption context.
* Encrypts @page using the ctx encryption context. Performs encryption
* either in-place or into a newly allocated bounce page.
* Called on the page write path.
*
* Called on the page write path. The caller must call
* Bounce page allocation is the default.
* In this case, the contents of @page are encrypted and stored in an
* allocated bounce page. @page has to be locked and the caller must call
* fscrypt_restore_control_page() on the returned ciphertext page to
* release the bounce buffer and the encryption context.
*
* Return: An allocated page with the encrypted content on success. Else, an
* In-place encryption is used by setting the FS_CFLG_OWN_PAGES flag in
* fscrypt_operations. Here, the input-page is returned with its content
* encrypted.
*
* Return: A page with the encrypted content on success. Else, an
* error value or NULL.
*/
struct page *fscrypt_encrypt_page(struct inode *inode,
struct page *plaintext_page, gfp_t gfp_flags)
struct page *fscrypt_encrypt_page(const struct inode *inode,
struct page *page,
unsigned int len,
unsigned int offs,
u64 lblk_num, gfp_t gfp_flags)
{
struct fscrypt_ctx *ctx;
struct page *ciphertext_page = NULL;
struct page *ciphertext_page = page;
int err;
BUG_ON(!PageLocked(plaintext_page));
BUG_ON(len % FS_CRYPTO_BLOCK_SIZE != 0);
if (inode->i_sb->s_cop->flags & FS_CFLG_OWN_PAGES) {
/* with inplace-encryption we just encrypt the page */
err = fscrypt_do_page_crypto(inode, FS_ENCRYPT, lblk_num, page,
ciphertext_page, len, offs,
gfp_flags);
if (err)
return ERR_PTR(err);
return ciphertext_page;
}
BUG_ON(!PageLocked(page));
ctx = fscrypt_get_ctx(inode, gfp_flags);
if (IS_ERR(ctx))
return (struct page *)ctx;
/* The encryption operation will require a bounce page. */
ciphertext_page = alloc_bounce_page(ctx, gfp_flags);
ciphertext_page = fscrypt_alloc_bounce_page(ctx, gfp_flags);
if (IS_ERR(ciphertext_page))
goto errout;
ctx->w.control_page = plaintext_page;
err = do_page_crypto(inode, FS_ENCRYPT, plaintext_page->index,
plaintext_page, ciphertext_page,
gfp_flags);
ctx->w.control_page = page;
err = fscrypt_do_page_crypto(inode, FS_ENCRYPT, lblk_num,
page, ciphertext_page, len, offs,
gfp_flags);
if (err) {
ciphertext_page = ERR_PTR(err);
goto errout;
@@ -265,8 +294,13 @@ errout:
EXPORT_SYMBOL(fscrypt_encrypt_page);
/**
* f2crypt_decrypt_page() - Decrypts a page in-place
* @page: The page to decrypt. Must be locked.
* fscrypt_decrypt_page() - Decrypts a page in-place
* @inode: The corresponding inode for the page to decrypt.
* @page: The page to decrypt. Must be locked in case
* it is a writeback page (FS_CFLG_OWN_PAGES unset).
* @len: Number of bytes in @page to be decrypted.
* @offs: Start of data in @page.
* @lblk_num: Logical block number.
*
* Decrypts page in-place using the ctx encryption context.
*
@@ -274,76 +308,17 @@ EXPORT_SYMBOL(fscrypt_encrypt_page);
*
* Return: Zero on success, non-zero otherwise.
*/
int fscrypt_decrypt_page(struct page *page)
int fscrypt_decrypt_page(const struct inode *inode, struct page *page,
unsigned int len, unsigned int offs, u64 lblk_num)
{
BUG_ON(!PageLocked(page));
if (!(inode->i_sb->s_cop->flags & FS_CFLG_OWN_PAGES))
BUG_ON(!PageLocked(page));
return do_page_crypto(page->mapping->host,
FS_DECRYPT, page->index, page, page, GFP_NOFS);
return fscrypt_do_page_crypto(inode, FS_DECRYPT, lblk_num, page, page,
len, offs, GFP_NOFS);
}
EXPORT_SYMBOL(fscrypt_decrypt_page);
int fscrypt_zeroout_range(struct inode *inode, pgoff_t lblk,
sector_t pblk, unsigned int len)
{
struct fscrypt_ctx *ctx;
struct page *ciphertext_page = NULL;
struct bio *bio;
int ret, err = 0;
BUG_ON(inode->i_sb->s_blocksize != PAGE_SIZE);
ctx = fscrypt_get_ctx(inode, GFP_NOFS);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
ciphertext_page = alloc_bounce_page(ctx, GFP_NOWAIT);
if (IS_ERR(ciphertext_page)) {
err = PTR_ERR(ciphertext_page);
goto errout;
}
while (len--) {
err = do_page_crypto(inode, FS_ENCRYPT, lblk,
ZERO_PAGE(0), ciphertext_page,
GFP_NOFS);
if (err)
goto errout;
bio = bio_alloc(GFP_NOWAIT, 1);
if (!bio) {
err = -ENOMEM;
goto errout;
}
bio->bi_bdev = inode->i_sb->s_bdev;
bio->bi_iter.bi_sector =
pblk << (inode->i_sb->s_blocksize_bits - 9);
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
ret = bio_add_page(bio, ciphertext_page,
inode->i_sb->s_blocksize, 0);
if (ret != inode->i_sb->s_blocksize) {
/* should never happen! */
WARN_ON(1);
bio_put(bio);
err = -EIO;
goto errout;
}
err = submit_bio_wait(bio);
if ((err == 0) && bio->bi_error)
err = -EIO;
bio_put(bio);
if (err)
goto errout;
lblk++;
pblk++;
}
err = 0;
errout:
fscrypt_release_ctx(ctx);
return err;
}
EXPORT_SYMBOL(fscrypt_zeroout_range);
/*
* Validate dentries for encrypted directories to make sure we aren't
* potentially caching stale data after a key has been added or
@@ -392,63 +367,6 @@ const struct dentry_operations fscrypt_d_ops = {
};
EXPORT_SYMBOL(fscrypt_d_ops);
/*
* Call fscrypt_decrypt_page on every single page, reusing the encryption
* context.
*/
static void completion_pages(struct work_struct *work)
{
struct fscrypt_ctx *ctx =
container_of(work, struct fscrypt_ctx, r.work);
struct bio *bio = ctx->r.bio;
struct bio_vec *bv;
int i;
bio_for_each_segment_all(bv, bio, i) {
struct page *page = bv->bv_page;
int ret = fscrypt_decrypt_page(page);
if (ret) {
WARN_ON_ONCE(1);
SetPageError(page);
} else {
SetPageUptodate(page);
}
unlock_page(page);
}
fscrypt_release_ctx(ctx);
bio_put(bio);
}
void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *ctx, struct bio *bio)
{
INIT_WORK(&ctx->r.work, completion_pages);
ctx->r.bio = bio;
queue_work(fscrypt_read_workqueue, &ctx->r.work);
}
EXPORT_SYMBOL(fscrypt_decrypt_bio_pages);
void fscrypt_pullback_bio_page(struct page **page, bool restore)
{
struct fscrypt_ctx *ctx;
struct page *bounce_page;
/* The bounce data pages are unmapped. */
if ((*page)->mapping)
return;
/* The bounce data page is unmapped. */
bounce_page = *page;
ctx = (struct fscrypt_ctx *)page_private(bounce_page);
/* restore control page */
*page = ctx->w.control_page;
if (restore)
fscrypt_restore_control_page(bounce_page);
}
EXPORT_SYMBOL(fscrypt_pullback_bio_page);
void fscrypt_restore_control_page(struct page *page)
{
struct fscrypt_ctx *ctx;
@@ -474,17 +392,22 @@ static void fscrypt_destroy(void)
/**
* fscrypt_initialize() - allocate major buffers for fs encryption.
* @cop_flags: fscrypt operations flags
*
* We only call this when we start accessing encrypted files, since it
* results in memory getting allocated that wouldn't otherwise be used.
*
* Return: Zero on success, non-zero otherwise.
*/
int fscrypt_initialize(void)
int fscrypt_initialize(unsigned int cop_flags)
{
int i, res = -ENOMEM;
if (fscrypt_bounce_page_pool)
/*
* No need to allocate a bounce page pool if there already is one or
* this FS won't use it.
*/
if (cop_flags & FS_CFLG_OWN_PAGES || fscrypt_bounce_page_pool)
return 0;
mutex_lock(&fscrypt_init_mutex);
@@ -513,7 +436,6 @@ fail:
mutex_unlock(&fscrypt_init_mutex);
return res;
}
EXPORT_SYMBOL(fscrypt_initialize);
/**
* fscrypt_init() - Set up for fs encryption.

View File

@@ -12,7 +12,7 @@
#include <linux/scatterlist.h>
#include <linux/ratelimit.h>
#include <linux/fscrypto.h>
#include "fscrypt_private.h"
/**
* fname_crypt_complete() - completion callback for filename crypto
@@ -159,6 +159,8 @@ static int fname_decrypt(struct inode *inode,
static const char *lookup_table =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
#define BASE64_CHARS(nbytes) DIV_ROUND_UP((nbytes) * 4, 3)
/**
* digest_encode() -
*
@@ -209,7 +211,7 @@ static int digest_decode(const char *src, int len, char *dst)
return cp - dst;
}
u32 fscrypt_fname_encrypted_size(struct inode *inode, u32 ilen)
u32 fscrypt_fname_encrypted_size(const struct inode *inode, u32 ilen)
{
int padding = 32;
struct fscrypt_info *ci = inode->i_crypt_info;
@@ -227,14 +229,17 @@ EXPORT_SYMBOL(fscrypt_fname_encrypted_size);
* Allocates an output buffer that is sufficient for the crypto operation
* specified by the context and the direction.
*/
int fscrypt_fname_alloc_buffer(struct inode *inode,
int fscrypt_fname_alloc_buffer(const struct inode *inode,
u32 ilen, struct fscrypt_str *crypto_str)
{
unsigned int olen = fscrypt_fname_encrypted_size(inode, ilen);
u32 olen = fscrypt_fname_encrypted_size(inode, ilen);
const u32 max_encoded_len =
max_t(u32, BASE64_CHARS(FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE),
1 + BASE64_CHARS(sizeof(struct fscrypt_digested_name)));
crypto_str->len = olen;
if (olen < FS_FNAME_CRYPTO_DIGEST_SIZE * 2)
olen = FS_FNAME_CRYPTO_DIGEST_SIZE * 2;
olen = max(olen, max_encoded_len);
/*
* Allocated buffer can hold one more character to null-terminate the
* string
@@ -266,6 +271,10 @@ EXPORT_SYMBOL(fscrypt_fname_free_buffer);
*
* The caller must have allocated sufficient memory for the @oname string.
*
* If the key is available, we'll decrypt the disk name; otherwise, we'll encode
* it for presentation. Short names are directly base64-encoded, while long
* names are encoded in fscrypt_digested_name format.
*
* Return: 0 on success, -errno on failure
*/
int fscrypt_fname_disk_to_usr(struct inode *inode,
@@ -274,7 +283,7 @@ int fscrypt_fname_disk_to_usr(struct inode *inode,
struct fscrypt_str *oname)
{
const struct qstr qname = FSTR_TO_QSTR(iname);
char buf[24];
struct fscrypt_digested_name digested_name;
if (fscrypt_is_dot_dotdot(&qname)) {
oname->name[0] = '.';
@@ -289,20 +298,24 @@ int fscrypt_fname_disk_to_usr(struct inode *inode,
if (inode->i_crypt_info)
return fname_decrypt(inode, iname, oname);
if (iname->len <= FS_FNAME_CRYPTO_DIGEST_SIZE) {
if (iname->len <= FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE) {
oname->len = digest_encode(iname->name, iname->len,
oname->name);
return 0;
}
if (hash) {
memcpy(buf, &hash, 4);
memcpy(buf + 4, &minor_hash, 4);
digested_name.hash = hash;
digested_name.minor_hash = minor_hash;
} else {
memset(buf, 0, 8);
digested_name.hash = 0;
digested_name.minor_hash = 0;
}
memcpy(buf + 8, iname->name + ((iname->len - 17) & ~15), 16);
memcpy(digested_name.digest,
FSCRYPT_FNAME_DIGEST(iname->name, iname->len),
FSCRYPT_FNAME_DIGEST_SIZE);
oname->name[0] = '_';
oname->len = 1 + digest_encode(buf, 24, oname->name + 1);
oname->len = 1 + digest_encode((const char *)&digested_name,
sizeof(digested_name), oname->name + 1);
return 0;
}
EXPORT_SYMBOL(fscrypt_fname_disk_to_usr);
@@ -332,14 +345,39 @@ int fscrypt_fname_usr_to_disk(struct inode *inode,
* in a directory. Consequently, a user space name cannot be mapped to
* a disk-space name
*/
return -EACCES;
return -ENOKEY;
}
EXPORT_SYMBOL(fscrypt_fname_usr_to_disk);
/**
* fscrypt_setup_filename() - prepare to search a possibly encrypted directory
* @dir: the directory that will be searched
* @iname: the user-provided filename being searched for
* @lookup: 1 if we're allowed to proceed without the key because it's
* ->lookup() or we're finding the dir_entry for deletion; 0 if we cannot
* proceed without the key because we're going to create the dir_entry.
* @fname: the filename information to be filled in
*
* Given a user-provided filename @iname, this function sets @fname->disk_name
* to the name that would be stored in the on-disk directory entry, if possible.
* If the directory is unencrypted this is simply @iname. Else, if we have the
* directory's encryption key, then @iname is the plaintext, so we encrypt it to
* get the disk_name.
*
* Else, for keyless @lookup operations, @iname is the presented ciphertext, so
* we decode it to get either the ciphertext disk_name (for short names) or the
* fscrypt_digested_name (for long names). Non-@lookup operations will be
* impossible in this case, so we fail them with ENOKEY.
*
* If successful, fscrypt_free_filename() must be called later to clean up.
*
* Return: 0 on success, -errno on failure
*/
int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname,
int lookup, struct fscrypt_name *fname)
{
int ret = 0, bigname = 0;
int ret;
int digested;
memset(fname, 0, sizeof(struct fscrypt_name));
fname->usr_fname = iname;
@@ -367,31 +405,43 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname,
return 0;
}
if (!lookup)
return -EACCES;
return -ENOKEY;
/*
* We don't have the key and we are doing a lookup; decode the
* user-supplied name
*/
if (iname->name[0] == '_')
bigname = 1;
if ((bigname && (iname->len != 33)) || (!bigname && (iname->len > 43)))
return -ENOENT;
if (iname->name[0] == '_') {
if (iname->len !=
1 + BASE64_CHARS(sizeof(struct fscrypt_digested_name)))
return -ENOENT;
digested = 1;
} else {
if (iname->len >
BASE64_CHARS(FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE))
return -ENOENT;
digested = 0;
}
fname->crypto_buf.name = kmalloc(32, GFP_KERNEL);
fname->crypto_buf.name =
kmalloc(max_t(size_t, FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE,
sizeof(struct fscrypt_digested_name)),
GFP_KERNEL);
if (fname->crypto_buf.name == NULL)
return -ENOMEM;
ret = digest_decode(iname->name + bigname, iname->len - bigname,
ret = digest_decode(iname->name + digested, iname->len - digested,
fname->crypto_buf.name);
if (ret < 0) {
ret = -ENOENT;
goto errout;
}
fname->crypto_buf.len = ret;
if (bigname) {
memcpy(&fname->hash, fname->crypto_buf.name, 4);
memcpy(&fname->minor_hash, fname->crypto_buf.name + 4, 4);
if (digested) {
const struct fscrypt_digested_name *n =
(const void *)fname->crypto_buf.name;
fname->hash = n->hash;
fname->minor_hash = n->minor_hash;
} else {
fname->disk_name.name = fname->crypto_buf.name;
fname->disk_name.len = fname->crypto_buf.len;

View File

@@ -0,0 +1,90 @@
/*
* fscrypt_private.h
*
* Copyright (C) 2015, Google, Inc.
*
* This contains encryption key functions.
*
* Written by Michael Halcrow, Ildar Muslukhov, and Uday Savagaonkar, 2015.
*/
#ifndef _FSCRYPT_PRIVATE_H
#define _FSCRYPT_PRIVATE_H
#include <linux/fscrypt_supp.h>
/* Encryption parameters */
#define FS_XTS_TWEAK_SIZE 16
#define FS_AES_128_ECB_KEY_SIZE 16
#define FS_AES_256_GCM_KEY_SIZE 32
#define FS_AES_256_CBC_KEY_SIZE 32
#define FS_AES_256_CTS_KEY_SIZE 32
#define FS_AES_256_XTS_KEY_SIZE 64
#define FS_KEY_DERIVATION_NONCE_SIZE 16
/**
* Encryption context for inode
*
* Protector format:
* 1 byte: Protector format (1 = this version)
* 1 byte: File contents encryption mode
* 1 byte: File names encryption mode
* 1 byte: Flags
* 8 bytes: Master Key descriptor
* 16 bytes: Encryption Key derivation nonce
*/
struct fscrypt_context {
u8 format;
u8 contents_encryption_mode;
u8 filenames_encryption_mode;
u8 flags;
u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE];
} __packed;
#define FS_ENCRYPTION_CONTEXT_FORMAT_V1 1
/*
* A pointer to this structure is stored in the file system's in-core
* representation of an inode.
*/
struct fscrypt_info {
u8 ci_data_mode;
u8 ci_filename_mode;
u8 ci_flags;
struct crypto_skcipher *ci_ctfm;
u8 ci_master_key[FS_KEY_DESCRIPTOR_SIZE];
};
typedef enum {
FS_DECRYPT = 0,
FS_ENCRYPT,
} fscrypt_direction_t;
#define FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001
#define FS_CTX_HAS_BOUNCE_BUFFER_FL 0x00000002
struct fscrypt_completion_result {
struct completion completion;
int res;
};
#define DECLARE_FS_COMPLETION_RESULT(ecr) \
struct fscrypt_completion_result ecr = { \
COMPLETION_INITIALIZER_ONSTACK((ecr).completion), 0 }
/* crypto.c */
extern int fscrypt_initialize(unsigned int cop_flags);
extern struct workqueue_struct *fscrypt_read_workqueue;
extern int fscrypt_do_page_crypto(const struct inode *inode,
fscrypt_direction_t rw, u64 lblk_num,
struct page *src_page,
struct page *dest_page,
unsigned int len, unsigned int offs,
gfp_t gfp_flags);
extern struct page *fscrypt_alloc_bounce_page(struct fscrypt_ctx *ctx,
gfp_t gfp_flags);
#endif /* _FSCRYPT_PRIVATE_H */

View File

@@ -10,7 +10,7 @@
#include <keys/user-type.h>
#include <linux/scatterlist.h>
#include <linux/fscrypto.h>
#include "fscrypt_private.h"
static void derive_crypt_complete(struct crypto_async_request *req, int rc)
{
@@ -77,26 +77,22 @@ out:
static int validate_user_key(struct fscrypt_info *crypt_info,
struct fscrypt_context *ctx, u8 *raw_key,
u8 *prefix, int prefix_size)
const char *prefix)
{
u8 *full_key_descriptor;
char *description;
struct key *keyring_key;
struct fscrypt_key *master_key;
const struct user_key_payload *ukp;
int full_key_len = prefix_size + (FS_KEY_DESCRIPTOR_SIZE * 2) + 1;
int res;
full_key_descriptor = kmalloc(full_key_len, GFP_NOFS);
if (!full_key_descriptor)
description = kasprintf(GFP_NOFS, "%s%*phN", prefix,
FS_KEY_DESCRIPTOR_SIZE,
ctx->master_key_descriptor);
if (!description)
return -ENOMEM;
memcpy(full_key_descriptor, prefix, prefix_size);
sprintf(full_key_descriptor + prefix_size,
"%*phN", FS_KEY_DESCRIPTOR_SIZE,
ctx->master_key_descriptor);
full_key_descriptor[full_key_len - 1] = '\0';
keyring_key = request_key(&key_type_logon, full_key_descriptor, NULL);
kfree(full_key_descriptor);
keyring_key = request_key(&key_type_logon, description, NULL);
kfree(description);
if (IS_ERR(keyring_key))
return PTR_ERR(keyring_key);
down_read(&keyring_key->sem);
@@ -107,7 +103,6 @@ static int validate_user_key(struct fscrypt_info *crypt_info,
res = -ENOKEY;
goto out;
}
down_read(&keyring_key->sem);
ukp = user_key_payload_locked(keyring_key);
if (ukp->datalen != sizeof(struct fscrypt_key)) {
res = -EINVAL;
@@ -184,21 +179,21 @@ int fscrypt_get_encryption_info(struct inode *inode)
if (inode->i_crypt_info)
return 0;
res = fscrypt_initialize();
res = fscrypt_initialize(inode->i_sb->s_cop->flags);
if (res)
return res;
if (!inode->i_sb->s_cop->get_context)
return -EOPNOTSUPP;
res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
if (res < 0) {
if (!fscrypt_dummy_context_enabled(inode))
if (!fscrypt_dummy_context_enabled(inode) ||
inode->i_sb->s_cop->is_encrypted(inode))
return res;
/* Fake up a context for an unencrypted directory */
memset(&ctx, 0, sizeof(ctx));
ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1;
ctx.contents_encryption_mode = FS_ENCRYPTION_MODE_AES_256_XTS;
ctx.filenames_encryption_mode = FS_ENCRYPTION_MODE_AES_256_CTS;
ctx.flags = 0;
memset(ctx.master_key_descriptor, 0x42, FS_KEY_DESCRIPTOR_SIZE);
} else if (res != sizeof(ctx)) {
return -EINVAL;
}
@@ -233,20 +228,10 @@ int fscrypt_get_encryption_info(struct inode *inode)
if (!raw_key)
goto out;
if (fscrypt_dummy_context_enabled(inode)) {
memset(raw_key, 0x42, FS_AES_256_XTS_KEY_SIZE);
goto got_key;
}
res = validate_user_key(crypt_info, &ctx, raw_key,
FS_KEY_DESC_PREFIX, FS_KEY_DESC_PREFIX_SIZE);
res = validate_user_key(crypt_info, &ctx, raw_key, FS_KEY_DESC_PREFIX);
if (res && inode->i_sb->s_cop->key_prefix) {
u8 *prefix = NULL;
int prefix_size, res2;
prefix_size = inode->i_sb->s_cop->key_prefix(inode, &prefix);
res2 = validate_user_key(crypt_info, &ctx, raw_key,
prefix, prefix_size);
int res2 = validate_user_key(crypt_info, &ctx, raw_key,
inode->i_sb->s_cop->key_prefix);
if (res2) {
if (res2 == -ENOKEY)
res = -ENOKEY;
@@ -255,7 +240,6 @@ int fscrypt_get_encryption_info(struct inode *inode)
} else if (res) {
goto out;
}
got_key:
ctfm = crypto_alloc_skcipher(cipher_str, 0, 0);
if (!ctfm || IS_ERR(ctfm)) {
res = ctfm ? PTR_ERR(ctfm) : -ENOMEM;

View File

@@ -10,76 +10,41 @@
#include <linux/random.h>
#include <linux/string.h>
#include <linux/fscrypto.h>
#include <linux/mount.h>
static int inode_has_encryption_context(struct inode *inode)
{
if (!inode->i_sb->s_cop->get_context)
return 0;
return (inode->i_sb->s_cop->get_context(inode, NULL, 0L) > 0);
}
#include "fscrypt_private.h"
/*
* check whether the policy is consistent with the encryption context
* for the inode
* check whether an encryption policy is consistent with an encryption context
*/
static int is_encryption_context_consistent_with_policy(struct inode *inode,
static bool is_encryption_context_consistent_with_policy(
const struct fscrypt_context *ctx,
const struct fscrypt_policy *policy)
{
struct fscrypt_context ctx;
int res;
if (!inode->i_sb->s_cop->get_context)
return 0;
res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
if (res != sizeof(ctx))
return 0;
return (memcmp(ctx.master_key_descriptor, policy->master_key_descriptor,
FS_KEY_DESCRIPTOR_SIZE) == 0 &&
(ctx.flags == policy->flags) &&
(ctx.contents_encryption_mode ==
policy->contents_encryption_mode) &&
(ctx.filenames_encryption_mode ==
policy->filenames_encryption_mode));
return memcmp(ctx->master_key_descriptor, policy->master_key_descriptor,
FS_KEY_DESCRIPTOR_SIZE) == 0 &&
(ctx->flags == policy->flags) &&
(ctx->contents_encryption_mode ==
policy->contents_encryption_mode) &&
(ctx->filenames_encryption_mode ==
policy->filenames_encryption_mode);
}
static int create_encryption_context_from_policy(struct inode *inode,
const struct fscrypt_policy *policy)
{
struct fscrypt_context ctx;
int res;
if (!inode->i_sb->s_cop->set_context)
return -EOPNOTSUPP;
if (inode->i_sb->s_cop->prepare_context) {
res = inode->i_sb->s_cop->prepare_context(inode);
if (res)
return res;
}
ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1;
memcpy(ctx.master_key_descriptor, policy->master_key_descriptor,
FS_KEY_DESCRIPTOR_SIZE);
if (!fscrypt_valid_contents_enc_mode(
policy->contents_encryption_mode)) {
printk(KERN_WARNING
"%s: Invalid contents encryption mode %d\n", __func__,
policy->contents_encryption_mode);
policy->contents_encryption_mode))
return -EINVAL;
}
if (!fscrypt_valid_filenames_enc_mode(
policy->filenames_encryption_mode)) {
printk(KERN_WARNING
"%s: Invalid filenames encryption mode %d\n", __func__,
policy->filenames_encryption_mode);
policy->filenames_encryption_mode))
return -EINVAL;
}
if (policy->flags & ~FS_POLICY_FLAGS_VALID)
return -EINVAL;
@@ -93,16 +58,20 @@ static int create_encryption_context_from_policy(struct inode *inode,
return inode->i_sb->s_cop->set_context(inode, &ctx, sizeof(ctx), NULL);
}
int fscrypt_process_policy(struct file *filp,
const struct fscrypt_policy *policy)
int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg)
{
struct fscrypt_policy policy;
struct inode *inode = file_inode(filp);
int ret;
struct fscrypt_context ctx;
if (copy_from_user(&policy, arg, sizeof(policy)))
return -EFAULT;
if (!inode_owner_or_capable(inode))
return -EACCES;
if (policy->version != 0)
if (policy.version != 0)
return -EINVAL;
ret = mnt_want_write_file(filp);
@@ -111,22 +80,23 @@ int fscrypt_process_policy(struct file *filp,
inode_lock(inode);
if (!inode_has_encryption_context(inode)) {
ret = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
if (ret == -ENODATA) {
if (!S_ISDIR(inode->i_mode))
ret = -EINVAL;
else if (!inode->i_sb->s_cop->empty_dir)
ret = -EOPNOTSUPP;
ret = -ENOTDIR;
else if (!inode->i_sb->s_cop->empty_dir(inode))
ret = -ENOTEMPTY;
else
ret = create_encryption_context_from_policy(inode,
policy);
} else if (!is_encryption_context_consistent_with_policy(inode,
policy)) {
printk(KERN_WARNING
"%s: Policy inconsistent with encryption context\n",
__func__);
ret = -EINVAL;
&policy);
} else if (ret == sizeof(ctx) &&
is_encryption_context_consistent_with_policy(&ctx,
&policy)) {
/* The file already uses the same encryption policy. */
ret = 0;
} else if (ret >= 0 || ret == -ERANGE) {
/* The file already uses a different encryption policy. */
ret = -EEXIST;
}
inode_unlock(inode);
@@ -134,32 +104,38 @@ int fscrypt_process_policy(struct file *filp,
mnt_drop_write_file(filp);
return ret;
}
EXPORT_SYMBOL(fscrypt_process_policy);
EXPORT_SYMBOL(fscrypt_ioctl_set_policy);
int fscrypt_get_policy(struct inode *inode, struct fscrypt_policy *policy)
int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg)
{
struct inode *inode = file_inode(filp);
struct fscrypt_context ctx;
struct fscrypt_policy policy;
int res;
if (!inode->i_sb->s_cop->get_context ||
!inode->i_sb->s_cop->is_encrypted(inode))
if (!inode->i_sb->s_cop->is_encrypted(inode))
return -ENODATA;
res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
if (res < 0 && res != -ERANGE)
return res;
if (res != sizeof(ctx))
return -ENODATA;
return -EINVAL;
if (ctx.format != FS_ENCRYPTION_CONTEXT_FORMAT_V1)
return -EINVAL;
policy->version = 0;
policy->contents_encryption_mode = ctx.contents_encryption_mode;
policy->filenames_encryption_mode = ctx.filenames_encryption_mode;
policy->flags = ctx.flags;
memcpy(&policy->master_key_descriptor, ctx.master_key_descriptor,
policy.version = 0;
policy.contents_encryption_mode = ctx.contents_encryption_mode;
policy.filenames_encryption_mode = ctx.filenames_encryption_mode;
policy.flags = ctx.flags;
memcpy(policy.master_key_descriptor, ctx.master_key_descriptor,
FS_KEY_DESCRIPTOR_SIZE);
if (copy_to_user(arg, &policy, sizeof(policy)))
return -EFAULT;
return 0;
}
EXPORT_SYMBOL(fscrypt_get_policy);
EXPORT_SYMBOL(fscrypt_ioctl_get_policy);
/**
* fscrypt_has_permitted_context() - is a file's encryption policy permitted
@@ -258,9 +234,9 @@ EXPORT_SYMBOL(fscrypt_has_permitted_context);
* @parent: Parent inode from which the context is inherited.
* @child: Child inode that inherits the context from @parent.
* @fs_data: private data given by FS.
* @preload: preload child i_crypt_info
* @preload: preload child i_crypt_info if true
*
* Return: Zero on success, non-zero otherwise
* Return: 0 on success, -errno on failure
*/
int fscrypt_inherit_context(struct inode *parent, struct inode *child,
void *fs_data, bool preload)
@@ -269,9 +245,6 @@ int fscrypt_inherit_context(struct inode *parent, struct inode *child,
struct fscrypt_info *ci;
int res;
if (!parent->i_sb->s_cop->set_context)
return -EOPNOTSUPP;
res = fscrypt_get_encryption_info(parent);
if (res < 0)
return res;
@@ -281,19 +254,11 @@ int fscrypt_inherit_context(struct inode *parent, struct inode *child,
return -ENOKEY;
ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1;
if (fscrypt_dummy_context_enabled(parent)) {
ctx.contents_encryption_mode = FS_ENCRYPTION_MODE_AES_256_XTS;
ctx.filenames_encryption_mode = FS_ENCRYPTION_MODE_AES_256_CTS;
ctx.flags = 0;
memset(ctx.master_key_descriptor, 0x42, FS_KEY_DESCRIPTOR_SIZE);
res = 0;
} else {
ctx.contents_encryption_mode = ci->ci_data_mode;
ctx.filenames_encryption_mode = ci->ci_filename_mode;
ctx.flags = ci->ci_flags;
memcpy(ctx.master_key_descriptor, ci->ci_master_key,
FS_KEY_DESCRIPTOR_SIZE);
}
ctx.contents_encryption_mode = ci->ci_data_mode;
ctx.filenames_encryption_mode = ci->ci_filename_mode;
ctx.flags = ci->ci_flags;
memcpy(ctx.master_key_descriptor, ci->ci_master_key,
FS_KEY_DESCRIPTOR_SIZE);
get_random_bytes(ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE);
res = parent->i_sb->s_cop->set_context(child, &ctx,
sizeof(ctx), fs_data);

View File

@@ -32,7 +32,11 @@
#include <linux/percpu_counter.h>
#include <linux/ratelimit.h>
#include <crypto/hash.h>
#include <linux/fscrypto.h>
#ifdef CONFIG_EXT4_FS_ENCRYPTION
#include <linux/fscrypt_supp.h>
#else
#include <linux/fscrypt_notsupp.h>
#endif
#include <linux/falloc.h>
#include <linux/percpu-rwsem.h>
#ifdef __KERNEL__
@@ -1342,11 +1346,6 @@ struct ext4_super_block {
/* Number of quota types we support */
#define EXT4_MAXQUOTAS 3
#ifdef CONFIG_EXT4_FS_ENCRYPTION
#define EXT4_KEY_DESC_PREFIX "ext4:"
#define EXT4_KEY_DESC_PREFIX_SIZE 5
#endif
/*
* fourth extended-fs super-block data in memory
*/
@@ -1516,12 +1515,6 @@ struct ext4_sb_info {
/* Barrier between changing inodes' journal flags and writepages ops. */
struct percpu_rw_semaphore s_journal_flag_rwsem;
/* Encryption support */
#ifdef CONFIG_EXT4_FS_ENCRYPTION
u8 key_prefix[EXT4_KEY_DESC_PREFIX_SIZE];
u8 key_prefix_size;
#endif
};
static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -2277,11 +2270,6 @@ extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
struct ext4_group_desc *gdp);
ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
static inline int ext4_sb_has_crypto(struct super_block *sb)
{
return ext4_has_feature_encrypt(sb);
}
static inline bool ext4_encrypted_inode(struct inode *inode)
{
return ext4_test_inode_flag(inode, EXT4_INODE_ENCRYPT);
@@ -2330,28 +2318,6 @@ static inline int ext4_fname_setup_filename(struct inode *dir,
}
static inline void ext4_fname_free_filename(struct ext4_filename *fname) { }
#define fscrypt_set_d_op(i)
#define fscrypt_get_ctx fscrypt_notsupp_get_ctx
#define fscrypt_release_ctx fscrypt_notsupp_release_ctx
#define fscrypt_encrypt_page fscrypt_notsupp_encrypt_page
#define fscrypt_decrypt_page fscrypt_notsupp_decrypt_page
#define fscrypt_decrypt_bio_pages fscrypt_notsupp_decrypt_bio_pages
#define fscrypt_pullback_bio_page fscrypt_notsupp_pullback_bio_page
#define fscrypt_restore_control_page fscrypt_notsupp_restore_control_page
#define fscrypt_zeroout_range fscrypt_notsupp_zeroout_range
#define fscrypt_process_policy fscrypt_notsupp_process_policy
#define fscrypt_get_policy fscrypt_notsupp_get_policy
#define fscrypt_has_permitted_context fscrypt_notsupp_has_permitted_context
#define fscrypt_inherit_context fscrypt_notsupp_inherit_context
#define fscrypt_get_encryption_info fscrypt_notsupp_get_encryption_info
#define fscrypt_put_encryption_info fscrypt_notsupp_put_encryption_info
#define fscrypt_setup_filename fscrypt_notsupp_setup_filename
#define fscrypt_free_filename fscrypt_notsupp_free_filename
#define fscrypt_fname_encrypted_size fscrypt_notsupp_fname_encrypted_size
#define fscrypt_fname_alloc_buffer fscrypt_notsupp_fname_alloc_buffer
#define fscrypt_fname_free_buffer fscrypt_notsupp_fname_free_buffer
#define fscrypt_fname_disk_to_usr fscrypt_notsupp_fname_disk_to_usr
#define fscrypt_fname_usr_to_disk fscrypt_notsupp_fname_usr_to_disk
#endif
/* dir.c */
@@ -2373,17 +2339,16 @@ extern int ext4_find_dest_de(struct inode *dir, struct inode *inode,
void *buf, int buf_size,
struct ext4_filename *fname,
struct ext4_dir_entry_2 **dest_de);
int ext4_insert_dentry(struct inode *dir,
struct inode *inode,
struct ext4_dir_entry_2 *de,
int buf_size,
struct ext4_filename *fname);
void ext4_insert_dentry(struct inode *inode,
struct ext4_dir_entry_2 *de,
int buf_size,
struct ext4_filename *fname);
static inline void ext4_update_dx_flag(struct inode *inode)
{
if (!ext4_has_feature_dir_index(inode->i_sb))
ext4_clear_inode_flag(inode, EXT4_INODE_INDEX);
}
static unsigned char ext4_filetype_table[] = {
static const unsigned char ext4_filetype_table[] = {
DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
};
@@ -3070,7 +3035,7 @@ extern int ext4_handle_dirty_dirent_node(handle_t *handle,
struct inode *inode,
struct buffer_head *bh);
#define S_SHIFT 12
static unsigned char ext4_type_by_mode[S_IFMT >> S_SHIFT] = {
static const unsigned char ext4_type_by_mode[S_IFMT >> S_SHIFT] = {
[S_IFREG >> S_SHIFT] = EXT4_FT_REG_FILE,
[S_IFDIR >> S_SHIFT] = EXT4_FT_DIR,
[S_IFCHR >> S_SHIFT] = EXT4_FT_CHRDEV,

View File

@@ -771,7 +771,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
if (err)
return ERR_PTR(err);
if (!fscrypt_has_encryption_key(dir))
return ERR_PTR(-EPERM);
return ERR_PTR(-ENOKEY);
if (!handle)
nblocks += EXT4_DATA_TRANS_BLOCKS(dir->i_sb);
encrypt = 1;
@@ -1093,6 +1093,17 @@ got:
if (err)
goto fail_drop;
/*
* Since the encryption xattr will always be unique, create it first so
* that it's less likely to end up in an external xattr block and
* prevent its deduplication.
*/
if (encrypt) {
err = fscrypt_inherit_context(dir, inode, handle, true);
if (err)
goto fail_free_drop;
}
err = ext4_init_acl(handle, inode, dir);
if (err)
goto fail_free_drop;
@@ -1114,13 +1125,6 @@ got:
ei->i_datasync_tid = handle->h_transaction->t_tid;
}
if (encrypt) {
/* give pointer to avoid set_context with journal ops. */
err = fscrypt_inherit_context(dir, inode, &encrypt, true);
if (err)
goto fail_free_drop;
}
err = ext4_mark_inode_dirty(handle, inode);
if (err) {
ext4_std_error(sb, err);

View File

@@ -1021,7 +1021,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle,
err = ext4_journal_get_write_access(handle, iloc->bh);
if (err)
return err;
ext4_insert_dentry(dir, inode, de, inline_size, fname);
ext4_insert_dentry(inode, de, inline_size, fname);
ext4_show_inline_dir(dir, iloc->bh, inline_start, inline_size);

View File

@@ -1165,7 +1165,8 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
if (unlikely(err))
page_zero_new_buffers(page, from, to);
else if (decrypt)
err = fscrypt_decrypt_page(page);
err = fscrypt_decrypt_page(page->mapping->host, page,
PAGE_SIZE, 0, page->index);
return err;
}
#endif
@@ -3755,7 +3756,8 @@ static int __ext4_block_zero_page_range(handle_t *handle,
/* We expect the key to be set. */
BUG_ON(!fscrypt_has_encryption_key(inode));
BUG_ON(blocksize != PAGE_SIZE);
WARN_ON_ONCE(fscrypt_decrypt_page(page));
WARN_ON_ONCE(fscrypt_decrypt_page(page->mapping->host,
page, PAGE_SIZE, 0, page->index));
}
}
if (ext4_should_journal_data(inode)) {

View File

@@ -191,6 +191,7 @@ journal_err_out:
return err;
}
#ifdef CONFIG_EXT4_FS_ENCRYPTION
static int uuid_is_zero(__u8 u[16])
{
int i;
@@ -200,6 +201,7 @@ static int uuid_is_zero(__u8 u[16])
return 0;
return 1;
}
#endif
static int ext4_ioctl_setflags(struct inode *inode,
unsigned int flags)
@@ -765,28 +767,19 @@ resizefs_out:
}
case EXT4_IOC_PRECACHE_EXTENTS:
return ext4_ext_precache(inode);
case EXT4_IOC_SET_ENCRYPTION_POLICY: {
#ifdef CONFIG_EXT4_FS_ENCRYPTION
struct fscrypt_policy policy;
case EXT4_IOC_SET_ENCRYPTION_POLICY:
if (!ext4_has_feature_encrypt(sb))
return -EOPNOTSUPP;
return fscrypt_ioctl_set_policy(filp, (const void __user *)arg);
if (copy_from_user(&policy,
(struct fscrypt_policy __user *)arg,
sizeof(policy)))
return -EFAULT;
return fscrypt_process_policy(filp, &policy);
#else
return -EOPNOTSUPP;
#endif
}
case EXT4_IOC_GET_ENCRYPTION_PWSALT: {
#ifdef CONFIG_EXT4_FS_ENCRYPTION
int err, err2;
struct ext4_sb_info *sbi = EXT4_SB(sb);
handle_t *handle;
if (!ext4_sb_has_crypto(sb))
if (!ext4_has_feature_encrypt(sb))
return -EOPNOTSUPP;
if (uuid_is_zero(sbi->s_es->s_encrypt_pw_salt)) {
err = mnt_want_write_file(filp);
@@ -816,30 +809,18 @@ resizefs_out:
sbi->s_es->s_encrypt_pw_salt, 16))
return -EFAULT;
return 0;
}
case EXT4_IOC_GET_ENCRYPTION_POLICY: {
#ifdef CONFIG_EXT4_FS_ENCRYPTION
struct fscrypt_policy policy;
int err = 0;
if (!ext4_encrypted_inode(inode))
return -ENOENT;
err = fscrypt_get_policy(inode, &policy);
if (err)
return err;
if (copy_to_user((void __user *)arg, &policy, sizeof(policy)))
return -EFAULT;
return 0;
#else
return -EOPNOTSUPP;
#endif
}
case EXT4_IOC_GET_ENCRYPTION_POLICY:
return fscrypt_ioctl_get_policy(filp, (void __user *)arg);
case EXT4_IOC_FSGETXATTR:
{
struct fsxattr fa;
memset(&fa, 0, sizeof(struct fsxattr));
ext4_get_inode_flags(ei);
fa.fsx_xflags = ext4_iflags_to_xflags(ei->i_flags & EXT4_FL_USER_VISIBLE);
if (ext4_has_feature_project(inode->i_sb)) {

View File

@@ -1237,37 +1237,24 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
}
/*
* NOTE! unlike strncmp, ext4_match returns 1 for success, 0 for failure.
* Test whether a directory entry matches the filename being searched for.
*
* `len <= EXT4_NAME_LEN' is guaranteed by caller.
* `de != NULL' is guaranteed by caller.
* Return: %true if the directory entry matches, otherwise %false.
*/
static inline int ext4_match(struct ext4_filename *fname,
struct ext4_dir_entry_2 *de)
static inline bool ext4_match(const struct ext4_filename *fname,
const struct ext4_dir_entry_2 *de)
{
const void *name = fname_name(fname);
u32 len = fname_len(fname);
struct fscrypt_name f;
if (!de->inode)
return 0;
return false;
f.usr_fname = fname->usr_fname;
f.disk_name = fname->disk_name;
#ifdef CONFIG_EXT4_FS_ENCRYPTION
if (unlikely(!name)) {
if (fname->usr_fname->name[0] == '_') {
int ret;
if (de->name_len <= 32)
return 0;
ret = memcmp(de->name + ((de->name_len - 17) & ~15),
fname->crypto_buf.name + 8, 16);
return (ret == 0) ? 1 : 0;
}
name = fname->crypto_buf.name;
len = fname->crypto_buf.len;
}
f.crypto_buf = fname->crypto_buf;
#endif
if (de->name_len != len)
return 0;
return (memcmp(de->name, name, len) == 0) ? 1 : 0;
return fscrypt_match_name(&f, de->name, de->name_len);
}
/*
@@ -1281,48 +1268,31 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
struct ext4_dir_entry_2 * de;
char * dlimit;
int de_len;
int res;
de = (struct ext4_dir_entry_2 *)search_buf;
dlimit = search_buf + buf_size;
while ((char *) de < dlimit) {
/* this code is executed quadratically often */
/* do minimal checking `by hand' */
if ((char *) de + de->name_len <= dlimit) {
res = ext4_match(fname, de);
if (res < 0) {
res = -1;
goto return_result;
}
if (res > 0) {
/* found a match - just to be sure, do
* a full check */
if (ext4_check_dir_entry(dir, NULL, de, bh,
bh->b_data,
bh->b_size, offset)) {
res = -1;
goto return_result;
}
*res_dir = de;
res = 1;
goto return_result;
}
if ((char *) de + de->name_len <= dlimit &&
ext4_match(fname, de)) {
/* found a match - just to be sure, do
* a full check */
if (ext4_check_dir_entry(dir, NULL, de, bh, bh->b_data,
bh->b_size, offset))
return -1;
*res_dir = de;
return 1;
}
/* prevent looping on a bad block */
de_len = ext4_rec_len_from_disk(de->rec_len,
dir->i_sb->s_blocksize);
if (de_len <= 0) {
res = -1;
goto return_result;
}
if (de_len <= 0)
return -1;
offset += de_len;
de = (struct ext4_dir_entry_2 *) ((char *) de + de_len);
}
res = 0;
return_result:
return res;
return 0;
}
static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
@@ -1378,6 +1348,8 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
return NULL;
retval = ext4_fname_setup_filename(dir, d_name, 1, &fname);
if (retval == -ENOENT)
return NULL;
if (retval)
return ERR_PTR(retval);
@@ -1614,16 +1586,9 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
if (!IS_ERR(inode) && ext4_encrypted_inode(dir) &&
(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
!fscrypt_has_permitted_context(dir, inode)) {
int nokey = ext4_encrypted_inode(inode) &&
!fscrypt_has_encryption_key(inode);
if (nokey) {
iput(inode);
return ERR_PTR(-ENOKEY);
}
ext4_warning(inode->i_sb,
"Inconsistent encryption contexts: %lu/%lu",
(unsigned long) dir->i_ino,
(unsigned long) inode->i_ino);
dir->i_ino, inode->i_ino);
iput(inode);
return ERR_PTR(-EPERM);
}
@@ -1831,24 +1796,15 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode,
int nlen, rlen;
unsigned int offset = 0;
char *top;
int res;
de = (struct ext4_dir_entry_2 *)buf;
top = buf + buf_size - reclen;
while ((char *) de <= top) {
if (ext4_check_dir_entry(dir, NULL, de, bh,
buf, buf_size, offset)) {
res = -EFSCORRUPTED;
goto return_result;
}
/* Provide crypto context and crypto buffer to ext4 match */
res = ext4_match(fname, de);
if (res < 0)
goto return_result;
if (res > 0) {
res = -EEXIST;
goto return_result;
}
buf, buf_size, offset))
return -EFSCORRUPTED;
if (ext4_match(fname, de))
return -EEXIST;
nlen = EXT4_DIR_REC_LEN(de->name_len);
rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
if ((de->inode ? rlen - nlen : rlen) >= reclen)
@@ -1856,22 +1812,17 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode,
de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
offset += rlen;
}
if ((char *) de > top)
res = -ENOSPC;
else {
*dest_de = de;
res = 0;
}
return_result:
return res;
return -ENOSPC;
*dest_de = de;
return 0;
}
int ext4_insert_dentry(struct inode *dir,
struct inode *inode,
struct ext4_dir_entry_2 *de,
int buf_size,
struct ext4_filename *fname)
void ext4_insert_dentry(struct inode *inode,
struct ext4_dir_entry_2 *de,
int buf_size,
struct ext4_filename *fname)
{
int nlen, rlen;
@@ -1890,7 +1841,6 @@ int ext4_insert_dentry(struct inode *dir,
ext4_set_de_type(inode->i_sb, de, inode->i_mode);
de->name_len = fname_len(fname);
memcpy(de->name, fname_name(fname), fname_len(fname));
return 0;
}
/*
@@ -1926,11 +1876,8 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
return err;
}
/* By now the buffer is marked for journaling. Due to crypto operations,
* the following function call may fail */
err = ext4_insert_dentry(dir, inode, de, blocksize, fname);
if (err < 0)
return err;
/* By now the buffer is marked for journaling */
ext4_insert_dentry(inode, de, blocksize, fname);
/*
* XXX shouldn't update any times until successful
@@ -3090,7 +3037,7 @@ static int ext4_symlink(struct inode *dir,
if (err)
return err;
if (!fscrypt_has_encryption_key(dir))
return -EPERM;
return -ENOKEY;
disk_link.len = (fscrypt_fname_encrypted_size(dir, len) +
sizeof(struct fscrypt_symlink_data));
sd = kzalloc(disk_link.len, GFP_KERNEL);
@@ -3527,6 +3474,12 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
EXT4_I(old_dentry->d_inode)->i_projid)))
return -EXDEV;
if ((ext4_encrypted_inode(old_dir) &&
!fscrypt_has_encryption_key(old_dir)) ||
(ext4_encrypted_inode(new_dir) &&
!fscrypt_has_encryption_key(new_dir)))
return -ENOKEY;
retval = dquot_initialize(old.dir);
if (retval)
return retval;
@@ -3726,6 +3679,12 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
u8 new_file_type;
int retval;
if ((ext4_encrypted_inode(old_dir) &&
!fscrypt_has_encryption_key(old_dir)) ||
(ext4_encrypted_inode(new_dir) &&
!fscrypt_has_encryption_key(new_dir)))
return -ENOKEY;
if ((ext4_encrypted_inode(old_dir) ||
ext4_encrypted_inode(new_dir)) &&
(old_dir != new_dir) &&

View File

@@ -24,7 +24,6 @@
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/backing-dev.h>
#include <linux/fscrypto.h>
#include "ext4_jbd2.h"
#include "xattr.h"
@@ -470,7 +469,8 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
gfp_t gfp_flags = GFP_NOFS;
retry_encrypt:
data_page = fscrypt_encrypt_page(inode, page, gfp_flags);
data_page = fscrypt_encrypt_page(inode, page, PAGE_SIZE, 0,
page->index, gfp_flags);
if (IS_ERR(data_page)) {
ret = PTR_ERR(data_page);
if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) {

View File

@@ -1102,51 +1102,65 @@ static int ext4_get_context(struct inode *inode, void *ctx, size_t len)
EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, len);
}
static int ext4_key_prefix(struct inode *inode, u8 **key)
{
*key = EXT4_SB(inode->i_sb)->key_prefix;
return EXT4_SB(inode->i_sb)->key_prefix_size;
}
static int ext4_prepare_context(struct inode *inode)
{
return ext4_convert_inline_data(inode);
}
static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
void *fs_data)
{
handle_t *handle;
int res, res2;
handle_t *handle = fs_data;
int res, res2, retries = 0;
/* fs_data is null when internally used. */
if (fs_data) {
res = ext4_xattr_set(inode, EXT4_XATTR_INDEX_ENCRYPTION,
EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx,
len, 0);
res = ext4_convert_inline_data(inode);
if (res)
return res;
/*
* If a journal handle was specified, then the encryption context is
* being set on a new inode via inheritance and is part of a larger
* transaction to create the inode. Otherwise the encryption context is
* being set on an existing inode in its own transaction. Only in the
* latter case should the "retry on ENOSPC" logic be used.
*/
if (handle) {
res = ext4_xattr_set_handle(handle, inode,
EXT4_XATTR_INDEX_ENCRYPTION,
EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
ctx, len, 0);
if (!res) {
ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
ext4_clear_inode_state(inode,
EXT4_STATE_MAY_INLINE_DATA);
/*
* Update inode->i_flags - e.g. S_DAX may get disabled
*/
ext4_set_inode_flags(inode);
}
return res;
}
res = dquot_initialize(inode);
if (res)
return res;
retry:
handle = ext4_journal_start(inode, EXT4_HT_MISC,
ext4_jbd2_credits_xattr(inode));
if (IS_ERR(handle))
return PTR_ERR(handle);
res = ext4_xattr_set(inode, EXT4_XATTR_INDEX_ENCRYPTION,
EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx,
len, 0);
res = ext4_xattr_set_handle(handle, inode, EXT4_XATTR_INDEX_ENCRYPTION,
EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
ctx, len, 0);
if (!res) {
ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
/* Update inode->i_flags - e.g. S_DAX may get disabled */
ext4_set_inode_flags(inode);
res = ext4_mark_inode_dirty(handle, inode);
if (res)
EXT4_ERROR_INODE(inode, "Failed to mark inode dirty");
}
res2 = ext4_journal_stop(handle);
if (res == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry;
if (!res)
res = res2;
return res;
@@ -1163,10 +1177,9 @@ static unsigned ext4_max_namelen(struct inode *inode)
EXT4_NAME_LEN;
}
static struct fscrypt_operations ext4_cryptops = {
static const struct fscrypt_operations ext4_cryptops = {
.key_prefix = "ext4:",
.get_context = ext4_get_context,
.key_prefix = ext4_key_prefix,
.prepare_context = ext4_prepare_context,
.set_context = ext4_set_context,
.dummy_context = ext4_dummy_context,
.is_encrypted = ext4_encrypted_inode,
@@ -1174,7 +1187,7 @@ static struct fscrypt_operations ext4_cryptops = {
.max_namelen = ext4_max_namelen,
};
#else
static struct fscrypt_operations ext4_cryptops = {
static const struct fscrypt_operations ext4_cryptops = {
.is_encrypted = ext4_encrypted_inode,
};
#endif
@@ -4199,11 +4212,6 @@ no_journal:
ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
kfree(orig_data);
#ifdef CONFIG_EXT4_FS_ENCRYPTION
memcpy(sbi->key_prefix, EXT4_KEY_DESC_PREFIX,
EXT4_KEY_DESC_PREFIX_SIZE);
sbi->key_prefix_size = EXT4_KEY_DESC_PREFIX_SIZE;
#endif
return 0;
cantfind_ext4:

View File

@@ -384,7 +384,7 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage,
if (error)
return error;
f2fs_mark_inode_dirty_sync(inode);
f2fs_mark_inode_dirty_sync(inode, true);
if (default_acl) {
error = __f2fs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl,

View File

@@ -65,7 +65,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
.sbi = sbi,
.type = META,
.op = REQ_OP_READ,
.op_flags = READ_SYNC | REQ_META | REQ_PRIO,
.op_flags = REQ_META | REQ_PRIO,
.old_blkaddr = index,
.new_blkaddr = index,
.encrypted_page = NULL,
@@ -160,7 +160,7 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
.sbi = sbi,
.type = META,
.op = REQ_OP_READ,
.op_flags = sync ? (READ_SYNC | REQ_META | REQ_PRIO) : REQ_RAHEAD,
.op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD,
.encrypted_page = NULL,
};
struct blk_plug plug;
@@ -228,7 +228,7 @@ void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
f2fs_put_page(page, 0);
if (readahead)
ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR, true);
ra_meta_pages(sbi, index, BIO_MAX_PAGES, META_POR, true);
}
static int f2fs_write_meta_page(struct page *page,
@@ -249,7 +249,8 @@ static int f2fs_write_meta_page(struct page *page,
dec_page_count(sbi, F2FS_DIRTY_META);
if (wbc->for_reclaim)
f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, META, WRITE);
f2fs_submit_merged_bio_cond(sbi, page->mapping->host,
0, page->index, META, WRITE);
unlock_page(page);
@@ -274,10 +275,11 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
goto skip_write;
trace_f2fs_writepages(mapping->host, wbc, META);
/* if locked failed, cp will flush dirty pages instead */
if (!mutex_trylock(&sbi->cp_mutex))
goto skip_write;
/* if mounting is failed, skip writing node pages */
mutex_lock(&sbi->cp_mutex);
trace_f2fs_writepages(mapping->host, wbc, META);
diff = nr_pages_to_write(sbi, META, wbc);
written = sync_meta_pages(sbi, META, wbc->nr_to_write);
mutex_unlock(&sbi->cp_mutex);
@@ -493,6 +495,7 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi)
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_ORPHAN)) {
spin_unlock(&im->ino_lock);
f2fs_show_injection_info(FAULT_ORPHAN);
return -ENOSPC;
}
#endif
@@ -565,7 +568,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
if (ni.blk_addr != NULL_ADDR) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"%s: orphan failed (ino=%x), run fsck to fix.",
"%s: orphan failed (ino=%x) by kernel, retry mount.",
__func__, ino);
return -EIO;
}
@@ -675,14 +678,13 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
*cp_block = (struct f2fs_checkpoint *)page_address(*cp_page);
crc_offset = le32_to_cpu((*cp_block)->checksum_offset);
if (crc_offset >= blk_size) {
if (crc_offset > (blk_size - sizeof(__le32))) {
f2fs_msg(sbi->sb, KERN_WARNING,
"invalid crc_offset: %zu", crc_offset);
return -EINVAL;
}
crc = le32_to_cpu(*((__le32 *)((unsigned char *)*cp_block
+ crc_offset)));
crc = cur_cp_crc(*cp_block);
if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) {
f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
return -EINVAL;
@@ -770,7 +772,7 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
/* Sanity checking of checkpoint */
if (sanity_check_ckpt(sbi))
goto fail_no_cp;
goto free_fail_no_cp;
if (cur_page == cp1)
sbi->cur_cp_pack = 1;
@@ -798,6 +800,9 @@ done:
f2fs_put_page(cp2, 1);
return 0;
free_fail_no_cp:
f2fs_put_page(cp1, 1);
f2fs_put_page(cp2, 1);
fail_no_cp:
kfree(sbi->ckpt);
return -EINVAL;
@@ -812,7 +817,9 @@ static void __add_dirty_inode(struct inode *inode, enum inode_type type)
return;
set_inode_flag(inode, flag);
list_add_tail(&F2FS_I(inode)->dirty_list, &sbi->inode_list[type]);
if (!f2fs_is_volatile_file(inode))
list_add_tail(&F2FS_I(inode)->dirty_list,
&sbi->inode_list[type]);
stat_inc_dirty_inode(sbi, type);
}
@@ -888,7 +895,7 @@ retry:
F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
return 0;
}
fi = list_entry(head->next, struct f2fs_inode_info, dirty_list);
fi = list_first_entry(head, struct f2fs_inode_info, dirty_list);
inode = igrab(&fi->vfs_inode);
spin_unlock(&sbi->inode_lock[type]);
if (inode) {
@@ -921,18 +928,35 @@ int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
spin_unlock(&sbi->inode_lock[DIRTY_META]);
return 0;
}
fi = list_entry(head->next, struct f2fs_inode_info,
fi = list_first_entry(head, struct f2fs_inode_info,
gdirty_list);
inode = igrab(&fi->vfs_inode);
spin_unlock(&sbi->inode_lock[DIRTY_META]);
if (inode) {
update_inode_page(inode);
sync_inode_metadata(inode, 0);
/* it's on eviction */
if (is_inode_flag_set(inode, FI_DIRTY_INODE))
update_inode_page(inode);
iput(inode);
}
};
return 0;
}
static void __prepare_cp_block(struct f2fs_sb_info *sbi)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
struct f2fs_nm_info *nm_i = NM_I(sbi);
nid_t last_nid = nm_i->next_scan_nid;
next_free_nid(sbi, &last_nid);
ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
ckpt->next_free_nid = cpu_to_le32(last_nid);
}
/*
* Freeze all the FS-operations for checkpoint.
*/
@@ -956,21 +980,26 @@ retry_flush_dents:
err = sync_dirty_inodes(sbi, DIR_INODE);
if (err)
goto out;
goto retry_flush_dents;
}
if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
f2fs_unlock_all(sbi);
err = f2fs_sync_inode_meta(sbi);
if (err)
goto out;
cond_resched();
goto retry_flush_dents;
}
/*
* POR: we should ensure that there are no dirty node pages
* until finishing nat/sit flush.
* until finishing nat/sit flush. inode->i_blocks can be updated.
*/
down_write(&sbi->node_change);
if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
up_write(&sbi->node_change);
f2fs_unlock_all(sbi);
err = f2fs_sync_inode_meta(sbi);
if (err)
goto out;
cond_resched();
goto retry_flush_dents;
}
retry_flush_nodes:
down_write(&sbi->node_write);
@@ -978,11 +1007,20 @@ retry_flush_nodes:
up_write(&sbi->node_write);
err = sync_node_pages(sbi, &wbc);
if (err) {
up_write(&sbi->node_change);
f2fs_unlock_all(sbi);
goto out;
}
cond_resched();
goto retry_flush_nodes;
}
/*
* sbi->node_change is used only for AIO write_begin path which produces
* dirty node blocks and some checkpoint values by block allocation.
*/
__prepare_cp_block(sbi);
up_write(&sbi->node_change);
out:
blk_finish_plug(&plug);
return err;
@@ -991,8 +1029,6 @@ out:
static void unblock_operations(struct f2fs_sb_info *sbi)
{
up_write(&sbi->node_write);
build_free_nids(sbi);
f2fs_unlock_all(sbi);
}
@@ -1003,7 +1039,7 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
for (;;) {
prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);
if (!atomic_read(&sbi->nr_wb_bios))
if (!get_pages(sbi, F2FS_WB_CP_DATA))
break;
io_schedule_timeout(5*HZ);
@@ -1018,12 +1054,20 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
spin_lock(&sbi->cp_lock);
if (cpc->reason == CP_UMOUNT)
if ((cpc->reason & CP_UMOUNT) &&
le32_to_cpu(ckpt->cp_pack_total_block_count) >
sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
disable_nat_bits(sbi, false);
if (cpc->reason & CP_TRIMMED)
__set_ckpt_flags(ckpt, CP_TRIMMED_FLAG);
if (cpc->reason & CP_UMOUNT)
__set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
else
__clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
if (cpc->reason == CP_FASTBOOT)
if (cpc->reason & CP_FASTBOOT)
__set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
else
__clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
@@ -1047,7 +1091,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
struct f2fs_nm_info *nm_i = NM_I(sbi);
unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
nid_t last_nid = nm_i->next_scan_nid;
block_t start_blk;
unsigned int data_sum_blocks, orphan_blocks;
__u32 crc32 = 0;
@@ -1064,14 +1107,11 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
return -EIO;
}
next_free_nid(sbi, &last_nid);
/*
* modify checkpoint
* version number is already updated
*/
ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
ckpt->cur_node_segno[i] =
@@ -1090,10 +1130,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
}
ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
ckpt->next_free_nid = cpu_to_le32(last_nid);
/* 2 cp + n data seg summary + orphan inode blocks */
data_sum_blocks = npages_for_summary_flush(sbi, false);
spin_lock(&sbi->cp_lock);
@@ -1130,6 +1166,27 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
start_blk = __start_cp_next_addr(sbi);
/* write nat bits */
if (enabled_nat_bits(sbi, cpc)) {
__u64 cp_ver = cur_cp_version(ckpt);
block_t blk;
cp_ver |= ((__u64)crc32 << 32);
*(__le64 *)nm_i->nat_bits = cpu_to_le64(cp_ver);
blk = start_blk + sbi->blocks_per_seg - nm_i->nat_bits_blocks;
for (i = 0; i < nm_i->nat_bits_blocks; i++)
update_meta_page(sbi, nm_i->nat_bits +
(i << F2FS_BLKSIZE_BITS), blk + i);
/* Flush all the NAT BITS pages */
while (get_pages(sbi, F2FS_DIRTY_META)) {
sync_meta_pages(sbi, META, LONG_MAX);
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
}
}
/* need to wait for end_io results */
wait_on_all_pages_writeback(sbi);
if (unlikely(f2fs_cp_error(sbi)))
@@ -1189,7 +1246,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
clear_prefree_segments(sbi, cpc);
clear_sbi_flag(sbi, SBI_IS_DIRTY);
clear_sbi_flag(sbi, SBI_NEED_CP);
__set_cp_next_pack(sbi);
@@ -1219,8 +1275,8 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
mutex_lock(&sbi->cp_mutex);
if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
(cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC ||
(cpc->reason == CP_DISCARD && !sbi->discard_blks)))
((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) ||
((cpc->reason & CP_DISCARD) && !sbi->discard_blks)))
goto out;
if (unlikely(f2fs_cp_error(sbi))) {
err = -EIO;
@@ -1242,15 +1298,20 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_flush_merged_bios(sbi);
/* this is the case of multiple fstrims without any changes */
if (cpc->reason == CP_DISCARD && !is_sbi_flag_set(sbi, SBI_IS_DIRTY)) {
f2fs_bug_on(sbi, NM_I(sbi)->dirty_nat_cnt);
f2fs_bug_on(sbi, SIT_I(sbi)->dirty_sentries);
f2fs_bug_on(sbi, prefree_segments(sbi));
flush_sit_entries(sbi, cpc);
clear_prefree_segments(sbi, cpc);
f2fs_wait_all_discard_bio(sbi);
unblock_operations(sbi);
goto out;
if (cpc->reason & CP_DISCARD) {
if (!exist_trim_candidates(sbi, cpc)) {
unblock_operations(sbi);
goto out;
}
if (NM_I(sbi)->dirty_nat_cnt == 0 &&
SIT_I(sbi)->dirty_sentries == 0 &&
prefree_segments(sbi) == 0) {
flush_sit_entries(sbi, cpc);
clear_prefree_segments(sbi, cpc);
unblock_operations(sbi);
goto out;
}
}
/*
@@ -1262,18 +1323,20 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);
/* write cached NAT/SIT entries to NAT/SIT area */
flush_nat_entries(sbi);
flush_nat_entries(sbi, cpc);
flush_sit_entries(sbi, cpc);
/* unlock all the fs_lock[] in do_checkpoint() */
err = do_checkpoint(sbi, cpc);
f2fs_wait_all_discard_bio(sbi);
if (err)
release_discard_addrs(sbi);
else
clear_prefree_segments(sbi, cpc);
unblock_operations(sbi);
stat_inc_cp_count(sbi->stat_info);
if (cpc->reason == CP_RECOVERY)
if (cpc->reason & CP_RECOVERY)
f2fs_msg(sbi->sb, KERN_NOTICE,
"checkpoint: version = %llx", ckpt_ver);

File diff suppressed because it is too large Load Diff

View File

@@ -50,7 +50,27 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
si->wb_bios = atomic_read(&sbi->nr_wb_bios);
si->aw_cnt = atomic_read(&sbi->aw_cnt);
si->vw_cnt = atomic_read(&sbi->vw_cnt);
si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt);
si->max_vw_cnt = atomic_read(&sbi->max_vw_cnt);
si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA);
si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA);
if (SM_I(sbi) && SM_I(sbi)->fcc_info) {
si->nr_flushed =
atomic_read(&SM_I(sbi)->fcc_info->issued_flush);
si->nr_flushing =
atomic_read(&SM_I(sbi)->fcc_info->issing_flush);
}
if (SM_I(sbi) && SM_I(sbi)->dcc_info) {
si->nr_discarded =
atomic_read(&SM_I(sbi)->dcc_info->issued_discard);
si->nr_discarding =
atomic_read(&SM_I(sbi)->dcc_info->issing_discard);
si->nr_discard_cmd =
atomic_read(&SM_I(sbi)->dcc_info->discard_cmd_cnt);
si->undiscard_blks = SM_I(sbi)->dcc_info->undiscard_blks;
}
si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
si->rsvd_segs = reserved_segments(sbi);
si->overp_segs = overprovision_segments(sbi);
@@ -61,6 +81,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->inline_xattr = atomic_read(&sbi->inline_xattr);
si->inline_inode = atomic_read(&sbi->inline_inode);
si->inline_dir = atomic_read(&sbi->inline_dir);
si->append = sbi->im[APPEND_INO].ino_num;
si->update = sbi->im[UPDATE_INO].ino_num;
si->orphans = sbi->im[ORPHAN_INO].ino_num;
si->utilization = utilization(sbi);
@@ -74,7 +96,9 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->dirty_nats = NM_I(sbi)->dirty_nat_cnt;
si->sits = MAIN_SEGS(sbi);
si->dirty_sits = SIT_I(sbi)->dirty_sentries;
si->fnids = NM_I(sbi)->fcnt;
si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID_LIST];
si->avail_nids = NM_I(sbi)->available_nids;
si->alloc_nids = NM_I(sbi)->nid_cnt[ALLOC_NID_LIST];
si->bg_gc = sbi->bg_gc;
si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg)
* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
@@ -87,8 +111,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) {
struct curseg_info *curseg = CURSEG_I(sbi, i);
si->curseg[i] = curseg->segno;
si->cursec[i] = curseg->segno / sbi->segs_per_sec;
si->curzone[i] = si->cursec[i] / sbi->secs_per_zone;
si->cursec[i] = GET_SEC_FROM_SEG(sbi, curseg->segno);
si->curzone[i] = GET_ZONE_FROM_SEC(sbi, si->cursec[i]);
}
for (i = 0; i < 2; i++) {
@@ -112,10 +136,10 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
bimodal = 0;
total_vblocks = 0;
blks_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg;
blks_per_sec = BLKS_PER_SEC(sbi);
hblks_per_sec = blks_per_sec / 2;
for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);
vblocks = get_valid_blocks(sbi, segno, true);
dist = abs(vblocks - hblks_per_sec);
bimodal += dist * dist;
@@ -144,7 +168,11 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
if (si->base_mem)
goto get_cache;
si->base_mem = sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize;
/* build stat */
si->base_mem = sizeof(struct f2fs_stat_info);
/* build superblock */
si->base_mem += sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize;
si->base_mem += 2 * sizeof(struct f2fs_inode_info);
si->base_mem += sizeof(*sbi->ckpt);
si->base_mem += sizeof(struct percpu_counter) * NR_COUNT_TYPE;
@@ -181,6 +209,10 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
/* build nm */
si->base_mem += sizeof(struct f2fs_nm_info);
si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE;
si->base_mem += NM_I(sbi)->nat_blocks / 8;
si->base_mem += NM_I(sbi)->nat_blocks * sizeof(unsigned short);
get_cache:
si->cache_mem = 0;
@@ -190,11 +222,18 @@ get_cache:
si->cache_mem += sizeof(struct f2fs_gc_kthread);
/* build merge flush thread */
if (SM_I(sbi)->cmd_control_info)
if (SM_I(sbi)->fcc_info)
si->cache_mem += sizeof(struct flush_cmd_control);
if (SM_I(sbi)->dcc_info) {
si->cache_mem += sizeof(struct discard_cmd_control);
si->cache_mem += sizeof(struct discard_cmd) *
atomic_read(&SM_I(sbi)->dcc_info->discard_cmd_cnt);
}
/* free nids */
si->cache_mem += NM_I(sbi)->fcnt * sizeof(struct free_nid);
si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID_LIST] +
NM_I(sbi)->nid_cnt[ALLOC_NID_LIST]) *
sizeof(struct free_nid);
si->cache_mem += NM_I(sbi)->nat_cnt * sizeof(struct nat_entry);
si->cache_mem += NM_I(sbi)->dirty_nat_cnt *
sizeof(struct nat_entry_set);
@@ -250,8 +289,8 @@ static int stat_show(struct seq_file *s, void *v)
si->inline_inode);
seq_printf(s, " - Inline_dentry Inode: %u\n",
si->inline_dir);
seq_printf(s, " - Orphan Inode: %u\n",
si->orphans);
seq_printf(s, " - Orphan/Append/Update Inode: %u, %u, %u\n",
si->orphans, si->append, si->update);
seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
si->main_area_segs, si->main_area_sections,
si->main_area_zones);
@@ -310,8 +349,16 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n",
si->ext_tree, si->zombie_tree, si->ext_node);
seq_puts(s, "\nBalancing F2FS Async:\n");
seq_printf(s, " - inmem: %4d, wb_bios: %4d\n",
si->inmem_pages, si->wb_bios);
seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: (%4d %4d), "
"Discard: (%4d %4d)) cmd: %4d undiscard:%4u\n",
si->nr_wb_cp_data, si->nr_wb_data,
si->nr_flushing, si->nr_flushed,
si->nr_discarding, si->nr_discarded,
si->nr_discard_cmd, si->undiscard_blks);
seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d), "
"volatile IO: %4d (Max. %4d)\n",
si->inmem_pages, si->aw_cnt, si->max_aw_cnt,
si->vw_cnt, si->max_vw_cnt);
seq_printf(s, " - nodes: %4d in %4d\n",
si->ndirty_node, si->node_pages);
seq_printf(s, " - dents: %4d in dirs:%4d (%4d)\n",
@@ -324,8 +371,8 @@ static int stat_show(struct seq_file *s, void *v)
si->ndirty_imeta);
seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n",
si->dirty_nats, si->nats, si->dirty_sits, si->sits);
seq_printf(s, " - free_nids: %9d\n",
si->fnids);
seq_printf(s, " - free_nids: %9d/%9d\n - alloc_nids: %9d\n",
si->free_nids, si->avail_nids, si->alloc_nids);
seq_puts(s, "\nDistribution of User Blocks:");
seq_puts(s, " [ valid | invalid | free ]\n");
seq_puts(s, " [");
@@ -410,6 +457,11 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
atomic_set(&sbi->inline_dir, 0);
atomic_set(&sbi->inplace_count, 0);
atomic_set(&sbi->aw_cnt, 0);
atomic_set(&sbi->vw_cnt, 0);
atomic_set(&sbi->max_aw_cnt, 0);
atomic_set(&sbi->max_vw_cnt, 0);
mutex_lock(&f2fs_stat_mutex);
list_add_tail(&si->stat_list, &f2fs_stat_list);
mutex_unlock(&f2fs_stat_mutex);

View File

@@ -94,7 +94,7 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
dentry_blk = (struct f2fs_dentry_block *)kmap(dentry_page);
make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1);
make_dentry_ptr_block(NULL, &d, dentry_blk);
de = find_target_dentry(fname, namehash, max_slots, &d);
if (de)
*res_page = dentry_page;
@@ -111,8 +111,6 @@ struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *fname,
struct f2fs_dir_entry *de;
unsigned long bit_pos = 0;
int max_len = 0;
struct fscrypt_str de_name = FSTR_INIT(NULL, 0);
struct fscrypt_str *name = &fname->disk_name;
if (max_slots)
*max_slots = 0;
@@ -130,29 +128,11 @@ struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *fname,
continue;
}
if (de->hash_code != namehash)
goto not_match;
de_name.name = d->filename[bit_pos];
de_name.len = le16_to_cpu(de->name_len);
#ifdef CONFIG_F2FS_FS_ENCRYPTION
if (unlikely(!name->name)) {
if (fname->usr_fname->name[0] == '_') {
if (de_name.len > 32 &&
!memcmp(de_name.name + ((de_name.len - 17) & ~15),
fname->crypto_buf.name + 8, 16))
goto found;
goto not_match;
}
name->name = fname->crypto_buf.name;
name->len = fname->crypto_buf.len;
}
#endif
if (de_name.len == name->len &&
!memcmp(de_name.name, name->name, name->len))
if (de->hash_code == namehash &&
fscrypt_match_name(fname, d->filename[bit_pos],
le16_to_cpu(de->name_len)))
goto found;
not_match:
if (max_slots && max_len > *max_slots)
*max_slots = max_len;
max_len = 0;
@@ -212,13 +192,9 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
f2fs_put_page(dentry_page, 0);
}
/* This is to increase the speed of f2fs_create */
if (!de && room) {
F2FS_I(dir)->task = current;
if (F2FS_I(dir)->chash != namehash) {
F2FS_I(dir)->chash = namehash;
F2FS_I(dir)->clevel = level;
}
if (!de && room && F2FS_I(dir)->chash != namehash) {
F2FS_I(dir)->chash = namehash;
F2FS_I(dir)->clevel = level;
}
return de;
@@ -259,6 +235,9 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
break;
}
out:
/* This is to increase the speed of f2fs_create */
if (!de)
F2FS_I(dir)->task = current;
return de;
}
@@ -277,7 +256,10 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
err = fscrypt_setup_filename(dir, child, 1, &fname);
if (err) {
*res_page = ERR_PTR(err);
if (err == -ENOENT)
*res_page = NULL;
else
*res_page = ERR_PTR(err);
return NULL;
}
@@ -322,7 +304,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
set_page_dirty(page);
dir->i_mtime = dir->i_ctime = current_time(dir);
f2fs_mark_inode_dirty_sync(dir);
f2fs_mark_inode_dirty_sync(dir, false);
f2fs_put_page(page, 1);
}
@@ -339,24 +321,6 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage)
set_page_dirty(ipage);
}
int update_dent_inode(struct inode *inode, struct inode *to,
const struct qstr *name)
{
struct page *page;
if (file_enc_name(to))
return 0;
page = get_node_page(F2FS_I_SB(inode), inode->i_ino);
if (IS_ERR(page))
return PTR_ERR(page);
init_dent_inode(name, page);
f2fs_put_page(page, 1);
return 0;
}
void do_make_empty_dir(struct inode *inode, struct inode *parent,
struct f2fs_dentry_ptr *d)
{
@@ -386,7 +350,7 @@ static int make_empty_dir(struct inode *inode,
dentry_blk = kmap_atomic(dentry_page);
make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1);
make_dentry_ptr_block(NULL, &d, dentry_blk);
do_make_empty_dir(inode, parent, &d);
kunmap_atomic(dentry_blk);
@@ -440,8 +404,11 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
set_cold_node(inode, page);
}
if (new_name)
if (new_name) {
init_dent_inode(new_name, page);
if (f2fs_encrypted_inode(dir))
file_set_enc_name(inode);
}
/*
* This file should be checkpointed during fsync.
@@ -475,7 +442,7 @@ void update_parent_metadata(struct inode *dir, struct inode *inode,
clear_inode_flag(inode, FI_NEW_INODE);
}
dir->i_mtime = dir->i_ctime = current_time(dir);
f2fs_mark_inode_dirty_sync(dir);
f2fs_mark_inode_dirty_sync(dir, false);
if (F2FS_I(dir)->i_current_depth != current_depth)
f2fs_i_depth_write(dir, current_depth);
@@ -554,8 +521,10 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name,
start:
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH))
if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) {
f2fs_show_injection_info(FAULT_DIR_DEPTH);
return -ENOSPC;
}
#endif
if (unlikely(current_depth == MAX_DIR_HASH_DEPTH))
return -ENOSPC;
@@ -599,11 +568,9 @@ add_dentry:
err = PTR_ERR(page);
goto fail;
}
if (f2fs_encrypted_inode(dir))
file_set_enc_name(inode);
}
make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1);
make_dentry_ptr_block(NULL, &d, dentry_blk);
f2fs_update_dentry(ino, mode, &d, new_name, dentry_hash, bit_pos);
set_page_dirty(dentry_page);
@@ -750,7 +717,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
dentry_blk = page_address(page);
bit_pos = dentry - dentry_blk->dentry;
for (i = 0; i < slots; i++)
clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
__clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap);
/* Let's check and deallocate this dentry page */
bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap,
@@ -760,7 +727,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
set_page_dirty(page);
dir->i_ctime = dir->i_mtime = current_time(dir);
f2fs_mark_inode_dirty_sync(dir);
f2fs_mark_inode_dirty_sync(dir, false);
if (inode)
f2fs_drop_nlink(dir, inode);
@@ -771,6 +738,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
ClearPagePrivate(page);
ClearPageUptodate(page);
inode_dec_dirty_pages(dir);
remove_dirty_inode(dir);
}
f2fs_put_page(page, 1);
}
@@ -813,7 +781,7 @@ bool f2fs_empty_dir(struct inode *dir)
return true;
}
bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
unsigned int start_pos, struct fscrypt_str *fstr)
{
unsigned char d_type = DT_UNKNOWN;
@@ -848,7 +816,7 @@ bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
(u32)de->hash_code, 0,
&de_name, fstr);
if (err)
return true;
return err;
de_name = *fstr;
fstr->len = save_len;
@@ -856,12 +824,12 @@ bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
if (!dir_emit(ctx, de_name.name, de_name.len,
le32_to_cpu(de->ino), d_type))
return true;
return 1;
bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
ctx->pos = start_pos + bit_pos;
}
return false;
return 0;
}
static int f2fs_readdir(struct file *file, struct dir_context *ctx)
@@ -900,17 +868,21 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
dentry_page = get_lock_data_page(inode, n, false);
if (IS_ERR(dentry_page)) {
err = PTR_ERR(dentry_page);
if (err == -ENOENT)
if (err == -ENOENT) {
err = 0;
continue;
else
} else {
goto out;
}
}
dentry_blk = kmap(dentry_page);
make_dentry_ptr(inode, &d, (void *)dentry_blk, 1);
make_dentry_ptr_block(inode, &d, dentry_blk);
if (f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr)) {
err = f2fs_fill_dentries(ctx, &d,
n * NR_DENTRY_IN_BLOCK, &fstr);
if (err) {
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
break;
@@ -920,10 +892,9 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
}
err = 0;
out:
fscrypt_fname_free_buffer(&fstr);
return err;
return err < 0 ? err : 0;
}
static int f2fs_dir_open(struct inode *inode, struct file *filp)

View File

@@ -18,6 +18,179 @@
#include "node.h"
#include <trace/events/f2fs.h>
static struct rb_entry *__lookup_rb_tree_fast(struct rb_entry *cached_re,
unsigned int ofs)
{
if (cached_re) {
if (cached_re->ofs <= ofs &&
cached_re->ofs + cached_re->len > ofs) {
return cached_re;
}
}
return NULL;
}
static struct rb_entry *__lookup_rb_tree_slow(struct rb_root *root,
unsigned int ofs)
{
struct rb_node *node = root->rb_node;
struct rb_entry *re;
while (node) {
re = rb_entry(node, struct rb_entry, rb_node);
if (ofs < re->ofs)
node = node->rb_left;
else if (ofs >= re->ofs + re->len)
node = node->rb_right;
else
return re;
}
return NULL;
}
struct rb_entry *__lookup_rb_tree(struct rb_root *root,
struct rb_entry *cached_re, unsigned int ofs)
{
struct rb_entry *re;
re = __lookup_rb_tree_fast(cached_re, ofs);
if (!re)
return __lookup_rb_tree_slow(root, ofs);
return re;
}
struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
struct rb_root *root, struct rb_node **parent,
unsigned int ofs)
{
struct rb_node **p = &root->rb_node;
struct rb_entry *re;
while (*p) {
*parent = *p;
re = rb_entry(*parent, struct rb_entry, rb_node);
if (ofs < re->ofs)
p = &(*p)->rb_left;
else if (ofs >= re->ofs + re->len)
p = &(*p)->rb_right;
else
f2fs_bug_on(sbi, 1);
}
return p;
}
/*
* lookup rb entry in position of @ofs in rb-tree,
* if hit, return the entry, otherwise, return NULL
* @prev_ex: extent before ofs
* @next_ex: extent after ofs
* @insert_p: insert point for new extent at ofs
* in order to simpfy the insertion after.
* tree must stay unchanged between lookup and insertion.
*/
struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root,
struct rb_entry *cached_re,
unsigned int ofs,
struct rb_entry **prev_entry,
struct rb_entry **next_entry,
struct rb_node ***insert_p,
struct rb_node **insert_parent,
bool force)
{
struct rb_node **pnode = &root->rb_node;
struct rb_node *parent = NULL, *tmp_node;
struct rb_entry *re = cached_re;
*insert_p = NULL;
*insert_parent = NULL;
*prev_entry = NULL;
*next_entry = NULL;
if (RB_EMPTY_ROOT(root))
return NULL;
if (re) {
if (re->ofs <= ofs && re->ofs + re->len > ofs)
goto lookup_neighbors;
}
while (*pnode) {
parent = *pnode;
re = rb_entry(*pnode, struct rb_entry, rb_node);
if (ofs < re->ofs)
pnode = &(*pnode)->rb_left;
else if (ofs >= re->ofs + re->len)
pnode = &(*pnode)->rb_right;
else
goto lookup_neighbors;
}
*insert_p = pnode;
*insert_parent = parent;
re = rb_entry(parent, struct rb_entry, rb_node);
tmp_node = parent;
if (parent && ofs > re->ofs)
tmp_node = rb_next(parent);
*next_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
tmp_node = parent;
if (parent && ofs < re->ofs)
tmp_node = rb_prev(parent);
*prev_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
return NULL;
lookup_neighbors:
if (ofs == re->ofs || force) {
/* lookup prev node for merging backward later */
tmp_node = rb_prev(&re->rb_node);
*prev_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
}
if (ofs == re->ofs + re->len - 1 || force) {
/* lookup next node for merging frontward later */
tmp_node = rb_next(&re->rb_node);
*next_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node);
}
return re;
}
bool __check_rb_tree_consistence(struct f2fs_sb_info *sbi,
struct rb_root *root)
{
#ifdef CONFIG_F2FS_CHECK_FS
struct rb_node *cur = rb_first(root), *next;
struct rb_entry *cur_re, *next_re;
if (!cur)
return true;
while (cur) {
next = rb_next(cur);
if (!next)
return true;
cur_re = rb_entry(cur, struct rb_entry, rb_node);
next_re = rb_entry(next, struct rb_entry, rb_node);
if (cur_re->ofs + cur_re->len > next_re->ofs) {
f2fs_msg(sbi->sb, KERN_INFO, "inconsistent rbtree, "
"cur(%u, %u) next(%u, %u)",
cur_re->ofs, cur_re->len,
next_re->ofs, next_re->len);
return false;
}
cur = next;
}
#endif
return true;
}
static struct kmem_cache *extent_tree_slab;
static struct kmem_cache *extent_node_slab;
@@ -77,7 +250,7 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode)
struct extent_tree *et;
nid_t ino = inode->i_ino;
down_write(&sbi->extent_tree_lock);
mutex_lock(&sbi->extent_tree_lock);
et = radix_tree_lookup(&sbi->extent_tree_root, ino);
if (!et) {
et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS);
@@ -94,7 +267,7 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode)
atomic_dec(&sbi->total_zombie_tree);
list_del_init(&et->list);
}
up_write(&sbi->extent_tree_lock);
mutex_unlock(&sbi->extent_tree_lock);
/* never died until evict_inode */
F2FS_I(inode)->extent_tree = et;
@@ -102,36 +275,6 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode)
return et;
}
static struct extent_node *__lookup_extent_tree(struct f2fs_sb_info *sbi,
struct extent_tree *et, unsigned int fofs)
{
struct rb_node *node = et->root.rb_node;
struct extent_node *en = et->cached_en;
if (en) {
struct extent_info *cei = &en->ei;
if (cei->fofs <= fofs && cei->fofs + cei->len > fofs) {
stat_inc_cached_node_hit(sbi);
return en;
}
}
while (node) {
en = rb_entry(node, struct extent_node, rb_node);
if (fofs < en->ei.fofs) {
node = node->rb_left;
} else if (fofs >= en->ei.fofs + en->ei.len) {
node = node->rb_right;
} else {
stat_inc_rbtree_node_hit(sbi);
return en;
}
}
return NULL;
}
static struct extent_node *__init_extent_tree(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_info *ei)
{
@@ -172,7 +315,7 @@ static void __drop_largest_extent(struct inode *inode,
if (fofs < largest->fofs + largest->len && fofs + len > largest->fofs) {
largest->len = 0;
f2fs_mark_inode_dirty_sync(inode);
f2fs_mark_inode_dirty_sync(inode, true);
}
}
@@ -237,17 +380,24 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
goto out;
}
en = __lookup_extent_tree(sbi, et, pgofs);
if (en) {
*ei = en->ei;
spin_lock(&sbi->extent_lock);
if (!list_empty(&en->list)) {
list_move_tail(&en->list, &sbi->extent_list);
et->cached_en = en;
}
spin_unlock(&sbi->extent_lock);
ret = true;
en = (struct extent_node *)__lookup_rb_tree(&et->root,
(struct rb_entry *)et->cached_en, pgofs);
if (!en)
goto out;
if (en == et->cached_en)
stat_inc_cached_node_hit(sbi);
else
stat_inc_rbtree_node_hit(sbi);
*ei = en->ei;
spin_lock(&sbi->extent_lock);
if (!list_empty(&en->list)) {
list_move_tail(&en->list, &sbi->extent_list);
et->cached_en = en;
}
spin_unlock(&sbi->extent_lock);
ret = true;
out:
stat_inc_total_hit(sbi);
read_unlock(&et->lock);
@@ -256,87 +406,6 @@ out:
return ret;
}
/*
* lookup extent at @fofs, if hit, return the extent
* if not, return NULL and
* @prev_ex: extent before fofs
* @next_ex: extent after fofs
* @insert_p: insert point for new extent at fofs
* in order to simpfy the insertion after.
* tree must stay unchanged between lookup and insertion.
*/
static struct extent_node *__lookup_extent_tree_ret(struct extent_tree *et,
unsigned int fofs,
struct extent_node **prev_ex,
struct extent_node **next_ex,
struct rb_node ***insert_p,
struct rb_node **insert_parent)
{
struct rb_node **pnode = &et->root.rb_node;
struct rb_node *parent = NULL, *tmp_node;
struct extent_node *en = et->cached_en;
*insert_p = NULL;
*insert_parent = NULL;
*prev_ex = NULL;
*next_ex = NULL;
if (RB_EMPTY_ROOT(&et->root))
return NULL;
if (en) {
struct extent_info *cei = &en->ei;
if (cei->fofs <= fofs && cei->fofs + cei->len > fofs)
goto lookup_neighbors;
}
while (*pnode) {
parent = *pnode;
en = rb_entry(*pnode, struct extent_node, rb_node);
if (fofs < en->ei.fofs)
pnode = &(*pnode)->rb_left;
else if (fofs >= en->ei.fofs + en->ei.len)
pnode = &(*pnode)->rb_right;
else
goto lookup_neighbors;
}
*insert_p = pnode;
*insert_parent = parent;
en = rb_entry(parent, struct extent_node, rb_node);
tmp_node = parent;
if (parent && fofs > en->ei.fofs)
tmp_node = rb_next(parent);
*next_ex = tmp_node ?
rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
tmp_node = parent;
if (parent && fofs < en->ei.fofs)
tmp_node = rb_prev(parent);
*prev_ex = tmp_node ?
rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
return NULL;
lookup_neighbors:
if (fofs == en->ei.fofs) {
/* lookup prev node for merging backward later */
tmp_node = rb_prev(&en->rb_node);
*prev_ex = tmp_node ?
rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
}
if (fofs == en->ei.fofs + en->ei.len - 1) {
/* lookup next node for merging frontward later */
tmp_node = rb_next(&en->rb_node);
*next_ex = tmp_node ?
rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
}
return en;
}
static struct extent_node *__try_merge_extent_node(struct inode *inode,
struct extent_tree *et, struct extent_info *ei,
struct extent_node *prev_ex,
@@ -391,17 +460,7 @@ static struct extent_node *__insert_extent_tree(struct inode *inode,
goto do_insert;
}
while (*p) {
parent = *p;
en = rb_entry(parent, struct extent_node, rb_node);
if (ei->fofs < en->ei.fofs)
p = &(*p)->rb_left;
else if (ei->fofs >= en->ei.fofs + en->ei.len)
p = &(*p)->rb_right;
else
f2fs_bug_on(sbi, 1);
}
p = __lookup_rb_tree_for_insert(sbi, &et->root, &parent, ei->fofs);
do_insert:
en = __attach_extent_node(sbi, et, ei, parent, p);
if (!en)
@@ -417,7 +476,7 @@ do_insert:
return en;
}
static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
static void f2fs_update_extent_tree_range(struct inode *inode,
pgoff_t fofs, block_t blkaddr, unsigned int len)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -430,7 +489,7 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
unsigned int pos = (unsigned int)fofs;
if (!et)
return false;
return;
trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, len);
@@ -438,7 +497,7 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
write_unlock(&et->lock);
return false;
return;
}
prev = et->largest;
@@ -451,8 +510,11 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
__drop_largest_extent(inode, fofs, len);
/* 1. lookup first extent node in range [fofs, fofs + len - 1] */
en = __lookup_extent_tree_ret(et, fofs, &prev_en, &next_en,
&insert_p, &insert_parent);
en = (struct extent_node *)__lookup_rb_tree_ret(&et->root,
(struct rb_entry *)et->cached_en, fofs,
(struct rb_entry **)&prev_en,
(struct rb_entry **)&next_en,
&insert_p, &insert_parent, false);
if (!en)
en = next_en;
@@ -493,9 +555,8 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
if (!next_en) {
struct rb_node *node = rb_next(&en->rb_node);
next_en = node ?
rb_entry(node, struct extent_node, rb_node)
: NULL;
next_en = rb_entry_safe(node, struct extent_node,
rb_node);
}
if (parts)
@@ -536,8 +597,6 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
__free_extent_tree(sbi, et);
write_unlock(&et->lock);
return !__is_extent_same(&prev, &et->largest);
}
unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
@@ -553,7 +612,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
if (!atomic_read(&sbi->total_zombie_tree))
goto free_node;
if (!down_write_trylock(&sbi->extent_tree_lock))
if (!mutex_trylock(&sbi->extent_tree_lock))
goto out;
/* 1. remove unreferenced extent tree */
@@ -575,11 +634,11 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
goto unlock_out;
cond_resched();
}
up_write(&sbi->extent_tree_lock);
mutex_unlock(&sbi->extent_tree_lock);
free_node:
/* 2. remove LRU extent entries */
if (!down_write_trylock(&sbi->extent_tree_lock))
if (!mutex_trylock(&sbi->extent_tree_lock))
goto out;
remained = nr_shrink - (node_cnt + tree_cnt);
@@ -609,7 +668,7 @@ free_node:
spin_unlock(&sbi->extent_lock);
unlock_out:
up_write(&sbi->extent_tree_lock);
mutex_unlock(&sbi->extent_tree_lock);
out:
trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt);
@@ -656,10 +715,10 @@ void f2fs_destroy_extent_tree(struct inode *inode)
if (inode->i_nlink && !is_bad_inode(inode) &&
atomic_read(&et->node_cnt)) {
down_write(&sbi->extent_tree_lock);
mutex_lock(&sbi->extent_tree_lock);
list_add_tail(&et->list, &sbi->zombie_list);
atomic_inc(&sbi->total_zombie_tree);
up_write(&sbi->extent_tree_lock);
mutex_unlock(&sbi->extent_tree_lock);
return;
}
@@ -667,12 +726,12 @@ void f2fs_destroy_extent_tree(struct inode *inode)
node_cnt = f2fs_destroy_extent_node(inode);
/* delete extent tree entry in radix tree */
down_write(&sbi->extent_tree_lock);
mutex_lock(&sbi->extent_tree_lock);
f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
kmem_cache_free(extent_tree_slab, et);
atomic_dec(&sbi->total_ext_tree);
up_write(&sbi->extent_tree_lock);
mutex_unlock(&sbi->extent_tree_lock);
F2FS_I(inode)->extent_tree = NULL;
@@ -719,7 +778,7 @@ void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
void init_extent_cache_info(struct f2fs_sb_info *sbi)
{
INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO);
init_rwsem(&sbi->extent_tree_lock);
mutex_init(&sbi->extent_tree_lock);
INIT_LIST_HEAD(&sbi->extent_list);
spin_lock_init(&sbi->extent_lock);
atomic_set(&sbi->total_ext_tree, 0);

File diff suppressed because it is too large Load Diff

View File

@@ -20,6 +20,7 @@
#include <linux/uaccess.h>
#include <linux/mount.h>
#include <linux/pagevec.h>
#include <linux/uio.h>
#include <linux/uuid.h>
#include <linux/file.h>
@@ -94,8 +95,6 @@ mapped:
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr);
/* if gced page is attached, don't write to cold segment */
clear_cold_data(page);
out:
sb_end_pagefault(inode->i_sb);
f2fs_update_time(sbi, REQ_TIME);
@@ -118,11 +117,6 @@ static int get_parent_ino(struct inode *inode, nid_t *pino)
if (!dentry)
return 0;
if (update_dent_inode(inode, inode, &dentry->d_name)) {
dput(dentry);
return 0;
}
*pino = parent_ino(dentry);
dput(dentry);
return 1;
@@ -143,8 +137,6 @@ static inline bool need_do_checkpoint(struct inode *inode)
need_cp = true;
else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
need_cp = true;
else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi)))
need_cp = true;
else if (test_opt(sbi, FASTBOOT))
need_cp = true;
else if (sbi->active_logs == 2)
@@ -170,7 +162,6 @@ static void try_to_fix_pino(struct inode *inode)
nid_t pino;
down_write(&fi->i_sem);
fi->xattr_ver = 0;
if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
get_parent_ino(inode, &pino)) {
f2fs_i_pino_write(inode, pino);
@@ -210,7 +201,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
}
/* if the inode is dirty, let's recover all the time */
if (!datasync && !f2fs_skip_inode_update(inode)) {
if (!f2fs_skip_inode_update(inode, datasync)) {
f2fs_write_inode(inode, NULL);
goto go_write;
}
@@ -264,7 +255,7 @@ sync_nodes:
}
if (need_inode_block_update(sbi, ino)) {
f2fs_mark_inode_dirty_sync(inode);
f2fs_mark_inode_dirty_sync(inode, true);
f2fs_write_inode(inode, NULL);
goto sync_nodes;
}
@@ -279,7 +270,8 @@ sync_nodes:
flush_out:
remove_ino_entry(sbi, ino, UPDATE_INO);
clear_inode_flag(inode, FI_UPDATE_WRITE);
ret = f2fs_issue_flush(sbi);
if (!atomic)
ret = f2fs_issue_flush(sbi);
f2fs_update_time(sbi, REQ_TIME);
out:
trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
@@ -532,7 +524,7 @@ static int truncate_partial_data_page(struct inode *inode, u64 from,
page = get_lock_data_page(inode, index, true);
if (IS_ERR(page))
return 0;
return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page);
truncate_out:
f2fs_wait_on_page_writeback(page, DATA, true);
zero_user(page, offset, PAGE_SIZE - offset);
@@ -570,8 +562,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
}
if (f2fs_has_inline_data(inode)) {
if (truncate_inline_inode(ipage, from))
set_page_dirty(ipage);
truncate_inline_inode(inode, ipage, from);
f2fs_put_page(ipage, 1);
truncate_page = true;
goto out;
@@ -620,6 +611,12 @@ int f2fs_truncate(struct inode *inode)
trace_f2fs_truncate(inode);
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE)) {
f2fs_show_injection_info(FAULT_TRUNCATE);
return -EIO;
}
#endif
/* we should check inline_data size */
if (!f2fs_may_inline_data(inode)) {
err = f2fs_convert_inline_inode(inode);
@@ -632,12 +629,12 @@ int f2fs_truncate(struct inode *inode)
return err;
inode->i_mtime = inode->i_ctime = current_time(inode);
f2fs_mark_inode_dirty_sync(inode);
f2fs_mark_inode_dirty_sync(inode, false);
return 0;
}
int f2fs_getattr(struct vfsmount *mnt,
struct dentry *dentry, struct kstat *stat)
struct dentry *dentry, struct kstat *stat)
{
struct inode *inode = d_inode(dentry);
generic_fillattr(inode, stat);
@@ -679,6 +676,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
int err;
bool size_changed = false;
err = setattr_prepare(dentry, attr);
if (err)
@@ -694,7 +692,6 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
err = f2fs_truncate(inode);
if (err)
return err;
f2fs_balance_fs(F2FS_I_SB(inode), true);
} else {
/*
* do not trim all blocks after i_size if target size is
@@ -710,6 +707,8 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
}
inode->i_mtime = inode->i_ctime = current_time(inode);
}
size_changed = true;
}
__setattr_copy(inode, attr);
@@ -722,7 +721,12 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
}
}
f2fs_mark_inode_dirty_sync(inode);
/* file size may changed here */
f2fs_mark_inode_dirty_sync(inode, size_changed);
/* inode change will produce dirty node pages flushed by checkpoint */
f2fs_balance_fs(F2FS_I_SB(inode), true);
return err;
}
@@ -1008,11 +1012,11 @@ static int __exchange_data_block(struct inode *src_inode,
while (len) {
olen = min((pgoff_t)4 * ADDRS_PER_BLOCK, len);
src_blkaddr = f2fs_kvzalloc(sizeof(block_t) * olen, GFP_KERNEL);
src_blkaddr = kvzalloc(sizeof(block_t) * olen, GFP_KERNEL);
if (!src_blkaddr)
return -ENOMEM;
do_replace = f2fs_kvzalloc(sizeof(int) * olen, GFP_KERNEL);
do_replace = kvzalloc(sizeof(int) * olen, GFP_KERNEL);
if (!do_replace) {
kvfree(src_blkaddr);
return -ENOMEM;
@@ -1184,8 +1188,6 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
if (ret)
return ret;
if (offset + len > new_size)
new_size = offset + len;
new_size = max_t(loff_t, new_size, offset + len);
} else {
if (off_start) {
@@ -1218,6 +1220,9 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
ret = f2fs_do_zero_range(&dn, index, end);
f2fs_put_dnode(&dn);
f2fs_unlock_op(sbi);
f2fs_balance_fs(sbi, dn.node_changed);
if (ret)
goto out;
@@ -1250,8 +1255,9 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
int ret = 0;
new_size = i_size_read(inode) + len;
if (new_size > inode->i_sb->s_maxbytes)
return -EFBIG;
ret = inode_newsize_ok(inode, new_size);
if (ret)
return ret;
if (offset >= i_size_read(inode))
return -EINVAL;
@@ -1313,15 +1319,15 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
pgoff_t pg_end;
loff_t new_size = i_size_read(inode);
loff_t off_end;
int ret;
int err;
ret = inode_newsize_ok(inode, (len + offset));
if (ret)
return ret;
err = inode_newsize_ok(inode, (len + offset));
if (err)
return err;
ret = f2fs_convert_inline_inode(inode);
if (ret)
return ret;
err = f2fs_convert_inline_inode(inode);
if (err)
return err;
f2fs_balance_fs(sbi, true);
@@ -1333,12 +1339,12 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
if (off_end)
map.m_len++;
ret = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
if (ret) {
err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
if (err) {
pgoff_t last_off;
if (!map.m_len)
return ret;
return err;
last_off = map.m_lblk + map.m_len - 1;
@@ -1352,7 +1358,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size)
f2fs_i_size_write(inode, new_size);
return ret;
return err;
}
static long f2fs_fallocate(struct file *file, int mode,
@@ -1393,7 +1399,9 @@ static long f2fs_fallocate(struct file *file, int mode,
if (!ret) {
inode->i_mtime = inode->i_ctime = current_time(inode);
f2fs_mark_inode_dirty_sync(inode);
f2fs_mark_inode_dirty_sync(inode, false);
if (mode & FALLOC_FL_KEEP_SIZE)
file_set_keep_isize(inode);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
}
@@ -1419,6 +1427,7 @@ static int f2fs_release_file(struct inode *inode, struct file *filp)
drop_inmem_pages(inode);
if (f2fs_is_volatile_file(inode)) {
clear_inode_flag(inode, FI_VOLATILE_FILE);
stat_dec_volatile_write(inode);
set_inode_flag(inode, FI_DROP_CACHE);
filemap_fdatawrite(inode->i_mapping);
clear_inode_flag(inode, FI_DROP_CACHE);
@@ -1465,10 +1474,10 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
if (ret)
return ret;
flags = f2fs_mask_flags(inode->i_mode, flags);
inode_lock(inode);
flags = f2fs_mask_flags(inode->i_mode, flags);
oldflags = fi->i_flags;
if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
@@ -1482,10 +1491,11 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
flags = flags & FS_FL_USER_MODIFIABLE;
flags |= oldflags & ~FS_FL_USER_MODIFIABLE;
fi->i_flags = flags;
inode_unlock(inode);
inode->i_ctime = current_time(inode);
f2fs_set_inode_flags(inode);
inode_unlock(inode);
out:
mnt_drop_write_file(filp);
return ret;
@@ -1506,6 +1516,9 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
if (!inode_owner_or_capable(inode))
return -EACCES;
if (!S_ISREG(inode->i_mode))
return -EINVAL;
ret = mnt_want_write_file(filp);
if (ret)
return ret;
@@ -1520,17 +1533,24 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
goto out;
set_inode_flag(inode, FI_ATOMIC_FILE);
set_inode_flag(inode, FI_HOT_DATA);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
if (!get_dirty_pages(inode))
goto out;
goto inc_stat;
f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING,
"Unexpected flush for atomic writes: ino=%lu, npages=%u",
inode->i_ino, get_dirty_pages(inode));
ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
if (ret)
if (ret) {
clear_inode_flag(inode, FI_ATOMIC_FILE);
goto out;
}
inc_stat:
stat_inc_atomic_write(inode);
stat_update_max_atomic_write(inode);
out:
inode_unlock(inode);
mnt_drop_write_file(filp);
@@ -1555,15 +1575,18 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
goto err_out;
if (f2fs_is_atomic_file(inode)) {
clear_inode_flag(inode, FI_ATOMIC_FILE);
ret = commit_inmem_pages(inode);
if (ret) {
set_inode_flag(inode, FI_ATOMIC_FILE);
if (ret)
goto err_out;
}
}
ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
if (!ret) {
clear_inode_flag(inode, FI_ATOMIC_FILE);
stat_dec_atomic_write(inode);
}
} else {
ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
}
err_out:
inode_unlock(inode);
mnt_drop_write_file(filp);
@@ -1578,6 +1601,9 @@ static int f2fs_ioc_start_volatile_write(struct file *filp)
if (!inode_owner_or_capable(inode))
return -EACCES;
if (!S_ISREG(inode->i_mode))
return -EINVAL;
ret = mnt_want_write_file(filp);
if (ret)
return ret;
@@ -1591,6 +1617,9 @@ static int f2fs_ioc_start_volatile_write(struct file *filp)
if (ret)
goto out;
stat_inc_volatile_write(inode);
stat_update_max_volatile_write(inode);
set_inode_flag(inode, FI_VOLATILE_FILE);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
out:
@@ -1646,6 +1675,7 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp)
drop_inmem_pages(inode);
if (f2fs_is_volatile_file(inode)) {
clear_inode_flag(inode, FI_VOLATILE_FILE);
stat_dec_volatile_write(inode);
ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
}
@@ -1752,31 +1782,16 @@ static bool uuid_is_nonzero(__u8 u[16])
static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
{
struct fscrypt_policy policy;
struct inode *inode = file_inode(filp);
if (copy_from_user(&policy, (struct fscrypt_policy __user *)arg,
sizeof(policy)))
return -EFAULT;
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
return fscrypt_process_policy(filp, &policy);
return fscrypt_ioctl_set_policy(filp, (const void __user *)arg);
}
static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg)
{
struct fscrypt_policy policy;
struct inode *inode = file_inode(filp);
int err;
err = fscrypt_get_policy(inode, &policy);
if (err)
return err;
if (copy_to_user((struct fscrypt_policy __user *)arg, &policy, sizeof(policy)))
return -EFAULT;
return 0;
return fscrypt_ioctl_get_policy(filp, (void __user *)arg);
}
static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg)
@@ -1842,7 +1857,7 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
mutex_lock(&sbi->gc_mutex);
}
ret = f2fs_gc(sbi, sync);
ret = f2fs_gc(sbi, sync, true, NULL_SEGNO);
out:
mnt_drop_write_file(filp);
return ret;
@@ -1876,17 +1891,16 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
{
struct inode *inode = file_inode(filp);
struct f2fs_map_blocks map = { .m_next_pgofs = NULL };
struct extent_info ei;
struct extent_info ei = {0,0,0};
pgoff_t pg_start, pg_end;
unsigned int blk_per_seg = sbi->blocks_per_seg;
unsigned int total = 0, sec_num;
unsigned int pages_per_sec = sbi->segs_per_sec * blk_per_seg;
block_t blk_end = 0;
bool fragmented = false;
int err;
/* if in-place-update policy is enabled, don't waste time here */
if (need_inplace_update(inode))
if (need_inplace_update_policy(inode, NULL))
return -EINVAL;
pg_start = range->start >> PAGE_SHIFT;
@@ -1944,7 +1958,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
map.m_lblk = pg_start;
map.m_len = pg_end - pg_start;
sec_num = (map.m_len + pages_per_sec - 1) / pages_per_sec;
sec_num = (map.m_len + BLKS_PER_SEC(sbi) - 1) / BLKS_PER_SEC(sbi);
/*
* make sure there are enough free section for LFS allocation, this can
@@ -2021,42 +2035,40 @@ static int f2fs_ioc_defragment(struct file *filp, unsigned long arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (!S_ISREG(inode->i_mode))
if (!S_ISREG(inode->i_mode) || f2fs_is_atomic_file(inode))
return -EINVAL;
if (f2fs_readonly(sbi->sb))
return -EROFS;
if (copy_from_user(&range, (struct f2fs_defragment __user *)arg,
sizeof(range)))
return -EFAULT;
/* verify alignment of offset & size */
if (range.start & (F2FS_BLKSIZE - 1) || range.len & (F2FS_BLKSIZE - 1))
return -EINVAL;
if (unlikely((range.start + range.len) >> PAGE_SHIFT >
sbi->max_file_blocks))
return -EINVAL;
err = mnt_want_write_file(filp);
if (err)
return err;
if (f2fs_readonly(sbi->sb)) {
err = -EROFS;
goto out;
}
if (copy_from_user(&range, (struct f2fs_defragment __user *)arg,
sizeof(range))) {
err = -EFAULT;
goto out;
}
/* verify alignment of offset & size */
if (range.start & (F2FS_BLKSIZE - 1) ||
range.len & (F2FS_BLKSIZE - 1)) {
err = -EINVAL;
goto out;
}
err = f2fs_defragment_range(sbi, filp, &range);
mnt_drop_write_file(filp);
f2fs_update_time(sbi, REQ_TIME);
if (err < 0)
goto out;
return err;
if (copy_to_user((struct f2fs_defragment __user *)arg, &range,
sizeof(range)))
err = -EFAULT;
out:
mnt_drop_write_file(filp);
return err;
return -EFAULT;
return 0;
}
static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
@@ -2190,6 +2202,8 @@ static int f2fs_ioc_move_range(struct file *filp, unsigned long arg)
range.pos_out, range.len);
mnt_drop_write_file(filp);
if (err)
goto err_out;
if (copy_to_user((struct f2fs_move_range __user *)arg,
&range, sizeof(range)))
@@ -2199,6 +2213,69 @@ err_out:
return err;
}
static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct sit_info *sm = SIT_I(sbi);
unsigned int start_segno = 0, end_segno = 0;
unsigned int dev_start_segno = 0, dev_end_segno = 0;
struct f2fs_flush_device range;
int ret;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (f2fs_readonly(sbi->sb))
return -EROFS;
if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg,
sizeof(range)))
return -EFAULT;
if (sbi->s_ndevs <= 1 || sbi->s_ndevs - 1 <= range.dev_num ||
sbi->segs_per_sec != 1) {
f2fs_msg(sbi->sb, KERN_WARNING,
"Can't flush %u in %d for segs_per_sec %u != 1\n",
range.dev_num, sbi->s_ndevs,
sbi->segs_per_sec);
return -EINVAL;
}
ret = mnt_want_write_file(filp);
if (ret)
return ret;
if (range.dev_num != 0)
dev_start_segno = GET_SEGNO(sbi, FDEV(range.dev_num).start_blk);
dev_end_segno = GET_SEGNO(sbi, FDEV(range.dev_num).end_blk);
start_segno = sm->last_victim[FLUSH_DEVICE];
if (start_segno < dev_start_segno || start_segno >= dev_end_segno)
start_segno = dev_start_segno;
end_segno = min(start_segno + range.segments, dev_end_segno);
while (start_segno < end_segno) {
if (!mutex_trylock(&sbi->gc_mutex)) {
ret = -EBUSY;
goto out;
}
sm->last_victim[GC_CB] = end_segno + 1;
sm->last_victim[GC_GREEDY] = end_segno + 1;
sm->last_victim[ALLOC_NEXT] = end_segno + 1;
ret = f2fs_gc(sbi, true, true, start_segno);
if (ret == -EAGAIN)
ret = 0;
else if (ret < 0)
break;
start_segno++;
}
out:
mnt_drop_write_file(filp);
return ret;
}
long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
switch (cmd) {
@@ -2236,6 +2313,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return f2fs_ioc_defragment(filp, arg);
case F2FS_IOC_MOVE_RANGE:
return f2fs_ioc_move_range(filp, arg);
case F2FS_IOC_FLUSH_DEVICE:
return f2fs_ioc_flush_device(filp, arg);
default:
return -ENOTTY;
}
@@ -2256,12 +2335,20 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
inode_lock(inode);
ret = generic_write_checks(iocb, from);
if (ret > 0) {
ret = f2fs_preallocate_blocks(iocb, from);
if (!ret) {
blk_start_plug(&plug);
ret = __generic_file_write_iter(iocb, from);
blk_finish_plug(&plug);
int err;
if (iov_iter_fault_in_readable(from, iov_iter_count(from)))
set_inode_flag(inode, FI_NO_PREALLOC);
err = f2fs_preallocate_blocks(iocb, from);
if (err) {
inode_unlock(inode);
return err;
}
blk_start_plug(&plug);
ret = __generic_file_write_iter(iocb, from);
blk_finish_plug(&plug);
clear_inode_flag(inode, FI_NO_PREALLOC);
}
inode_unlock(inode);
@@ -2295,8 +2382,8 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case F2FS_IOC_GARBAGE_COLLECT:
case F2FS_IOC_WRITE_CHECKPOINT:
case F2FS_IOC_DEFRAGMENT:
break;
case F2FS_IOC_MOVE_RANGE:
case F2FS_IOC_FLUSH_DEVICE:
break;
default:
return -ENOIOCTLCMD;

View File

@@ -48,8 +48,10 @@ static int gc_thread_func(void *data)
}
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_CHECKPOINT))
if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
f2fs_show_injection_info(FAULT_CHECKPOINT);
f2fs_stop_checkpoint(sbi, false);
}
#endif
/*
@@ -82,7 +84,7 @@ static int gc_thread_func(void *data)
stat_inc_bggc_count(sbi);
/* if return value is not zero, no victim was selected */
if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC)))
if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC), true, NULL_SEGNO))
wait_ms = gc_th->no_gc_sleep_time;
trace_f2fs_background_gc(sbi->sb, wait_ms,
@@ -170,7 +172,11 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
if (gc_type != FG_GC && p->max_search > sbi->max_victim_search)
p->max_search = sbi->max_victim_search;
p->offset = sbi->last_victim[p->gc_mode];
/* let's select beginning hot/small space first */
if (type == CURSEG_HOT_DATA || IS_NODESEG(type))
p->offset = 0;
else
p->offset = SIT_I(sbi)->last_victim[p->gc_mode];
}
static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
@@ -180,7 +186,7 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
if (p->alloc_mode == SSR)
return sbi->blocks_per_seg;
if (p->gc_mode == GC_GREEDY)
return sbi->blocks_per_seg * p->ofs_unit;
return 2 * sbi->blocks_per_seg * p->ofs_unit;
else if (p->gc_mode == GC_CB)
return UINT_MAX;
else /* No other gc_mode */
@@ -205,7 +211,7 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
continue;
clear_bit(secno, dirty_i->victim_secmap);
return secno * sbi->segs_per_sec;
return GET_SEG_FROM_SEC(sbi, secno);
}
return NULL_SEGNO;
}
@@ -213,8 +219,8 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
{
struct sit_info *sit_i = SIT_I(sbi);
unsigned int secno = GET_SECNO(sbi, segno);
unsigned int start = secno * sbi->segs_per_sec;
unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
unsigned int start = GET_SEG_FROM_SEC(sbi, secno);
unsigned long long mtime = 0;
unsigned int vblocks;
unsigned char age = 0;
@@ -223,7 +229,7 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
for (i = 0; i < sbi->segs_per_sec; i++)
mtime += get_seg_entry(sbi, start + i)->mtime;
vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);
vblocks = get_valid_blocks(sbi, segno, true);
mtime = div_u64(mtime, sbi->segs_per_sec);
vblocks = div_u64(vblocks, sbi->segs_per_sec);
@@ -242,6 +248,16 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
return UINT_MAX - ((100 * (100 - u) * age) / (100 + u));
}
static unsigned int get_greedy_cost(struct f2fs_sb_info *sbi,
unsigned int segno)
{
unsigned int valid_blocks =
get_valid_blocks(sbi, segno, true);
return IS_DATASEG(get_seg_entry(sbi, segno)->type) ?
valid_blocks * 2 : valid_blocks;
}
static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
unsigned int segno, struct victim_sel_policy *p)
{
@@ -250,7 +266,7 @@ static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
/* alloc_mode == LFS */
if (p->gc_mode == GC_GREEDY)
return get_valid_blocks(sbi, segno, sbi->segs_per_sec);
return get_greedy_cost(sbi, segno);
else
return get_cb_cost(sbi, segno);
}
@@ -279,6 +295,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
unsigned int *result, int gc_type, int type, char alloc_mode)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
struct sit_info *sm = SIT_I(sbi);
struct victim_sel_policy p;
unsigned int secno, last_victim;
unsigned int last_segment = MAIN_SEGS(sbi);
@@ -292,10 +309,18 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
p.min_segno = NULL_SEGNO;
p.min_cost = get_max_cost(sbi, &p);
if (*result != NULL_SEGNO) {
if (IS_DATASEG(get_seg_entry(sbi, *result)->type) &&
get_valid_blocks(sbi, *result, false) &&
!sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result)))
p.min_segno = *result;
goto out;
}
if (p.max_search == 0)
goto out;
last_victim = sbi->last_victim[p.gc_mode];
last_victim = sm->last_victim[p.gc_mode];
if (p.alloc_mode == LFS && gc_type == FG_GC) {
p.min_segno = check_bg_victims(sbi);
if (p.min_segno != NULL_SEGNO)
@@ -308,9 +333,10 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
segno = find_next_bit(p.dirty_segmap, last_segment, p.offset);
if (segno >= last_segment) {
if (sbi->last_victim[p.gc_mode]) {
last_segment = sbi->last_victim[p.gc_mode];
sbi->last_victim[p.gc_mode] = 0;
if (sm->last_victim[p.gc_mode]) {
last_segment =
sm->last_victim[p.gc_mode];
sm->last_victim[p.gc_mode] = 0;
p.offset = 0;
continue;
}
@@ -327,7 +353,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
nsearched++;
}
secno = GET_SECNO(sbi, segno);
secno = GET_SEC_FROM_SEG(sbi, segno);
if (sec_usage_check(sbi, secno))
goto next;
@@ -345,17 +371,18 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
}
next:
if (nsearched >= p.max_search) {
if (!sbi->last_victim[p.gc_mode] && segno <= last_victim)
sbi->last_victim[p.gc_mode] = last_victim + 1;
if (!sm->last_victim[p.gc_mode] && segno <= last_victim)
sm->last_victim[p.gc_mode] = last_victim + 1;
else
sbi->last_victim[p.gc_mode] = segno + 1;
sm->last_victim[p.gc_mode] = segno + 1;
sm->last_victim[p.gc_mode] %= MAIN_SEGS(sbi);
break;
}
}
if (p.min_segno != NULL_SEGNO) {
got_it:
if (p.alloc_mode == LFS) {
secno = GET_SECNO(sbi, p.min_segno);
secno = GET_SEC_FROM_SEG(sbi, p.min_segno);
if (gc_type == FG_GC)
sbi->cur_victim_sec = secno;
else
@@ -538,8 +565,10 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
get_node_info(sbi, nid, dni);
if (sum->version != dni->version) {
f2fs_put_page(node_page, 1);
return false;
f2fs_msg(sbi->sb, KERN_WARNING,
"%s: valid data with mismatched node version.",
__func__);
set_sbi_flag(sbi, SBI_NEED_FSCK);
}
*nofs = ofs_of_node(node_page);
@@ -551,13 +580,14 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
return true;
}
static void move_encrypted_block(struct inode *inode, block_t bidx)
static void move_encrypted_block(struct inode *inode, block_t bidx,
unsigned int segno, int off)
{
struct f2fs_io_info fio = {
.sbi = F2FS_I_SB(inode),
.type = DATA,
.op = REQ_OP_READ,
.op_flags = READ_SYNC,
.op_flags = 0,
.encrypted_page = NULL,
};
struct dnode_of_data dn;
@@ -572,6 +602,12 @@ static void move_encrypted_block(struct inode *inode, block_t bidx)
if (!page)
return;
if (!check_valid_map(F2FS_I_SB(inode), segno, off))
goto out;
if (f2fs_is_atomic_file(inode))
goto out;
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = get_dnode_of_data(&dn, bidx, LOOKUP_NODE);
if (err)
@@ -632,7 +668,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx)
f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
fio.op = REQ_OP_WRITE;
fio.op_flags = WRITE_SYNC;
fio.op_flags = REQ_SYNC;
fio.new_blkaddr = newaddr;
f2fs_submit_page_mbio(&fio);
@@ -652,7 +688,8 @@ out:
f2fs_put_page(page, 1);
}
static void move_data_page(struct inode *inode, block_t bidx, int gc_type)
static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
unsigned int segno, int off)
{
struct page *page;
@@ -660,6 +697,12 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type)
if (IS_ERR(page))
return;
if (!check_valid_map(F2FS_I_SB(inode), segno, off))
goto out;
if (f2fs_is_atomic_file(inode))
goto out;
if (gc_type == BG_GC) {
if (PageWriteback(page))
goto out;
@@ -670,9 +713,11 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type)
.sbi = F2FS_I_SB(inode),
.type = DATA,
.op = REQ_OP_WRITE,
.op_flags = WRITE_SYNC,
.op_flags = REQ_SYNC,
.old_blkaddr = NULL_ADDR,
.page = page,
.encrypted_page = NULL,
.need_lock = true,
};
bool is_dirty = PageDirty(page);
int err;
@@ -680,8 +725,10 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type)
retry:
set_page_dirty(page);
f2fs_wait_on_page_writeback(page, DATA, true);
if (clear_page_dirty_for_io(page))
if (clear_page_dirty_for_io(page)) {
inode_dec_dirty_pages(inode);
remove_dirty_inode(inode);
}
set_cold_data(page);
@@ -690,8 +737,6 @@ retry:
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto retry;
}
clear_cold_data(page);
}
out:
f2fs_put_page(page, 1);
@@ -801,9 +846,9 @@ next_step:
start_bidx = start_bidx_of_node(nofs, inode)
+ ofs_in_node;
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
move_encrypted_block(inode, start_bidx);
move_encrypted_block(inode, start_bidx, segno, off);
else
move_data_page(inode, start_bidx, gc_type);
move_data_page(inode, start_bidx, gc_type, segno, off);
if (locked) {
up_write(&fi->dio_rwsem[WRITE]);
@@ -864,7 +909,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
GET_SUM_BLOCK(sbi, segno));
f2fs_put_page(sum_page, 0);
if (get_valid_blocks(sbi, segno, 1) == 0 ||
if (get_valid_blocks(sbi, segno, false) == 0 ||
!PageUptodate(sum_page) ||
unlikely(f2fs_cp_error(sbi)))
goto next;
@@ -879,7 +924,6 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
* - mutex_lock(sentry_lock) - change_curseg()
* - lock_page(sum_page)
*/
if (type == SUM_TYPE_NODE)
gc_node_segment(sbi, sum->entries, segno, gc_type);
else
@@ -898,7 +942,7 @@ next:
blk_finish_plug(&plug);
if (gc_type == FG_GC &&
get_valid_blocks(sbi, start_segno, sbi->segs_per_sec) == 0)
get_valid_blocks(sbi, start_segno, true) == 0)
sec_freed = 1;
stat_inc_call_count(sbi->stat_info);
@@ -906,13 +950,14 @@ next:
return sec_freed;
}
int f2fs_gc(struct f2fs_sb_info *sbi, bool sync)
int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
bool background, unsigned int segno)
{
unsigned int segno;
int gc_type = sync ? FG_GC : BG_GC;
int sec_freed = 0;
int ret = -EINVAL;
struct cp_control cpc;
unsigned int init_segno = segno;
struct gc_inode_list gc_list = {
.ilist = LIST_HEAD_INIT(gc_list.ilist),
.iroot = RADIX_TREE_INIT(GFP_NOFS),
@@ -920,8 +965,6 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync)
cpc.reason = __get_cp_reason(sbi);
gc_more:
segno = NULL_SEGNO;
if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
goto stop;
if (unlikely(f2fs_cp_error(sbi))) {
@@ -929,27 +972,25 @@ gc_more:
goto stop;
}
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, sec_freed, 0)) {
gc_type = FG_GC;
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) {
/*
* If there is no victim and no prefree segment but still not
* enough free sections, we should flush dent/node blocks and do
* garbage collections.
* For example, if there are many prefree_segments below given
* threshold, we can make them free by checkpoint. Then, we
* secure free segments which doesn't need fggc any more.
*/
if (__get_victim(sbi, &segno, gc_type) ||
prefree_segments(sbi)) {
ret = write_checkpoint(sbi, &cpc);
if (ret)
goto stop;
segno = NULL_SEGNO;
} else if (has_not_enough_free_secs(sbi, 0, 0)) {
if (prefree_segments(sbi)) {
ret = write_checkpoint(sbi, &cpc);
if (ret)
goto stop;
}
if (has_not_enough_free_secs(sbi, 0, 0))
gc_type = FG_GC;
}
if (segno == NULL_SEGNO && !__get_victim(sbi, &segno, gc_type))
/* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
if (gc_type == BG_GC && !background)
goto stop;
if (!__get_victim(sbi, &segno, gc_type))
goto stop;
ret = 0;
@@ -961,13 +1002,17 @@ gc_more:
sbi->cur_victim_sec = NULL_SEGNO;
if (!sync) {
if (has_not_enough_free_secs(sbi, sec_freed, 0))
if (has_not_enough_free_secs(sbi, sec_freed, 0)) {
segno = NULL_SEGNO;
goto gc_more;
}
if (gc_type == FG_GC)
ret = write_checkpoint(sbi, &cpc);
}
stop:
SIT_I(sbi)->last_victim[ALLOC_NEXT] = 0;
SIT_I(sbi)->last_victim[FLUSH_DEVICE] = init_segno;
mutex_unlock(&sbi->gc_mutex);
put_gc_inode(&gc_list);
@@ -979,7 +1024,7 @@ stop:
void build_gc_manager(struct f2fs_sb_info *sbi)
{
u64 main_count, resv_count, ovp_count, blocks_per_sec;
u64 main_count, resv_count, ovp_count;
DIRTY_I(sbi)->v_ops = &default_v_ops;
@@ -987,8 +1032,12 @@ void build_gc_manager(struct f2fs_sb_info *sbi)
main_count = SM_I(sbi)->main_segments << sbi->log_blocks_per_seg;
resv_count = SM_I(sbi)->reserved_segments << sbi->log_blocks_per_seg;
ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg;
blocks_per_sec = sbi->blocks_per_seg * sbi->segs_per_sec;
sbi->fggc_threshold = div_u64((main_count - ovp_count) * blocks_per_sec,
(main_count - resv_count));
sbi->fggc_threshold = div64_u64((main_count - ovp_count) *
BLKS_PER_SEC(sbi), (main_count - resv_count));
/* give warm/cold data area from slower device */
if (sbi->s_ndevs && sbi->segs_per_sec == 1)
SIT_I(sbi)->last_victim[ALLOC_NEXT] =
GET_SEGNO(sbi, FDEV(0).end_blk) + 1;
}

View File

@@ -63,19 +63,21 @@ void read_inline_data(struct page *page, struct page *ipage)
SetPageUptodate(page);
}
bool truncate_inline_inode(struct page *ipage, u64 from)
void truncate_inline_inode(struct inode *inode, struct page *ipage, u64 from)
{
void *addr;
if (from >= MAX_INLINE_DATA)
return false;
return;
addr = inline_data_addr(ipage);
f2fs_wait_on_page_writeback(ipage, NODE, true);
memset(addr + from, 0, MAX_INLINE_DATA - from);
set_page_dirty(ipage);
return true;
if (from == 0)
clear_inode_flag(inode, FI_DATA_EXIST);
}
int f2fs_read_inline_data(struct inode *inode, struct page *page)
@@ -111,7 +113,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
.sbi = F2FS_I_SB(dn->inode),
.type = DATA,
.op = REQ_OP_WRITE,
.op_flags = WRITE_SYNC | REQ_PRIO,
.op_flags = REQ_SYNC | REQ_PRIO,
.page = page,
.encrypted_page = NULL,
};
@@ -135,20 +137,23 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
/* write data page to try to make data consistent */
set_page_writeback(page);
fio.old_blkaddr = dn->data_blkaddr;
set_inode_flag(dn->inode, FI_HOT_DATA);
write_data_page(dn, &fio);
f2fs_wait_on_page_writeback(page, DATA, true);
if (dirty)
if (dirty) {
inode_dec_dirty_pages(dn->inode);
remove_dirty_inode(dn->inode);
}
/* this converted inline_data should be recovered. */
set_inode_flag(dn->inode, FI_APPEND_WRITE);
/* clear inline data and flag after data writeback */
truncate_inline_inode(dn->inode_page, 0);
truncate_inline_inode(dn->inode, dn->inode_page, 0);
clear_inline_node(dn->inode_page);
clear_out:
stat_dec_inline_inode(dn->inode);
f2fs_clear_inline_inode(dn->inode);
clear_inode_flag(dn->inode, FI_INLINE_DATA);
f2fs_put_dnode(dn);
return 0;
}
@@ -265,9 +270,8 @@ process_inline:
if (f2fs_has_inline_data(inode)) {
ipage = get_node_page(sbi, inode->i_ino);
f2fs_bug_on(sbi, IS_ERR(ipage));
if (!truncate_inline_inode(ipage, 0))
return false;
f2fs_clear_inline_inode(inode);
truncate_inline_inode(inode, ipage, 0);
clear_inode_flag(inode, FI_INLINE_DATA);
f2fs_put_page(ipage, 1);
} else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
if (truncate_blocks(inode, 0, false))
@@ -298,7 +302,7 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
inline_dentry = inline_data_addr(ipage);
make_dentry_ptr(NULL, &d, (void *)inline_dentry, 2);
make_dentry_ptr_inline(NULL, &d, inline_dentry);
de = find_target_dentry(fname, namehash, NULL, &d);
unlock_page(ipage);
if (de)
@@ -317,7 +321,7 @@ int make_empty_inline_dir(struct inode *inode, struct inode *parent,
dentry_blk = inline_data_addr(ipage);
make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2);
make_dentry_ptr_inline(NULL, &d, dentry_blk);
do_make_empty_dir(inode, parent, &d);
set_page_dirty(ipage);
@@ -378,7 +382,7 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage,
set_page_dirty(page);
/* clear inline dir and flag after data writeback */
truncate_inline_inode(ipage, 0);
truncate_inline_inode(dir, ipage, 0);
stat_dec_inline_dir(dir);
clear_inode_flag(dir, FI_INLINE_DENTRY);
@@ -398,7 +402,7 @@ static int f2fs_add_inline_entries(struct inode *dir,
unsigned long bit_pos = 0;
int err = 0;
make_dentry_ptr(NULL, &d, (void *)inline_dentry, 2);
make_dentry_ptr_inline(NULL, &d, inline_dentry);
while (bit_pos < d.max) {
struct f2fs_dir_entry *de;
@@ -419,7 +423,7 @@ static int f2fs_add_inline_entries(struct inode *dir,
}
new_name.name = d.filename[bit_pos];
new_name.len = de->name_len;
new_name.len = le16_to_cpu(de->name_len);
ino = le32_to_cpu(de->ino);
fake_mode = get_de_type(de) << S_SHIFT;
@@ -453,7 +457,7 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage,
}
memcpy(backup_dentry, inline_dentry, MAX_INLINE_DATA);
truncate_inline_inode(ipage, 0);
truncate_inline_inode(dir, ipage, 0);
unlock_page(ipage);
@@ -525,14 +529,12 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name,
err = PTR_ERR(page);
goto fail;
}
if (f2fs_encrypted_inode(dir))
file_set_enc_name(inode);
}
f2fs_wait_on_page_writeback(ipage, NODE, true);
name_hash = f2fs_dentry_hash(new_name, NULL);
make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2);
make_dentry_ptr_inline(NULL, &d, dentry_blk);
f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos);
set_page_dirty(ipage);
@@ -573,7 +575,7 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page,
f2fs_put_page(page, 1);
dir->i_ctime = dir->i_mtime = current_time(dir);
f2fs_mark_inode_dirty_sync(dir);
f2fs_mark_inode_dirty_sync(dir, false);
if (inode)
f2fs_drop_nlink(dir, inode);
@@ -610,6 +612,7 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
struct f2fs_inline_dentry *inline_dentry = NULL;
struct page *ipage = NULL;
struct f2fs_dentry_ptr d;
int err;
if (ctx->pos == NR_INLINE_DENTRY)
return 0;
@@ -620,13 +623,14 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
inline_dentry = inline_data_addr(ipage);
make_dentry_ptr(inode, &d, (void *)inline_dentry, 2);
make_dentry_ptr_inline(inode, &d, inline_dentry);
if (!f2fs_fill_dentries(ctx, &d, 0, fstr))
err = f2fs_fill_dentries(ctx, &d, 0, fstr);
if (!err)
ctx->pos = NR_INLINE_DENTRY;
f2fs_put_page(ipage, 1);
return 0;
return err < 0 ? err : 0;
}
int f2fs_inline_data_fiemap(struct inode *inode,

View File

@@ -19,10 +19,11 @@
#include <trace/events/f2fs.h>
void f2fs_mark_inode_dirty_sync(struct inode *inode)
void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync)
{
if (f2fs_inode_dirtied(inode))
if (f2fs_inode_dirtied(inode, sync))
return;
mark_inode_dirty_sync(inode);
}
@@ -43,7 +44,7 @@ void f2fs_set_inode_flags(struct inode *inode)
new_fl |= S_DIRSYNC;
inode_set_flags(inode, new_fl,
S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
f2fs_mark_inode_dirty_sync(inode);
f2fs_mark_inode_dirty_sync(inode, false);
}
static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
@@ -252,6 +253,7 @@ retry:
int update_inode(struct inode *inode, struct page *node_page)
{
struct f2fs_inode *ri;
struct extent_tree *et = F2FS_I(inode)->extent_tree;
f2fs_inode_synced(inode);
@@ -267,11 +269,13 @@ int update_inode(struct inode *inode, struct page *node_page)
ri->i_size = cpu_to_le64(i_size_read(inode));
ri->i_blocks = cpu_to_le64(inode->i_blocks);
if (F2FS_I(inode)->extent_tree)
set_raw_extent(&F2FS_I(inode)->extent_tree->largest,
&ri->i_ext);
else
if (et) {
read_lock(&et->lock);
set_raw_extent(&et->largest, &ri->i_ext);
read_unlock(&et->lock);
} else {
memset(&ri->i_ext, 0, sizeof(ri->i_ext));
}
set_raw_inline(inode, ri);
ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
@@ -312,7 +316,6 @@ retry:
} else if (err != -ENOENT) {
f2fs_stop_checkpoint(sbi, false);
}
f2fs_inode_synced(inode);
return 0;
}
ret = update_inode(inode, node_page);
@@ -335,7 +338,8 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
* We need to balance fs here to prevent from producing dirty node pages
* during the urgent cleaning time when runing out of free sections.
*/
if (update_inode_page(inode))
update_inode_page(inode);
if (wbc && wbc->nr_to_write)
f2fs_balance_fs(sbi, true);
return 0;
}
@@ -368,10 +372,8 @@ void f2fs_evict_inode(struct inode *inode)
if (inode->i_nlink || is_bad_inode(inode))
goto no_delete;
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_EVICT_INODE))
goto no_delete;
#endif
remove_ino_entry(sbi, inode->i_ino, APPEND_INO);
remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
sb_start_intwrite(inode->i_sb);
set_inode_flag(inode, FI_NO_ALLOC);
@@ -380,10 +382,18 @@ retry:
if (F2FS_HAS_BLOCKS(inode))
err = f2fs_truncate(inode);
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_EVICT_INODE)) {
f2fs_show_injection_info(FAULT_EVICT_INODE);
err = -EIO;
}
#endif
if (!err) {
f2fs_lock_op(sbi);
err = remove_inode_page(inode);
f2fs_unlock_op(sbi);
if (err == -ENOENT)
err = 0;
}
/* give more chances, if ENOMEM case */
@@ -400,13 +410,18 @@ no_delete:
stat_dec_inline_dir(inode);
stat_dec_inline_inode(inode);
invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);
/* ino == 0, if f2fs_new_inode() was failed t*/
if (inode->i_ino)
invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino,
inode->i_ino);
if (xnid)
invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid);
if (is_inode_flag_set(inode, FI_APPEND_WRITE))
add_ino_entry(sbi, inode->i_ino, APPEND_INO);
if (is_inode_flag_set(inode, FI_UPDATE_WRITE))
add_ino_entry(sbi, inode->i_ino, UPDATE_INO);
if (inode->i_nlink) {
if (is_inode_flag_set(inode, FI_APPEND_WRITE))
add_ino_entry(sbi, inode->i_ino, APPEND_INO);
if (is_inode_flag_set(inode, FI_UPDATE_WRITE))
add_ino_entry(sbi, inode->i_ino, UPDATE_INO);
}
if (is_inode_flag_set(inode, FI_FREE_NID)) {
alloc_nid_failed(sbi, inode->i_ino);
clear_inode_flag(inode, FI_FREE_NID);
@@ -424,6 +439,19 @@ void handle_failed_inode(struct inode *inode)
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct node_info ni;
/*
* clear nlink of inode in order to release resource of inode
* immediately.
*/
clear_nlink(inode);
/*
* we must call this to avoid inode being remained as dirty, resulting
* in a panic when flushing dirty inodes in gdirty_list.
*/
update_inode_page(inode);
f2fs_inode_synced(inode);
/* don't make bad inode, since it becomes a regular file. */
unlock_new_inode(inode);

View File

@@ -148,8 +148,6 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
inode->i_mapping->a_ops = &f2fs_dblock_aops;
ino = inode->i_ino;
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
if (err)
@@ -163,6 +161,8 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
f2fs_balance_fs(sbi, true);
return 0;
out:
handle_failed_inode(inode);
@@ -321,12 +321,13 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
if (err)
goto err_out;
}
if (!IS_ERR(inode) && f2fs_encrypted_inode(dir) &&
(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
!fscrypt_has_permitted_context(dir, inode)) {
bool nokey = f2fs_encrypted_inode(inode) &&
!fscrypt_has_encryption_key(inode);
err = nokey ? -ENOKEY : -EPERM;
if (f2fs_encrypted_inode(dir) &&
(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
!fscrypt_has_permitted_context(dir, inode)) {
f2fs_msg(inode->i_sb, KERN_WARNING,
"Inconsistent encryption contexts: %lu/%lu",
dir->i_ino, inode->i_ino);
err = -EPERM;
goto err_out;
}
return d_splice_alias(inode, dentry);
@@ -403,7 +404,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
return err;
if (!fscrypt_has_encryption_key(dir))
return -EPERM;
return -ENOKEY;
disk_link.len = (fscrypt_fname_encrypted_size(dir, len) +
sizeof(struct fscrypt_symlink_data));
@@ -423,8 +424,6 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
inode_nohighmem(inode);
inode->i_mapping->a_ops = &f2fs_dblock_aops;
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
if (err)
@@ -447,7 +446,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
goto err_out;
if (!fscrypt_has_encryption_key(inode)) {
err = -EPERM;
err = -ENOKEY;
goto err_out;
}
@@ -487,6 +486,8 @@ err_out:
}
kfree(sd);
f2fs_balance_fs(sbi, true);
return err;
out:
handle_failed_inode(inode);
@@ -508,8 +509,6 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
inode->i_mapping->a_ops = &f2fs_dblock_aops;
mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO);
f2fs_balance_fs(sbi, true);
set_inode_flag(inode, FI_INC_LINK);
f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
@@ -524,6 +523,8 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
f2fs_balance_fs(sbi, true);
return 0;
out_fail:
@@ -554,8 +555,6 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
init_special_inode(inode, inode->i_mode, rdev);
inode->i_op = &f2fs_special_inode_operations;
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
if (err)
@@ -569,6 +568,8 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
f2fs_balance_fs(sbi, true);
return 0;
out:
handle_failed_inode(inode);
@@ -595,8 +596,6 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
inode->i_mapping->a_ops = &f2fs_dblock_aops;
}
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
err = acquire_orphan_inode(sbi);
if (err)
@@ -622,6 +621,8 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
/* link_count was changed by d_tmpfile as well. */
f2fs_unlock_op(sbi);
unlock_new_inode(inode);
f2fs_balance_fs(sbi, true);
return 0;
release_out:
@@ -663,6 +664,12 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
bool is_old_inline = f2fs_has_inline_dentry(old_dir);
int err = -ENOENT;
if ((f2fs_encrypted_inode(old_dir) &&
!fscrypt_has_encryption_key(old_dir)) ||
(f2fs_encrypted_inode(new_dir) &&
!fscrypt_has_encryption_key(new_dir)))
return -ENOKEY;
if ((old_dir != new_dir) && f2fs_encrypted_inode(new_dir) &&
!fscrypt_has_permitted_context(new_dir, old_inode)) {
err = -EPERM;
@@ -714,13 +721,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (err)
goto put_out_dir;
err = update_dent_inode(old_inode, new_inode,
&new_dentry->d_name);
if (err) {
release_orphan_inode(sbi);
goto put_out_dir;
}
f2fs_set_link(new_dir, new_entry, new_page, old_inode);
new_inode->i_ctime = current_time(new_inode);
@@ -773,12 +773,10 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
down_write(&F2FS_I(old_inode)->i_sem);
file_lost_pino(old_inode);
if (new_inode && file_enc_name(new_inode))
file_set_enc_name(old_inode);
up_write(&F2FS_I(old_inode)->i_sem);
old_inode->i_ctime = current_time(old_inode);
f2fs_mark_inode_dirty_sync(old_inode);
f2fs_mark_inode_dirty_sync(old_inode, false);
f2fs_delete_entry(old_entry, old_page, old_dir, NULL);
@@ -843,6 +841,12 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
int old_nlink = 0, new_nlink = 0;
int err = -ENOENT;
if ((f2fs_encrypted_inode(old_dir) &&
!fscrypt_has_encryption_key(old_dir)) ||
(f2fs_encrypted_inode(new_dir) &&
!fscrypt_has_encryption_key(new_dir)))
return -ENOKEY;
if ((f2fs_encrypted_inode(old_dir) || f2fs_encrypted_inode(new_dir)) &&
(old_dir != new_dir) &&
(!fscrypt_has_permitted_context(new_dir, old_inode) ||
@@ -896,8 +900,8 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
old_nlink = old_dir_entry ? -1 : 1;
new_nlink = -old_nlink;
err = -EMLINK;
if ((old_nlink > 0 && old_inode->i_nlink >= F2FS_LINK_MAX) ||
(new_nlink > 0 && new_inode->i_nlink >= F2FS_LINK_MAX))
if ((old_nlink > 0 && old_dir->i_nlink >= F2FS_LINK_MAX) ||
(new_nlink > 0 && new_dir->i_nlink >= F2FS_LINK_MAX))
goto out_new_dir;
}
@@ -905,18 +909,6 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
f2fs_lock_op(sbi);
err = update_dent_inode(old_inode, new_inode, &new_dentry->d_name);
if (err)
goto out_unlock;
if (file_enc_name(new_inode))
file_set_enc_name(old_inode);
err = update_dent_inode(new_inode, old_inode, &old_dentry->d_name);
if (err)
goto out_undo;
if (file_enc_name(old_inode))
file_set_enc_name(new_inode);
/* update ".." directory entry info of old dentry */
if (old_dir_entry)
f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir);
@@ -938,7 +930,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
f2fs_i_links_write(old_dir, old_nlink > 0);
up_write(&F2FS_I(old_dir)->i_sem);
}
f2fs_mark_inode_dirty_sync(old_dir);
f2fs_mark_inode_dirty_sync(old_dir, false);
/* update directory entry info of new dir inode */
f2fs_set_link(new_dir, new_entry, new_page, old_inode);
@@ -953,21 +945,13 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
f2fs_i_links_write(new_dir, new_nlink > 0);
up_write(&F2FS_I(new_dir)->i_sem);
}
f2fs_mark_inode_dirty_sync(new_dir);
f2fs_mark_inode_dirty_sync(new_dir, false);
f2fs_unlock_op(sbi);
if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
f2fs_sync_fs(sbi->sb, 1);
return 0;
out_undo:
/*
* Still we may fail to recover name info of f2fs_inode here
* Drop it, once its name is set as encrypted
*/
update_dent_inode(old_inode, old_inode, &old_dentry->d_name);
out_unlock:
f2fs_unlock_op(sbi);
out_new_dir:
if (new_dir_entry) {
f2fs_dentry_kunmap(new_inode, new_dir_page);

File diff suppressed because it is too large Load Diff

View File

@@ -9,10 +9,10 @@
* published by the Free Software Foundation.
*/
/* start node id of a node block dedicated to the given node id */
#define START_NID(nid) ((nid / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK)
#define START_NID(nid) (((nid) / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK)
/* node block offset on the NAT area dedicated to the given start node id */
#define NAT_BLOCK_OFFSET(start_nid) (start_nid / NAT_ENTRY_PER_BLOCK)
#define NAT_BLOCK_OFFSET(start_nid) ((start_nid) / NAT_ENTRY_PER_BLOCK)
/* # of pages to perform synchronous readahead before building free nids */
#define FREE_NID_PAGES 8
@@ -62,16 +62,16 @@ struct nat_entry {
struct node_info ni; /* in-memory node information */
};
#define nat_get_nid(nat) (nat->ni.nid)
#define nat_set_nid(nat, n) (nat->ni.nid = n)
#define nat_get_blkaddr(nat) (nat->ni.blk_addr)
#define nat_set_blkaddr(nat, b) (nat->ni.blk_addr = b)
#define nat_get_ino(nat) (nat->ni.ino)
#define nat_set_ino(nat, i) (nat->ni.ino = i)
#define nat_get_version(nat) (nat->ni.version)
#define nat_set_version(nat, v) (nat->ni.version = v)
#define nat_get_nid(nat) ((nat)->ni.nid)
#define nat_set_nid(nat, n) ((nat)->ni.nid = (n))
#define nat_get_blkaddr(nat) ((nat)->ni.blk_addr)
#define nat_set_blkaddr(nat, b) ((nat)->ni.blk_addr = (b))
#define nat_get_ino(nat) ((nat)->ni.ino)
#define nat_set_ino(nat, i) ((nat)->ni.ino = (i))
#define nat_get_version(nat) ((nat)->ni.version)
#define nat_set_version(nat, v) ((nat)->ni.version = (v))
#define inc_node_version(version) (++version)
#define inc_node_version(version) (++(version))
static inline void copy_node_info(struct node_info *dst,
struct node_info *src)
@@ -169,14 +169,15 @@ static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid)
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *fnid;
spin_lock(&nm_i->free_nid_list_lock);
if (nm_i->fcnt <= 0) {
spin_unlock(&nm_i->free_nid_list_lock);
spin_lock(&nm_i->nid_list_lock);
if (nm_i->nid_cnt[FREE_NID_LIST] <= 0) {
spin_unlock(&nm_i->nid_list_lock);
return;
}
fnid = list_entry(nm_i->free_nid_list.next, struct free_nid, list);
fnid = list_first_entry(&nm_i->nid_list[FREE_NID_LIST],
struct free_nid, list);
*nid = fnid->nid;
spin_unlock(&nm_i->free_nid_list_lock);
spin_unlock(&nm_i->nid_list_lock);
}
/*
@@ -185,6 +186,12 @@ static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid)
static inline void get_nat_bitmap(struct f2fs_sb_info *sbi, void *addr)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
#ifdef CONFIG_F2FS_CHECK_FS
if (memcmp(nm_i->nat_bitmap, nm_i->nat_bitmap_mir,
nm_i->bitmap_size))
f2fs_bug_on(sbi, 1);
#endif
memcpy(addr, nm_i->nat_bitmap, nm_i->bitmap_size);
}
@@ -193,13 +200,16 @@ static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start)
struct f2fs_nm_info *nm_i = NM_I(sbi);
pgoff_t block_off;
pgoff_t block_addr;
int seg_off;
/*
* block_off = segment_off * 512 + off_in_segment
* OLD = (segment_off * 512) * 2 + off_in_segment
* NEW = 2 * (segment_off * 512 + off_in_segment) - off_in_segment
*/
block_off = NAT_BLOCK_OFFSET(start);
seg_off = block_off >> sbi->log_blocks_per_seg;
block_addr = (pgoff_t)(nm_i->nat_blkaddr +
(seg_off << sbi->log_blocks_per_seg << 1) +
(block_off << 1) -
(block_off & (sbi->blocks_per_seg - 1)));
if (f2fs_test_bit(block_off, nm_i->nat_bitmap))
@@ -227,6 +237,9 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid)
unsigned int block_off = NAT_BLOCK_OFFSET(start_nid);
f2fs_change_bit(block_off, nm_i->nat_bitmap);
#ifdef CONFIG_F2FS_CHECK_FS
f2fs_change_bit(block_off, nm_i->nat_bitmap_mir);
#endif
}
static inline nid_t ino_of_node(struct page *node_page)
@@ -290,14 +303,11 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page));
struct f2fs_node *rn = F2FS_NODE(page);
size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
__u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver);
__u64 cp_ver = cur_cp_version(ckpt);
if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG))
cp_ver |= (cur_cp_crc(ckpt) << 32);
if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) {
__u64 crc = le32_to_cpu(*((__le32 *)
((unsigned char *)ckpt + crc_offset)));
cp_ver |= (crc << 32);
}
rn->footer.cp_ver = cpu_to_le64(cp_ver);
rn->footer.next_blkaddr = cpu_to_le32(blkaddr);
}
@@ -305,15 +315,12 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr)
static inline bool is_recoverable_dnode(struct page *page)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page));
size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
__u64 cp_ver = cur_cp_version(ckpt);
if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) {
__u64 crc = le32_to_cpu(*((__le32 *)
((unsigned char *)ckpt + crc_offset)));
cp_ver |= (crc << 32);
}
return cpu_to_le64(cp_ver) == cpver_of_node(page);
if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG))
cp_ver |= (cur_cp_crc(ckpt) << 32);
return cp_ver == cpver_of_node(page);
}
/*
@@ -342,7 +349,7 @@ static inline bool IS_DNODE(struct page *node_page)
unsigned int ofs = ofs_of_node(node_page);
if (f2fs_has_xattr_block(ofs))
return false;
return true;
if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK ||
ofs == 5 + 2 * NIDS_PER_BLOCK)

View File

@@ -180,13 +180,15 @@ static void recover_inode(struct inode *inode, struct page *page)
inode->i_mode = le16_to_cpu(raw->i_mode);
f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime);
inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime);
inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
inode->i_atime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
inode->i_atime.tv_nsec = le32_to_cpu(raw->i_atime_nsec);
inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
F2FS_I(inode)->i_advise = raw->i_advise;
if (file_enc_name(inode))
name = "<encrypted>";
else
@@ -196,33 +198,8 @@ static void recover_inode(struct inode *inode, struct page *page)
ino_of_node(page), name);
}
static bool is_same_inode(struct inode *inode, struct page *ipage)
{
struct f2fs_inode *ri = F2FS_INODE(ipage);
struct timespec disk;
if (!IS_INODE(ipage))
return true;
disk.tv_sec = le64_to_cpu(ri->i_ctime);
disk.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
if (timespec_compare(&inode->i_ctime, &disk) > 0)
return false;
disk.tv_sec = le64_to_cpu(ri->i_atime);
disk.tv_nsec = le32_to_cpu(ri->i_atime_nsec);
if (timespec_compare(&inode->i_atime, &disk) > 0)
return false;
disk.tv_sec = le64_to_cpu(ri->i_mtime);
disk.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
if (timespec_compare(&inode->i_mtime, &disk) > 0)
return false;
return true;
}
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
bool check_only)
{
struct curseg_info *curseg;
struct page *page = NULL;
@@ -248,11 +225,9 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
goto next;
entry = get_fsync_inode(head, ino_of_node(page));
if (entry) {
if (!is_same_inode(entry->inode, page))
goto next;
} else {
if (IS_INODE(page) && is_dent_dnode(page)) {
if (!entry) {
if (!check_only &&
IS_INODE(page) && is_dent_dnode(page)) {
err = recover_inode_page(sbi, page);
if (err)
break;
@@ -405,11 +380,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
if (IS_INODE(page)) {
recover_inline_xattr(inode, page);
} else if (f2fs_has_xattr_block(ofs_of_node(page))) {
/*
* Deprecated; xattr blocks should be found from cold log.
* But, we should remain this for backward compatibility.
*/
recover_xattr_data(inode, page, blkaddr);
err = recover_xattr_data(inode, page, blkaddr);
if (!err)
recovered++;
goto out;
}
@@ -454,8 +427,10 @@ retry_dn:
continue;
}
if ((start + 1) << PAGE_SHIFT > i_size_read(inode))
f2fs_i_size_write(inode, (start + 1) << PAGE_SHIFT);
if (!file_keep_isize(inode) &&
(i_size_read(inode) <= ((loff_t)start << PAGE_SHIFT)))
f2fs_i_size_write(inode,
(loff_t)(start + 1) << PAGE_SHIFT);
/*
* dest is reserved block, invalidate src block
@@ -507,8 +482,10 @@ err:
f2fs_put_dnode(&dn);
out:
f2fs_msg(sbi->sb, KERN_NOTICE,
"recover_data: ino = %lx, recovered = %d blocks, err = %d",
inode->i_ino, recovered, err);
"recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d",
inode->i_ino,
file_keep_isize(inode) ? "keep" : "recover",
recovered, err);
return err;
}
@@ -576,10 +553,8 @@ next:
int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
{
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
struct list_head inode_list;
struct list_head dir_list;
block_t blkaddr;
int err;
int ret = 0;
bool need_writecp = false;
@@ -595,10 +570,8 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
/* prevent checkpoint */
mutex_lock(&sbi->cp_mutex);
blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
/* step #1: find fsynced inode numbers */
err = find_fsync_dnodes(sbi, &inode_list);
err = find_fsync_dnodes(sbi, &inode_list, check_only);
if (err || list_empty(&inode_list))
goto out;

File diff suppressed because it is too large Load Diff

View File

@@ -18,79 +18,87 @@
#define DEF_RECLAIM_PREFREE_SEGMENTS 5 /* 5% over total segments */
#define DEF_MAX_RECLAIM_PREFREE_SEGMENTS 4096 /* 8GB in maximum */
/* L: Logical segment # in volume, R: Relative segment # in main area */
#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno)
#define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno)
#define F2FS_MIN_SEGMENTS 9 /* SB + 2 (CP + SIT + NAT) + SSA + MAIN */
#define IS_DATASEG(t) (t <= CURSEG_COLD_DATA)
#define IS_NODESEG(t) (t >= CURSEG_HOT_NODE)
/* L: Logical segment # in volume, R: Relative segment # in main area */
#define GET_L2R_SEGNO(free_i, segno) ((segno) - (free_i)->start_segno)
#define GET_R2L_SEGNO(free_i, segno) ((segno) + (free_i)->start_segno)
#define IS_DATASEG(t) ((t) <= CURSEG_COLD_DATA)
#define IS_NODESEG(t) ((t) >= CURSEG_HOT_NODE)
#define IS_CURSEG(sbi, seg) \
((seg == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \
(seg == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \
(seg == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \
(seg == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \
(seg == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \
(seg == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno))
(((seg) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \
((seg) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \
((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \
((seg) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \
((seg) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \
((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno))
#define IS_CURSEC(sbi, secno) \
((secno == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \
sbi->segs_per_sec) || \
(secno == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno / \
sbi->segs_per_sec) || \
(secno == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno / \
sbi->segs_per_sec) || \
(secno == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno / \
sbi->segs_per_sec) || \
(secno == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \
sbi->segs_per_sec) || \
(secno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \
sbi->segs_per_sec)) \
(((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \
(sbi)->segs_per_sec) || \
((secno) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno / \
(sbi)->segs_per_sec) || \
((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno / \
(sbi)->segs_per_sec) || \
((secno) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno / \
(sbi)->segs_per_sec) || \
((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \
(sbi)->segs_per_sec) || \
((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \
(sbi)->segs_per_sec)) \
#define MAIN_BLKADDR(sbi) (SM_I(sbi)->main_blkaddr)
#define SEG0_BLKADDR(sbi) (SM_I(sbi)->seg0_blkaddr)
#define MAIN_SEGS(sbi) (SM_I(sbi)->main_segments)
#define MAIN_SECS(sbi) (sbi->total_sections)
#define MAIN_SECS(sbi) ((sbi)->total_sections)
#define TOTAL_SEGS(sbi) (SM_I(sbi)->segment_count)
#define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << sbi->log_blocks_per_seg)
#define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << (sbi)->log_blocks_per_seg)
#define MAX_BLKADDR(sbi) (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi))
#define SEGMENT_SIZE(sbi) (1ULL << (sbi->log_blocksize + \
sbi->log_blocks_per_seg))
#define SEGMENT_SIZE(sbi) (1ULL << ((sbi)->log_blocksize + \
(sbi)->log_blocks_per_seg))
#define START_BLOCK(sbi, segno) (SEG0_BLKADDR(sbi) + \
(GET_R2L_SEGNO(FREE_I(sbi), segno) << sbi->log_blocks_per_seg))
(GET_R2L_SEGNO(FREE_I(sbi), segno) << (sbi)->log_blocks_per_seg))
#define NEXT_FREE_BLKADDR(sbi, curseg) \
(START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff)
(START_BLOCK(sbi, (curseg)->segno) + (curseg)->next_blkoff)
#define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) ((blk_addr) - SEG0_BLKADDR(sbi))
#define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg)
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> (sbi)->log_blocks_per_seg)
#define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (sbi->blocks_per_seg - 1))
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1))
#define GET_SEGNO(sbi, blk_addr) \
(((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \
((((blk_addr) == NULL_ADDR) || ((blk_addr) == NEW_ADDR)) ? \
NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \
GET_SEGNO_FROM_SEG0(sbi, blk_addr)))
#define GET_SECNO(sbi, segno) \
((segno) / sbi->segs_per_sec)
#define GET_ZONENO_FROM_SEGNO(sbi, segno) \
((segno / sbi->segs_per_sec) / sbi->secs_per_zone)
#define BLKS_PER_SEC(sbi) \
((sbi)->segs_per_sec * (sbi)->blocks_per_seg)
#define GET_SEC_FROM_SEG(sbi, segno) \
((segno) / (sbi)->segs_per_sec)
#define GET_SEG_FROM_SEC(sbi, secno) \
((secno) * (sbi)->segs_per_sec)
#define GET_ZONE_FROM_SEC(sbi, secno) \
((secno) / (sbi)->secs_per_zone)
#define GET_ZONE_FROM_SEG(sbi, segno) \
GET_ZONE_FROM_SEC(sbi, GET_SEC_FROM_SEG(sbi, segno))
#define GET_SUM_BLOCK(sbi, segno) \
((sbi->sm_info->ssa_blkaddr) + segno)
((sbi)->sm_info->ssa_blkaddr + (segno))
#define GET_SUM_TYPE(footer) ((footer)->entry_type)
#define SET_SUM_TYPE(footer, type) ((footer)->entry_type = type)
#define SET_SUM_TYPE(footer, type) ((footer)->entry_type = (type))
#define SIT_ENTRY_OFFSET(sit_i, segno) \
(segno % sit_i->sents_per_block)
((segno) % (sit_i)->sents_per_block)
#define SIT_BLOCK_OFFSET(segno) \
(segno / SIT_ENTRY_PER_BLOCK)
((segno) / SIT_ENTRY_PER_BLOCK)
#define START_SEGNO(segno) \
(SIT_BLOCK_OFFSET(segno) * SIT_ENTRY_PER_BLOCK)
#define SIT_BLK_CNT(sbi) \
@@ -101,9 +109,7 @@
#define SECTOR_FROM_BLOCK(blk_addr) \
(((sector_t)blk_addr) << F2FS_LOG_SECTORS_PER_BLOCK)
#define SECTOR_TO_BLOCK(sectors) \
(sectors >> F2FS_LOG_SECTORS_PER_BLOCK)
#define MAX_BIO_BLOCKS(sbi) \
((int)min((int)max_hw_blocks(sbi), BIO_MAX_PAGES))
((sectors) >> F2FS_LOG_SECTORS_PER_BLOCK)
/*
* indicate a block allocation direction: RIGHT and LEFT.
@@ -132,7 +138,10 @@ enum {
*/
enum {
GC_CB = 0,
GC_GREEDY
GC_GREEDY,
ALLOC_NEXT,
FLUSH_DEVICE,
MAX_GC_POLICY,
};
/*
@@ -164,6 +173,9 @@ struct seg_entry {
unsigned int ckpt_valid_blocks:10; /* # of valid blocks last cp */
unsigned int padding:6; /* padding */
unsigned char *cur_valid_map; /* validity bitmap of blocks */
#ifdef CONFIG_F2FS_CHECK_FS
unsigned char *cur_valid_map_mir; /* mirror of current valid bitmap */
#endif
/*
* # of valid blocks and the validity bitmap stored in the the last
* checkpoint pack. This information is used by the SSR mode.
@@ -186,9 +198,12 @@ struct segment_allocation {
* the page is atomically written, and it is in inmem_pages list.
*/
#define ATOMIC_WRITTEN_PAGE ((unsigned long)-1)
#define DUMMY_WRITTEN_PAGE ((unsigned long)-2)
#define IS_ATOMIC_WRITTEN_PAGE(page) \
(page_private(page) == (unsigned long)ATOMIC_WRITTEN_PAGE)
#define IS_DUMMY_WRITTEN_PAGE(page) \
(page_private(page) == (unsigned long)DUMMY_WRITTEN_PAGE)
struct inmem_pages {
struct list_head list;
@@ -203,6 +218,9 @@ struct sit_info {
block_t sit_blocks; /* # of blocks used by SIT area */
block_t written_valid_blocks; /* # of valid blocks in main area */
char *sit_bitmap; /* SIT bitmap pointer */
#ifdef CONFIG_F2FS_CHECK_FS
char *sit_bitmap_mir; /* SIT bitmap mirror */
#endif
unsigned int bitmap_size; /* SIT bitmap size */
unsigned long *tmp_map; /* bitmap for temporal use */
@@ -218,6 +236,8 @@ struct sit_info {
unsigned long long mounted_time; /* mount time */
unsigned long long min_mtime; /* min. modification time */
unsigned long long max_mtime; /* max. modification time */
unsigned int last_victim[MAX_GC_POLICY]; /* last victim segment # */
};
struct free_segmap_info {
@@ -294,17 +314,17 @@ static inline struct sec_entry *get_sec_entry(struct f2fs_sb_info *sbi,
unsigned int segno)
{
struct sit_info *sit_i = SIT_I(sbi);
return &sit_i->sec_entries[GET_SECNO(sbi, segno)];
return &sit_i->sec_entries[GET_SEC_FROM_SEG(sbi, segno)];
}
static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi,
unsigned int segno, int section)
unsigned int segno, bool use_section)
{
/*
* In order to get # of valid blocks in a section instantly from many
* segments, f2fs manages two counting structures separately.
*/
if (section > 1)
if (use_section && sbi->segs_per_sec > 1)
return get_sec_entry(sbi, segno)->valid_blocks;
else
return get_seg_entry(sbi, segno)->valid_blocks;
@@ -317,6 +337,9 @@ static inline void seg_info_from_raw_sit(struct seg_entry *se,
se->ckpt_valid_blocks = GET_SIT_VBLOCKS(rs);
memcpy(se->cur_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE);
memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE);
#ifdef CONFIG_F2FS_CHECK_FS
memcpy(se->cur_valid_map_mir, rs->valid_map, SIT_VBLOCK_MAP_SIZE);
#endif
se->type = GET_SIT_TYPE(rs);
se->mtime = le64_to_cpu(rs->mtime);
}
@@ -346,8 +369,8 @@ static inline unsigned int find_next_inuse(struct free_segmap_info *free_i,
static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno)
{
struct free_segmap_info *free_i = FREE_I(sbi);
unsigned int secno = segno / sbi->segs_per_sec;
unsigned int start_segno = secno * sbi->segs_per_sec;
unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno);
unsigned int next;
spin_lock(&free_i->segmap_lock);
@@ -367,7 +390,8 @@ static inline void __set_inuse(struct f2fs_sb_info *sbi,
unsigned int segno)
{
struct free_segmap_info *free_i = FREE_I(sbi);
unsigned int secno = segno / sbi->segs_per_sec;
unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
set_bit(segno, free_i->free_segmap);
free_i->free_segments--;
if (!test_and_set_bit(secno, free_i->free_secmap))
@@ -378,8 +402,8 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
unsigned int segno)
{
struct free_segmap_info *free_i = FREE_I(sbi);
unsigned int secno = segno / sbi->segs_per_sec;
unsigned int start_segno = secno * sbi->segs_per_sec;
unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno);
unsigned int next;
spin_lock(&free_i->segmap_lock);
@@ -400,7 +424,8 @@ static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi,
unsigned int segno)
{
struct free_segmap_info *free_i = FREE_I(sbi);
unsigned int secno = segno / sbi->segs_per_sec;
unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
spin_lock(&free_i->segmap_lock);
if (!test_and_set_bit(segno, free_i->free_segmap)) {
free_i->free_segments--;
@@ -414,6 +439,12 @@ static inline void get_sit_bitmap(struct f2fs_sb_info *sbi,
void *dst_addr)
{
struct sit_info *sit_i = SIT_I(sbi);
#ifdef CONFIG_F2FS_CHECK_FS
if (memcmp(sit_i->sit_bitmap, sit_i->sit_bitmap_mir,
sit_i->bitmap_size))
f2fs_bug_on(sbi, 1);
#endif
memcpy(dst_addr, sit_i->sit_bitmap, sit_i->bitmap_size);
}
@@ -459,24 +490,25 @@ static inline int overprovision_segments(struct f2fs_sb_info *sbi)
static inline int overprovision_sections(struct f2fs_sb_info *sbi)
{
return ((unsigned int) overprovision_segments(sbi)) / sbi->segs_per_sec;
return GET_SEC_FROM_SEG(sbi, (unsigned int)overprovision_segments(sbi));
}
static inline int reserved_sections(struct f2fs_sb_info *sbi)
{
return ((unsigned int) reserved_segments(sbi)) / sbi->segs_per_sec;
return GET_SEC_FROM_SEG(sbi, (unsigned int)reserved_segments(sbi));
}
static inline bool need_SSR(struct f2fs_sb_info *sbi)
{
int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
if (test_opt(sbi, LFS))
return false;
return free_sections(sbi) <= (node_secs + 2 * dent_secs +
reserved_sections(sbi) + 1);
return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
2 * reserved_sections(sbi));
}
static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
@@ -484,14 +516,14 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
{
int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
node_secs += get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
return false;
return (free_sections(sbi) + freed) <=
(node_secs + 2 * dent_secs + reserved_sections(sbi) + needed);
(node_secs + 2 * dent_secs + imeta_secs +
reserved_sections(sbi) + needed);
}
static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi)
@@ -521,6 +553,7 @@ static inline int utilization(struct f2fs_sb_info *sbi)
*/
#define DEF_MIN_IPU_UTIL 70
#define DEF_MIN_FSYNC_BLOCKS 8
#define DEF_MIN_HOT_BLOCKS 16
enum {
F2FS_IPU_FORCE,
@@ -528,17 +561,15 @@ enum {
F2FS_IPU_UTIL,
F2FS_IPU_SSR_UTIL,
F2FS_IPU_FSYNC,
F2FS_IPU_ASYNC,
};
static inline bool need_inplace_update(struct inode *inode)
static inline bool need_inplace_update_policy(struct inode *inode,
struct f2fs_io_info *fio)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
unsigned int policy = SM_I(sbi)->ipu_policy;
/* IPU can be done only for the user data */
if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode))
return false;
if (test_opt(sbi, LFS))
return false;
@@ -553,6 +584,15 @@ static inline bool need_inplace_update(struct inode *inode)
utilization(sbi) > SM_I(sbi)->min_ipu_util)
return true;
/*
* IPU for rewrite async pages
*/
if (policy & (0x1 << F2FS_IPU_ASYNC) &&
fio && fio->op == REQ_OP_WRITE &&
!(fio->op_flags & REQ_SYNC) &&
!f2fs_encrypted_inode(inode))
return true;
/* this is only set during fdatasync */
if (policy & (0x1 << F2FS_IPU_FSYNC) &&
is_inode_flag_set(inode, FI_NEED_IPU))
@@ -633,6 +673,12 @@ static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi,
check_seg_range(sbi, start);
#ifdef CONFIG_F2FS_CHECK_FS
if (f2fs_test_bit(offset, sit_i->sit_bitmap) !=
f2fs_test_bit(offset, sit_i->sit_bitmap_mir))
f2fs_bug_on(sbi, 1);
#endif
/* calculate sit block address */
if (f2fs_test_bit(offset, sit_i->sit_bitmap))
blk_addr += sit_i->sit_blocks;
@@ -658,13 +704,17 @@ static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start)
unsigned int block_off = SIT_BLOCK_OFFSET(start);
f2fs_change_bit(block_off, sit_i->sit_bitmap);
#ifdef CONFIG_F2FS_CHECK_FS
f2fs_change_bit(block_off, sit_i->sit_bitmap_mir);
#endif
}
static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi)
{
struct sit_info *sit_i = SIT_I(sbi);
return sit_i->elapsed_time + CURRENT_TIME_SEC.tv_sec -
sit_i->mounted_time;
time64_t now = ktime_get_real_seconds();
return sit_i->elapsed_time + now - sit_i->mounted_time;
}
static inline void set_summary(struct f2fs_summary *sum, nid_t nid,
@@ -691,7 +741,7 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type)
static inline bool no_fggc_candidate(struct f2fs_sb_info *sbi,
unsigned int secno)
{
if (get_valid_blocks(sbi, secno, sbi->segs_per_sec) >=
if (get_valid_blocks(sbi, GET_SEG_FROM_SEC(sbi, secno), true) >=
sbi->fggc_threshold)
return true;
return false;
@@ -704,19 +754,12 @@ static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno)
return false;
}
static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi)
{
struct block_device *bdev = sbi->sb->s_bdev;
struct request_queue *q = bdev_get_queue(bdev);
return SECTOR_TO_BLOCK(queue_max_sectors(q));
}
/*
* It is very important to gather dirty pages and write at once, so that we can
* submit a big bio without interfering other data writes.
* By default, 512 pages for directory data,
* 512 pages (2MB) * 3 for three types of nodes, and
* max_bio_blocks for meta are set.
* 512 pages (2MB) * 8 for nodes, and
* 256 pages * 8 for meta are set.
*/
static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type)
{
@@ -728,7 +771,7 @@ static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type)
else if (type == NODE)
return 8 * sbi->blocks_per_seg;
else if (type == META)
return 8 * MAX_BIO_BLOCKS(sbi);
return 8 * BIO_MAX_PAGES;
else
return 0;
}
@@ -745,11 +788,9 @@ static inline long nr_pages_to_write(struct f2fs_sb_info *sbi, int type,
return 0;
nr_to_write = wbc->nr_to_write;
desired = BIO_MAX_PAGES;
if (type == NODE)
desired = 2 * max_hw_blocks(sbi);
else
desired = MAX_BIO_BLOCKS(sbi);
desired <<= 1;
wbc->nr_to_write = desired;
return desired - nr_to_write;

View File

@@ -21,14 +21,16 @@ static unsigned int shrinker_run_no;
static unsigned long __count_nat_entries(struct f2fs_sb_info *sbi)
{
return NM_I(sbi)->nat_cnt - NM_I(sbi)->dirty_nat_cnt;
long count = NM_I(sbi)->nat_cnt - NM_I(sbi)->dirty_nat_cnt;
return count > 0 ? count : 0;
}
static unsigned long __count_free_nids(struct f2fs_sb_info *sbi)
{
if (NM_I(sbi)->fcnt > MAX_FREE_NIDS)
return NM_I(sbi)->fcnt - MAX_FREE_NIDS;
return 0;
long count = NM_I(sbi)->nid_cnt[FREE_NID_LIST] - MAX_FREE_NIDS;
return count > 0 ? count : 0;
}
static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi)

View File

@@ -49,6 +49,7 @@ char *fault_name[FAULT_MAX] = {
[FAULT_BLOCK] = "no more block",
[FAULT_DIR_DEPTH] = "too big dir depth",
[FAULT_EVICT_INODE] = "evict_inode fail",
[FAULT_TRUNCATE] = "truncate fail",
[FAULT_IO] = "IO error",
[FAULT_CHECKPOINT] = "checkpoint error",
};
@@ -82,6 +83,7 @@ enum {
Opt_discard,
Opt_nodiscard,
Opt_noheap,
Opt_heap,
Opt_user_xattr,
Opt_nouser_xattr,
Opt_acl,
@@ -89,6 +91,7 @@ enum {
Opt_active_logs,
Opt_disable_ext_identify,
Opt_inline_xattr,
Opt_noinline_xattr,
Opt_inline_data,
Opt_inline_dentry,
Opt_noinline_dentry,
@@ -101,6 +104,7 @@ enum {
Opt_noinline_data,
Opt_data_flush,
Opt_mode,
Opt_io_size_bits,
Opt_fault_injection,
Opt_lazytime,
Opt_nolazytime,
@@ -114,6 +118,7 @@ static match_table_t f2fs_tokens = {
{Opt_discard, "discard"},
{Opt_nodiscard, "nodiscard"},
{Opt_noheap, "no_heap"},
{Opt_heap, "heap"},
{Opt_user_xattr, "user_xattr"},
{Opt_nouser_xattr, "nouser_xattr"},
{Opt_acl, "acl"},
@@ -121,6 +126,7 @@ static match_table_t f2fs_tokens = {
{Opt_active_logs, "active_logs=%u"},
{Opt_disable_ext_identify, "disable_ext_identify"},
{Opt_inline_xattr, "inline_xattr"},
{Opt_noinline_xattr, "noinline_xattr"},
{Opt_inline_data, "inline_data"},
{Opt_inline_dentry, "inline_dentry"},
{Opt_noinline_dentry, "noinline_dentry"},
@@ -133,6 +139,7 @@ static match_table_t f2fs_tokens = {
{Opt_noinline_data, "noinline_data"},
{Opt_data_flush, "data_flush"},
{Opt_mode, "mode=%s"},
{Opt_io_size_bits, "io_bits=%u"},
{Opt_fault_injection, "fault_injection=%u"},
{Opt_lazytime, "lazytime"},
{Opt_nolazytime, "nolazytime"},
@@ -143,6 +150,7 @@ static match_table_t f2fs_tokens = {
enum {
GC_THREAD, /* struct f2fs_gc_thread */
SM_INFO, /* struct f2fs_sm_info */
DCC_INFO, /* struct discard_cmd_control */
NM_INFO, /* struct f2fs_nm_info */
F2FS_SBI, /* struct f2fs_sb_info */
#ifdef CONFIG_F2FS_FAULT_INJECTION
@@ -166,6 +174,8 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
return (unsigned char *)sbi->gc_thread;
else if (struct_type == SM_INFO)
return (unsigned char *)SM_I(sbi);
else if (struct_type == DCC_INFO)
return (unsigned char *)SM_I(sbi)->dcc_info;
else if (struct_type == NM_INFO)
return (unsigned char *)NM_I(sbi);
else if (struct_type == F2FS_SBI)
@@ -281,11 +291,12 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time);
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards);
F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio);
@@ -311,6 +322,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(ipu_policy),
ATTR_LIST(min_ipu_util),
ATTR_LIST(min_fsync_blocks),
ATTR_LIST(min_hot_blocks),
ATTR_LIST(max_victim_search),
ATTR_LIST(dir_level),
ATTR_LIST(ram_thresh),
@@ -412,17 +424,26 @@ static int parse_options(struct super_block *sb, char *options)
q = bdev_get_queue(sb->s_bdev);
if (blk_queue_discard(q)) {
set_opt(sbi, DISCARD);
} else {
} else if (!f2fs_sb_mounted_blkzoned(sb)) {
f2fs_msg(sb, KERN_WARNING,
"mounting with \"discard\" option, but "
"the device does not support discard");
}
break;
case Opt_nodiscard:
if (f2fs_sb_mounted_blkzoned(sb)) {
f2fs_msg(sb, KERN_WARNING,
"discard is required for zoned block devices");
return -EINVAL;
}
clear_opt(sbi, DISCARD);
break;
case Opt_noheap:
set_opt(sbi, NOHEAP);
break;
case Opt_heap:
clear_opt(sbi, NOHEAP);
break;
#ifdef CONFIG_F2FS_FS_XATTR
case Opt_user_xattr:
set_opt(sbi, XATTR_USER);
@@ -433,6 +454,9 @@ static int parse_options(struct super_block *sb, char *options)
case Opt_inline_xattr:
set_opt(sbi, INLINE_XATTR);
break;
case Opt_noinline_xattr:
clear_opt(sbi, INLINE_XATTR);
break;
#else
case Opt_user_xattr:
f2fs_msg(sb, KERN_INFO,
@@ -446,6 +470,10 @@ static int parse_options(struct super_block *sb, char *options)
f2fs_msg(sb, KERN_INFO,
"inline_xattr options not supported");
break;
case Opt_noinline_xattr:
f2fs_msg(sb, KERN_INFO,
"noinline_xattr options not supported");
break;
#endif
#ifdef CONFIG_F2FS_FS_POSIX_ACL
case Opt_acl:
@@ -512,6 +540,13 @@ static int parse_options(struct super_block *sb, char *options)
return -ENOMEM;
if (strlen(name) == 8 &&
!strncmp(name, "adaptive", 8)) {
if (f2fs_sb_mounted_blkzoned(sb)) {
f2fs_msg(sb, KERN_WARNING,
"adaptive mode is not allowed with "
"zoned block device feature");
kfree(name);
return -EINVAL;
}
set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE);
} else if (strlen(name) == 3 &&
!strncmp(name, "lfs", 3)) {
@@ -522,11 +557,23 @@ static int parse_options(struct super_block *sb, char *options)
}
kfree(name);
break;
case Opt_io_size_bits:
if (args->from && match_int(args, &arg))
return -EINVAL;
if (arg > __ilog2_u32(BIO_MAX_PAGES)) {
f2fs_msg(sb, KERN_WARNING,
"Not support %d, larger than %d",
1 << arg, BIO_MAX_PAGES);
return -EINVAL;
}
sbi->write_io_size_bits = arg;
break;
case Opt_fault_injection:
if (args->from && match_int(args, &arg))
return -EINVAL;
#ifdef CONFIG_F2FS_FAULT_INJECTION
f2fs_build_fault_attr(sbi, arg);
set_opt(sbi, FAULT_INJECTION);
#else
f2fs_msg(sb, KERN_INFO,
"FAULT_INJECTION was not selected");
@@ -545,6 +592,13 @@ static int parse_options(struct super_block *sb, char *options)
return -EINVAL;
}
}
if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) {
f2fs_msg(sb, KERN_ERR,
"Should set mode=lfs with %uKB-sized IO",
F2FS_IO_SIZE_KB(sbi));
return -EINVAL;
}
return 0;
}
@@ -578,6 +632,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
static int f2fs_drop_inode(struct inode *inode)
{
int ret;
/*
* This is to avoid a deadlock condition like below.
* writeback_single_inode(inode)
@@ -610,30 +665,33 @@ static int f2fs_drop_inode(struct inode *inode)
spin_lock(&inode->i_lock);
atomic_dec(&inode->i_count);
}
trace_f2fs_drop_inode(inode, 0);
return 0;
}
return generic_drop_inode(inode);
ret = generic_drop_inode(inode);
trace_f2fs_drop_inode(inode, ret);
return ret;
}
int f2fs_inode_dirtied(struct inode *inode)
int f2fs_inode_dirtied(struct inode *inode, bool sync)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int ret = 0;
spin_lock(&sbi->inode_lock[DIRTY_META]);
if (is_inode_flag_set(inode, FI_DIRTY_INODE)) {
spin_unlock(&sbi->inode_lock[DIRTY_META]);
return 1;
ret = 1;
} else {
set_inode_flag(inode, FI_DIRTY_INODE);
stat_inc_dirty_inode(sbi, DIRTY_META);
}
set_inode_flag(inode, FI_DIRTY_INODE);
list_add_tail(&F2FS_I(inode)->gdirty_list,
if (sync && list_empty(&F2FS_I(inode)->gdirty_list)) {
list_add_tail(&F2FS_I(inode)->gdirty_list,
&sbi->inode_list[DIRTY_META]);
inc_page_count(sbi, F2FS_DIRTY_IMETA);
stat_inc_dirty_inode(sbi, DIRTY_META);
inc_page_count(sbi, F2FS_DIRTY_IMETA);
}
spin_unlock(&sbi->inode_lock[DIRTY_META]);
return 0;
return ret;
}
void f2fs_inode_synced(struct inode *inode)
@@ -645,10 +703,12 @@ void f2fs_inode_synced(struct inode *inode)
spin_unlock(&sbi->inode_lock[DIRTY_META]);
return;
}
list_del_init(&F2FS_I(inode)->gdirty_list);
if (!list_empty(&F2FS_I(inode)->gdirty_list)) {
list_del_init(&F2FS_I(inode)->gdirty_list);
dec_page_count(sbi, F2FS_DIRTY_IMETA);
}
clear_inode_flag(inode, FI_DIRTY_INODE);
clear_inode_flag(inode, FI_AUTO_RECOVER);
dec_page_count(sbi, F2FS_DIRTY_IMETA);
stat_dec_dirty_inode(F2FS_I_SB(inode), DIRTY_META);
spin_unlock(&sbi->inode_lock[DIRTY_META]);
}
@@ -672,7 +732,7 @@ static void f2fs_dirty_inode(struct inode *inode, int flags)
if (is_inode_flag_set(inode, FI_AUTO_RECOVER))
clear_inode_flag(inode, FI_AUTO_RECOVER);
f2fs_inode_dirtied(inode);
f2fs_inode_dirtied(inode, false);
}
static void f2fs_i_callback(struct rcu_head *head)
@@ -692,6 +752,19 @@ static void destroy_percpu_info(struct f2fs_sb_info *sbi)
percpu_counter_destroy(&sbi->total_valid_inode_count);
}
static void destroy_device_list(struct f2fs_sb_info *sbi)
{
int i;
for (i = 0; i < sbi->s_ndevs; i++) {
blkdev_put(FDEV(i).bdev, FMODE_EXCL);
#ifdef CONFIG_BLK_DEV_ZONED
kfree(FDEV(i).blkz_type);
#endif
}
kfree(sbi->devs);
}
static void f2fs_put_super(struct super_block *sb)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -721,6 +794,16 @@ static void f2fs_put_super(struct super_block *sb)
write_checkpoint(sbi, &cpc);
}
/* be sure to wait for any on-going discard commands */
f2fs_wait_discard_bios(sbi);
if (!sbi->discard_blks) {
struct cp_control cpc = {
.reason = CP_UMOUNT | CP_TRIMMED,
};
write_checkpoint(sbi, &cpc);
}
/* write_checkpoint can update stat informaion */
f2fs_destroy_stats(sbi);
@@ -729,7 +812,6 @@ static void f2fs_put_super(struct super_block *sb)
* In addition, EIO will skip do checkpoint, we need this as well.
*/
release_ino_entry(sbi, true);
release_discard_addrs(sbi);
f2fs_leave_shrinker(sbi);
mutex_unlock(&sbi->umount_mutex);
@@ -753,6 +835,8 @@ static void f2fs_put_super(struct super_block *sb)
crypto_free_shash(sbi->s_chksum_driver);
kfree(sbi->raw_super);
destroy_device_list(sbi);
mempool_destroy(sbi->write_io_dummy);
destroy_percpu_info(sbi);
kfree(sbi);
}
@@ -780,13 +864,17 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
static int f2fs_freeze(struct super_block *sb)
{
int err;
if (f2fs_readonly(sb))
return 0;
err = f2fs_sync_fs(sb, 1);
return err;
/* IO error happened before */
if (unlikely(f2fs_cp_error(F2FS_SB(sb))))
return -EIO;
/* must be clean, since sync_filesystem() was already called */
if (is_sbi_flag_set(F2FS_SB(sb), SBI_IS_DIRTY))
return -EINVAL;
return 0;
}
static int f2fs_unfreeze(struct super_block *sb)
@@ -813,7 +901,8 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_bavail = user_block_count - valid_user_blocks(sbi);
buf->f_files = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
buf->f_ffree = buf->f_files - valid_inode_count(sbi);
buf->f_ffree = min(buf->f_files - valid_node_count(sbi),
buf->f_bavail);
buf->f_namelen = F2FS_NAME_LEN;
buf->f_fsid.val[0] = (u32)id;
@@ -839,7 +928,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
if (test_opt(sbi, DISCARD))
seq_puts(seq, ",discard");
if (test_opt(sbi, NOHEAP))
seq_puts(seq, ",no_heap_alloc");
seq_puts(seq, ",no_heap");
else
seq_puts(seq, ",heap");
#ifdef CONFIG_F2FS_FS_XATTR
if (test_opt(sbi, XATTR_USER))
seq_puts(seq, ",user_xattr");
@@ -847,6 +938,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",nouser_xattr");
if (test_opt(sbi, INLINE_XATTR))
seq_puts(seq, ",inline_xattr");
else
seq_puts(seq, ",noinline_xattr");
#endif
#ifdef CONFIG_F2FS_FS_POSIX_ACL
if (test_opt(sbi, POSIX_ACL))
@@ -883,6 +976,12 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
else if (test_opt(sbi, LFS))
seq_puts(seq, "lfs");
seq_printf(seq, ",active_logs=%u", sbi->active_logs);
if (F2FS_IO_SIZE_BITS(sbi))
seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi));
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (test_opt(sbi, FAULT_INJECTION))
seq_puts(seq, ",fault_injection");
#endif
return 0;
}
@@ -904,7 +1003,7 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset)
if ((i % 10) == 0)
seq_printf(seq, "%-10d", i);
seq_printf(seq, "%d|%-3u", se->type,
get_valid_blocks(sbi, i, 1));
get_valid_blocks(sbi, i, false));
if ((i % 10) == 9 || i == (total_segs - 1))
seq_putc(seq, '\n');
else
@@ -930,7 +1029,7 @@ static int segment_bits_seq_show(struct seq_file *seq, void *offset)
seq_printf(seq, "%-10d", i);
seq_printf(seq, "%d|%-3u|", se->type,
get_valid_blocks(sbi, i, 1));
get_valid_blocks(sbi, i, false));
for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++)
seq_printf(seq, " %.2x", se->cur_valid_map[j]);
seq_putc(seq, '\n');
@@ -960,12 +1059,14 @@ static void default_options(struct f2fs_sb_info *sbi)
sbi->active_logs = NR_CURSEG_TYPE;
set_opt(sbi, BG_GC);
set_opt(sbi, INLINE_XATTR);
set_opt(sbi, INLINE_DATA);
set_opt(sbi, INLINE_DENTRY);
set_opt(sbi, EXTENT_CACHE);
set_opt(sbi, NOHEAP);
sbi->sb->s_flags |= MS_LAZYTIME;
set_opt(sbi, FLUSH_MERGE);
if (f2fs_sb_mounted_hmsmr(sbi->sb)) {
if (f2fs_sb_mounted_blkzoned(sbi->sb)) {
set_opt_mode(sbi, F2FS_MOUNT_LFS);
set_opt(sbi, DISCARD);
} else {
@@ -1067,8 +1168,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
* or if flush_merge is not passed in mount option.
*/
if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
destroy_flush_cmd_control(sbi);
} else if (!SM_I(sbi)->cmd_control_info) {
clear_opt(sbi, FLUSH_MERGE);
destroy_flush_cmd_control(sbi, false);
} else {
err = create_flush_cmd_control(sbi);
if (err)
goto restore_gc;
@@ -1120,12 +1222,6 @@ static int f2fs_get_context(struct inode *inode, void *ctx, size_t len)
ctx, len, NULL);
}
static int f2fs_key_prefix(struct inode *inode, u8 **key)
{
*key = F2FS_I_SB(inode)->key_prefix;
return F2FS_I_SB(inode)->key_prefix_size;
}
static int f2fs_set_context(struct inode *inode, const void *ctx, size_t len,
void *fs_data)
{
@@ -1140,16 +1236,16 @@ static unsigned f2fs_max_namelen(struct inode *inode)
inode->i_sb->s_blocksize : F2FS_NAME_LEN;
}
static struct fscrypt_operations f2fs_cryptops = {
static const struct fscrypt_operations f2fs_cryptops = {
.key_prefix = "f2fs:",
.get_context = f2fs_get_context,
.key_prefix = f2fs_key_prefix,
.set_context = f2fs_set_context,
.is_encrypted = f2fs_encrypted_inode,
.empty_dir = f2fs_empty_dir,
.max_namelen = f2fs_max_namelen,
};
#else
static struct fscrypt_operations f2fs_cryptops = {
static const struct fscrypt_operations f2fs_cryptops = {
.is_encrypted = f2fs_encrypted_inode,
};
#endif
@@ -1229,7 +1325,7 @@ static int __f2fs_commit_super(struct buffer_head *bh,
unlock_buffer(bh);
/* it's rare case, we can do fua all the time */
return __sync_dirty_buffer(bh, WRITE_FLUSH_FUA);
return __sync_dirty_buffer(bh, REQ_SYNC | REQ_PREFLUSH | REQ_FUA);
}
static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
@@ -1424,6 +1520,7 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
unsigned int total, fsmeta;
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
unsigned int ovp_segments, reserved_segments;
total = le32_to_cpu(raw_super->segment_count);
fsmeta = le32_to_cpu(raw_super->segment_count_ckpt);
@@ -1435,6 +1532,16 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi)
if (unlikely(fsmeta >= total))
return 1;
ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
if (unlikely(fsmeta < F2FS_MIN_SEGMENTS ||
ovp_segments == 0 || reserved_segments == 0)) {
f2fs_msg(sbi->sb, KERN_ERR,
"Wrong layout: check mkfs.f2fs version");
return 1;
}
if (unlikely(f2fs_cp_error(sbi))) {
f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
return 1;
@@ -1473,17 +1580,13 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
for (i = 0; i < NR_COUNT_TYPE; i++)
atomic_set(&sbi->nr_pages[i], 0);
atomic_set(&sbi->wb_sync_req, 0);
INIT_LIST_HEAD(&sbi->s_list);
mutex_init(&sbi->umount_mutex);
mutex_init(&sbi->wio_mutex[NODE]);
mutex_init(&sbi->wio_mutex[DATA]);
spin_lock_init(&sbi->cp_lock);
#ifdef CONFIG_F2FS_FS_ENCRYPTION
memcpy(sbi->key_prefix, F2FS_KEY_DESC_PREFIX,
F2FS_KEY_DESC_PREFIX_SIZE);
sbi->key_prefix_size = F2FS_KEY_DESC_PREFIX_SIZE;
#endif
}
static int init_percpu_info(struct f2fs_sb_info *sbi)
@@ -1498,6 +1601,71 @@ static int init_percpu_info(struct f2fs_sb_info *sbi)
GFP_KERNEL);
}
#ifdef CONFIG_BLK_DEV_ZONED
static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
{
struct block_device *bdev = FDEV(devi).bdev;
sector_t nr_sectors = bdev->bd_part->nr_sects;
sector_t sector = 0;
struct blk_zone *zones;
unsigned int i, nr_zones;
unsigned int n = 0;
int err = -EIO;
if (!f2fs_sb_mounted_blkzoned(sbi->sb))
return 0;
if (sbi->blocks_per_blkz && sbi->blocks_per_blkz !=
SECTOR_TO_BLOCK(bdev_zone_sectors(bdev)))
return -EINVAL;
sbi->blocks_per_blkz = SECTOR_TO_BLOCK(bdev_zone_sectors(bdev));
if (sbi->log_blocks_per_blkz && sbi->log_blocks_per_blkz !=
__ilog2_u32(sbi->blocks_per_blkz))
return -EINVAL;
sbi->log_blocks_per_blkz = __ilog2_u32(sbi->blocks_per_blkz);
FDEV(devi).nr_blkz = SECTOR_TO_BLOCK(nr_sectors) >>
sbi->log_blocks_per_blkz;
if (nr_sectors & (bdev_zone_sectors(bdev) - 1))
FDEV(devi).nr_blkz++;
FDEV(devi).blkz_type = kmalloc(FDEV(devi).nr_blkz, GFP_KERNEL);
if (!FDEV(devi).blkz_type)
return -ENOMEM;
#define F2FS_REPORT_NR_ZONES 4096
zones = kcalloc(F2FS_REPORT_NR_ZONES, sizeof(struct blk_zone),
GFP_KERNEL);
if (!zones)
return -ENOMEM;
/* Get block zones type */
while (zones && sector < nr_sectors) {
nr_zones = F2FS_REPORT_NR_ZONES;
err = blkdev_report_zones(bdev, sector,
zones, &nr_zones,
GFP_KERNEL);
if (err)
break;
if (!nr_zones) {
err = -EIO;
break;
}
for (i = 0; i < nr_zones; i++) {
FDEV(devi).blkz_type[n] = zones[i].type;
sector += zones[i].len;
n++;
}
}
kfree(zones);
return err;
}
#endif
/*
* Read f2fs raw super block.
* Because we have two copies of super block, so read both of them
@@ -1590,6 +1758,104 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
return err;
}
static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
unsigned int max_devices = MAX_DEVICES;
int i;
/* Initialize single device information */
if (!RDEV(0).path[0]) {
#ifdef CONFIG_BLK_DEV_ZONED
if (bdev_zoned_model(sbi->sb->s_bdev) == BLK_ZONED_NONE)
return 0;
max_devices = 1;
#else
return 0;
#endif
}
/*
* Initialize multiple devices information, or single
* zoned block device information.
*/
sbi->devs = kcalloc(max_devices, sizeof(struct f2fs_dev_info),
GFP_KERNEL);
if (!sbi->devs)
return -ENOMEM;
for (i = 0; i < max_devices; i++) {
if (i > 0 && !RDEV(i).path[0])
break;
if (max_devices == 1) {
/* Single zoned block device mount */
FDEV(0).bdev =
blkdev_get_by_dev(sbi->sb->s_bdev->bd_dev,
sbi->sb->s_mode, sbi->sb->s_type);
} else {
/* Multi-device mount */
memcpy(FDEV(i).path, RDEV(i).path, MAX_PATH_LEN);
FDEV(i).total_segments =
le32_to_cpu(RDEV(i).total_segments);
if (i == 0) {
FDEV(i).start_blk = 0;
FDEV(i).end_blk = FDEV(i).start_blk +
(FDEV(i).total_segments <<
sbi->log_blocks_per_seg) - 1 +
le32_to_cpu(raw_super->segment0_blkaddr);
} else {
FDEV(i).start_blk = FDEV(i - 1).end_blk + 1;
FDEV(i).end_blk = FDEV(i).start_blk +
(FDEV(i).total_segments <<
sbi->log_blocks_per_seg) - 1;
}
FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path,
sbi->sb->s_mode, sbi->sb->s_type);
}
if (IS_ERR(FDEV(i).bdev))
return PTR_ERR(FDEV(i).bdev);
/* to release errored devices */
sbi->s_ndevs = i + 1;
#ifdef CONFIG_BLK_DEV_ZONED
if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM &&
!f2fs_sb_mounted_blkzoned(sbi->sb)) {
f2fs_msg(sbi->sb, KERN_ERR,
"Zoned block device feature not enabled\n");
return -EINVAL;
}
if (bdev_zoned_model(FDEV(i).bdev) != BLK_ZONED_NONE) {
if (init_blkz_info(sbi, i)) {
f2fs_msg(sbi->sb, KERN_ERR,
"Failed to initialize F2FS blkzone information");
return -EINVAL;
}
if (max_devices == 1)
break;
f2fs_msg(sbi->sb, KERN_INFO,
"Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)",
i, FDEV(i).path,
FDEV(i).total_segments,
FDEV(i).start_blk, FDEV(i).end_blk,
bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HA ?
"Host-aware" : "Host-managed");
continue;
}
#endif
f2fs_msg(sbi->sb, KERN_INFO,
"Mount Device [%2d]: %20s, %8u, %8x - %8x",
i, FDEV(i).path,
FDEV(i).total_segments,
FDEV(i).start_blk, FDEV(i).end_blk);
}
f2fs_msg(sbi->sb, KERN_INFO,
"IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi));
return 0;
}
static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
{
struct f2fs_sb_info *sbi;
@@ -1637,6 +1903,18 @@ try_onemore:
sb->s_fs_info = sbi;
sbi->raw_super = raw_super;
/*
* The BLKZONED feature indicates that the drive was formatted with
* zone alignment optimization. This is optional for host-aware
* devices, but mandatory for host-managed zoned block devices.
*/
#ifndef CONFIG_BLK_DEV_ZONED
if (f2fs_sb_mounted_blkzoned(sb)) {
f2fs_msg(sb, KERN_ERR,
"Zoned block device support is not enabled\n");
goto free_sb_buf;
}
#endif
default_options(sbi);
/* parse mount options */
options = kstrdup((const char *)data, GFP_KERNEL);
@@ -1663,13 +1941,14 @@ try_onemore:
sb->s_time_gran = 1;
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
(test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0);
memcpy(sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid));
memcpy(&sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid));
/* init f2fs-specific super block info */
sbi->valid_super_block = valid_super_block;
mutex_init(&sbi->gc_mutex);
mutex_init(&sbi->cp_mutex);
init_rwsem(&sbi->node_write);
init_rwsem(&sbi->node_change);
/* disallow all the data/node/meta page writes */
set_sbi_flag(sbi, SBI_POR_DOING);
@@ -1692,12 +1971,19 @@ try_onemore:
if (err)
goto free_options;
if (F2FS_IO_SIZE(sbi) > 1) {
sbi->write_io_dummy =
mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0);
if (!sbi->write_io_dummy)
goto free_options;
}
/* get an inode for meta space */
sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi));
if (IS_ERR(sbi->meta_inode)) {
f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode");
err = PTR_ERR(sbi->meta_inode);
goto free_options;
goto free_io_dummy;
}
err = get_valid_checkpoint(sbi);
@@ -1706,6 +1992,13 @@ try_onemore:
goto free_meta_inode;
}
/* Initialize device list */
err = f2fs_scan_devices(sbi);
if (err) {
f2fs_msg(sb, KERN_ERR, "Failed to find devices");
goto free_devices;
}
sbi->total_valid_node_count =
le32_to_cpu(sbi->ckpt->valid_node_count);
percpu_counter_set(&sbi->total_valid_inode_count,
@@ -1761,6 +2054,10 @@ try_onemore:
f2fs_join_shrinker(sbi);
err = f2fs_build_stats(sbi);
if (err)
goto free_nm;
/* if there are nt orphan nodes free them */
err = recover_orphan_inodes(sbi);
if (err)
@@ -1785,10 +2082,6 @@ try_onemore:
goto free_root_inode;
}
err = f2fs_build_stats(sbi);
if (err)
goto free_root_inode;
if (f2fs_proc_root)
sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root);
@@ -1865,6 +2158,8 @@ skip_recovery:
sbi->valid_super_block ? 1 : 2, err);
}
f2fs_msg(sbi->sb, KERN_NOTICE, "Mounted with checkpoint version = %llx",
cur_cp_version(F2FS_CKPT(sbi)));
f2fs_update_time(sbi, CP_TIME);
f2fs_update_time(sbi, REQ_TIME);
return 0;
@@ -1880,7 +2175,6 @@ free_proc:
remove_proc_entry("segment_bits", sbi->s_proc);
remove_proc_entry(sb->s_id, f2fs_proc_root);
}
f2fs_destroy_stats(sbi);
free_root_inode:
dput(sb->s_root);
sb->s_root = NULL;
@@ -1889,16 +2183,28 @@ free_node_inode:
mutex_lock(&sbi->umount_mutex);
release_ino_entry(sbi, true);
f2fs_leave_shrinker(sbi);
/*
* Some dirty meta pages can be produced by recover_orphan_inodes()
* failed by EIO. Then, iput(node_inode) can trigger balance_fs_bg()
* followed by write_checkpoint() through f2fs_write_node_pages(), which
* falls into an infinite loop in sync_meta_pages().
*/
truncate_inode_pages_final(META_MAPPING(sbi));
iput(sbi->node_inode);
mutex_unlock(&sbi->umount_mutex);
f2fs_destroy_stats(sbi);
free_nm:
destroy_node_manager(sbi);
free_sm:
destroy_segment_manager(sbi);
free_devices:
destroy_device_list(sbi);
kfree(sbi->ckpt);
free_meta_inode:
make_bad_inode(sbi->meta_inode);
iput(sbi->meta_inode);
free_io_dummy:
mempool_destroy(sbi->write_io_dummy);
free_options:
destroy_percpu_info(sbi);
kfree(options);
@@ -2040,3 +2346,4 @@ module_exit(exit_f2fs_fs)
MODULE_AUTHOR("Samsung Electronics's Praesto Team");
MODULE_DESCRIPTION("Flash Friendly File System");
MODULE_LICENSE("GPL");

View File

@@ -59,7 +59,7 @@ void f2fs_trace_pid(struct page *page)
pid_t pid = task_pid_nr(current);
void *p;
page->private = pid;
set_page_private(page, (unsigned long)pid);
if (radix_tree_preload(GFP_NOFS))
return;
@@ -138,7 +138,7 @@ static unsigned int gang_lookup_pids(pid_t *results, unsigned long first_index,
radix_tree_for_each_slot(slot, &pids, &iter, first_index) {
results[ret] = iter.index;
if (++ret == PIDVEC_SIZE)
if (++ret == max_items)
break;
}
return ret;

View File

@@ -106,7 +106,7 @@ static int f2fs_xattr_advise_set(const struct xattr_handler *handler,
return -EINVAL;
F2FS_I(inode)->i_advise |= *(char *)value;
f2fs_mark_inode_dirty_sync(inode);
f2fs_mark_inode_dirty_sync(inode, true);
return 0;
}
@@ -217,18 +217,123 @@ static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index,
return entry;
}
static struct f2fs_xattr_entry *__find_inline_xattr(void *base_addr,
void **last_addr, int index,
size_t len, const char *name)
{
struct f2fs_xattr_entry *entry;
unsigned int inline_size = F2FS_INLINE_XATTR_ADDRS << 2;
list_for_each_xattr(entry, base_addr) {
if ((void *)entry + sizeof(__u32) > base_addr + inline_size ||
(void *)XATTR_NEXT_ENTRY(entry) + sizeof(__u32) >
base_addr + inline_size) {
*last_addr = entry;
return NULL;
}
if (entry->e_name_index != index)
continue;
if (entry->e_name_len != len)
continue;
if (!memcmp(entry->e_name, name, len))
break;
}
return entry;
}
static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
unsigned int index, unsigned int len,
const char *name, struct f2fs_xattr_entry **xe,
void **base_addr)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
void *cur_addr, *txattr_addr, *last_addr = NULL;
nid_t xnid = F2FS_I(inode)->i_xattr_nid;
unsigned int size = xnid ? VALID_XATTR_BLOCK_SIZE : 0;
unsigned int inline_size = inline_xattr_size(inode);
int err = 0;
if (!size && !inline_size)
return -ENODATA;
txattr_addr = kzalloc(inline_size + size + XATTR_PADDING_SIZE,
GFP_F2FS_ZERO);
if (!txattr_addr)
return -ENOMEM;
/* read from inline xattr */
if (inline_size) {
struct page *page = NULL;
void *inline_addr;
if (ipage) {
inline_addr = inline_xattr_addr(ipage);
} else {
page = get_node_page(sbi, inode->i_ino);
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto out;
}
inline_addr = inline_xattr_addr(page);
}
memcpy(txattr_addr, inline_addr, inline_size);
f2fs_put_page(page, 1);
*xe = __find_inline_xattr(txattr_addr, &last_addr,
index, len, name);
if (*xe)
goto check;
}
/* read from xattr node block */
if (xnid) {
struct page *xpage;
void *xattr_addr;
/* The inode already has an extended attribute block. */
xpage = get_node_page(sbi, xnid);
if (IS_ERR(xpage)) {
err = PTR_ERR(xpage);
goto out;
}
xattr_addr = page_address(xpage);
memcpy(txattr_addr + inline_size, xattr_addr, size);
f2fs_put_page(xpage, 1);
}
if (last_addr)
cur_addr = XATTR_HDR(last_addr) - 1;
else
cur_addr = txattr_addr;
*xe = __find_xattr(cur_addr, index, len, name);
check:
if (IS_XATTR_LAST_ENTRY(*xe)) {
err = -ENODATA;
goto out;
}
*base_addr = txattr_addr;
return 0;
out:
kzfree(txattr_addr);
return err;
}
static int read_all_xattrs(struct inode *inode, struct page *ipage,
void **base_addr)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_xattr_header *header;
size_t size = PAGE_SIZE, inline_size = 0;
nid_t xnid = F2FS_I(inode)->i_xattr_nid;
unsigned int size = VALID_XATTR_BLOCK_SIZE;
unsigned int inline_size = inline_xattr_size(inode);
void *txattr_addr;
int err;
inline_size = inline_xattr_size(inode);
txattr_addr = kzalloc(inline_size + size, GFP_F2FS_ZERO);
txattr_addr = kzalloc(inline_size + size + XATTR_PADDING_SIZE,
GFP_F2FS_ZERO);
if (!txattr_addr)
return -ENOMEM;
@@ -252,19 +357,19 @@ static int read_all_xattrs(struct inode *inode, struct page *ipage,
}
/* read from xattr node block */
if (F2FS_I(inode)->i_xattr_nid) {
if (xnid) {
struct page *xpage;
void *xattr_addr;
/* The inode already has an extended attribute block. */
xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid);
xpage = get_node_page(sbi, xnid);
if (IS_ERR(xpage)) {
err = PTR_ERR(xpage);
goto fail;
}
xattr_addr = page_address(xpage);
memcpy(txattr_addr + inline_size, xattr_addr, PAGE_SIZE);
memcpy(txattr_addr + inline_size, xattr_addr, size);
f2fs_put_page(xpage, 1);
}
@@ -286,14 +391,12 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
void *txattr_addr, struct page *ipage)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
size_t inline_size = 0;
size_t inline_size = inline_xattr_size(inode);
void *xattr_addr;
struct page *xpage;
nid_t new_nid = 0;
int err;
inline_size = inline_xattr_size(inode);
if (hsize > inline_size && !F2FS_I(inode)->i_xattr_nid)
if (!alloc_nid(sbi, &new_nid))
return -ENOSPC;
@@ -348,23 +451,20 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
}
xattr_addr = page_address(xpage);
memcpy(xattr_addr, txattr_addr + inline_size, PAGE_SIZE -
sizeof(struct node_footer));
memcpy(xattr_addr, txattr_addr + inline_size, VALID_XATTR_BLOCK_SIZE);
set_page_dirty(xpage);
f2fs_put_page(xpage, 1);
/* need to checkpoint during fsync */
F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi));
return 0;
}
int f2fs_getxattr(struct inode *inode, int index, const char *name,
void *buffer, size_t buffer_size, struct page *ipage)
{
struct f2fs_xattr_entry *entry;
void *base_addr;
struct f2fs_xattr_entry *entry = NULL;
int error = 0;
size_t size, len;
unsigned int size, len;
void *base_addr = NULL;
if (name == NULL)
return -EINVAL;
@@ -373,21 +473,16 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
if (len > F2FS_NAME_LEN)
return -ERANGE;
error = read_all_xattrs(inode, ipage, &base_addr);
error = lookup_all_xattrs(inode, ipage, index, len, name,
&entry, &base_addr);
if (error)
return error;
entry = __find_xattr(base_addr, index, len, name);
if (IS_XATTR_LAST_ENTRY(entry)) {
error = -ENODATA;
goto cleanup;
}
size = le16_to_cpu(entry->e_value_size);
if (buffer && size > buffer_size) {
error = -ERANGE;
goto cleanup;
goto out;
}
if (buffer) {
@@ -395,8 +490,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
memcpy(buffer, pval, size);
}
error = size;
cleanup:
out:
kzfree(base_addr);
return error;
}
@@ -445,6 +539,15 @@ cleanup:
return error;
}
static bool f2fs_xattr_value_same(struct f2fs_xattr_entry *entry,
const void *value, size_t size)
{
void *pval = entry->e_name + entry->e_name_len;
return (le16_to_cpu(entry->e_value_size) == size) &&
!memcmp(pval, value, size);
}
static int __f2fs_setxattr(struct inode *inode, int index,
const char *name, const void *value, size_t size,
struct page *ipage, int flags)
@@ -479,12 +582,17 @@ static int __f2fs_setxattr(struct inode *inode, int index,
found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1;
if ((flags & XATTR_REPLACE) && !found) {
if (found) {
if ((flags & XATTR_CREATE)) {
error = -EEXIST;
goto exit;
}
if (f2fs_xattr_value_same(here, value, size))
goto exit;
} else if ((flags & XATTR_REPLACE)) {
error = -ENODATA;
goto exit;
} else if ((flags & XATTR_CREATE) && found) {
error = -EEXIST;
goto exit;
}
last = here;
@@ -554,7 +662,7 @@ static int __f2fs_setxattr(struct inode *inode, int index,
if (index == F2FS_XATTR_INDEX_ENCRYPTION &&
!strcmp(name, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT))
f2fs_set_encrypted_inode(inode);
f2fs_mark_inode_dirty_sync(inode);
f2fs_mark_inode_dirty_sync(inode, true);
if (!error && S_ISDIR(inode->i_mode))
set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_CP);
exit:

View File

@@ -58,10 +58,10 @@ struct f2fs_xattr_entry {
#define XATTR_FIRST_ENTRY(ptr) (XATTR_ENTRY(XATTR_HDR(ptr) + 1))
#define XATTR_ROUND (3)
#define XATTR_ALIGN(size) ((size + XATTR_ROUND) & ~XATTR_ROUND)
#define XATTR_ALIGN(size) (((size) + XATTR_ROUND) & ~XATTR_ROUND)
#define ENTRY_SIZE(entry) (XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + \
entry->e_name_len + le16_to_cpu(entry->e_value_size)))
(entry)->e_name_len + le16_to_cpu((entry)->e_value_size)))
#define XATTR_NEXT_ENTRY(entry) ((struct f2fs_xattr_entry *)((char *)(entry) +\
ENTRY_SIZE(entry)))
@@ -72,9 +72,10 @@ struct f2fs_xattr_entry {
for (entry = XATTR_FIRST_ENTRY(addr);\
!IS_XATTR_LAST_ENTRY(entry);\
entry = XATTR_NEXT_ENTRY(entry))
#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + PAGE_SIZE - \
sizeof(struct node_footer) - sizeof(__u32))
#define VALID_XATTR_BLOCK_SIZE (PAGE_SIZE - sizeof(struct node_footer))
#define XATTR_PADDING_SIZE (sizeof(__u32))
#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + \
VALID_XATTR_BLOCK_SIZE)
#define MAX_VALUE_LEN(i) (MIN_OFFSET(i) - \
sizeof(struct f2fs_xattr_header) - \

View File

@@ -32,9 +32,15 @@
/* 0, 1(node nid), 2(meta nid) are reserved node id */
#define F2FS_RESERVED_NODE_NUM 3
#define F2FS_ROOT_INO(sbi) (sbi->root_ino_num)
#define F2FS_NODE_INO(sbi) (sbi->node_ino_num)
#define F2FS_META_INO(sbi) (sbi->meta_ino_num)
#define F2FS_ROOT_INO(sbi) ((sbi)->root_ino_num)
#define F2FS_NODE_INO(sbi) ((sbi)->node_ino_num)
#define F2FS_META_INO(sbi) ((sbi)->meta_ino_num)
#define F2FS_IO_SIZE(sbi) (1 << (sbi)->write_io_size_bits) /* Blocks */
#define F2FS_IO_SIZE_KB(sbi) (1 << ((sbi)->write_io_size_bits + 2)) /* KB */
#define F2FS_IO_SIZE_BYTES(sbi) (1 << ((sbi)->write_io_size_bits + 12)) /* B */
#define F2FS_IO_SIZE_BITS(sbi) ((sbi)->write_io_size_bits) /* power of 2 */
#define F2FS_IO_SIZE_MASK(sbi) (F2FS_IO_SIZE(sbi) - 1)
/* This flag is used by node and meta inodes, and by recovery */
#define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO)
@@ -52,10 +58,17 @@
#define VERSION_LEN 256
#define MAX_VOLUME_NAME 512
#define MAX_PATH_LEN 64
#define MAX_DEVICES 8
/*
* For superblock
*/
struct f2fs_device {
__u8 path[MAX_PATH_LEN];
__le32 total_segments;
} __packed;
struct f2fs_super_block {
__le32 magic; /* Magic Number */
__le16 major_ver; /* Major Version */
@@ -94,12 +107,15 @@ struct f2fs_super_block {
__le32 feature; /* defined features */
__u8 encryption_level; /* versioning level for encryption */
__u8 encrypt_pw_salt[16]; /* Salt used for string2key algorithm */
__u8 reserved[871]; /* valid reserved region */
struct f2fs_device devs[MAX_DEVICES]; /* device list */
__u8 reserved[327]; /* valid reserved region */
} __packed;
/*
* For checkpoint
*/
#define CP_TRIMMED_FLAG 0x00000100
#define CP_NAT_BITS_FLAG 0x00000080
#define CP_CRC_RECOVERY_FLAG 0x00000040
#define CP_FASTBOOT_FLAG 0x00000020
#define CP_FSCK_FLAG 0x00000010
@@ -146,7 +162,7 @@ struct f2fs_checkpoint {
*/
#define F2FS_ORPHANS_PER_BLOCK 1020
#define GET_ORPHAN_BLOCKS(n) ((n + F2FS_ORPHANS_PER_BLOCK - 1) / \
#define GET_ORPHAN_BLOCKS(n) (((n) + F2FS_ORPHANS_PER_BLOCK - 1) / \
F2FS_ORPHANS_PER_BLOCK)
struct f2fs_orphan_block {
@@ -264,6 +280,7 @@ struct f2fs_node {
* For NAT entries
*/
#define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry))
#define NAT_ENTRY_BITMAP_SIZE ((NAT_ENTRY_PER_BLOCK + 7) / 8)
struct f2fs_nat_entry {
__u8 version; /* latest version of cached nat entry */
@@ -439,7 +456,7 @@ typedef __le32 f2fs_hash_t;
#define F2FS_SLOT_LEN 8
#define F2FS_SLOT_LEN_BITS 3
#define GET_DENTRY_SLOTS(x) ((x + F2FS_SLOT_LEN - 1) >> F2FS_SLOT_LEN_BITS)
#define GET_DENTRY_SLOTS(x) (((x) + F2FS_SLOT_LEN - 1) >> F2FS_SLOT_LEN_BITS)
/* MAX level for dir lookup */
#define MAX_DIR_HASH_DEPTH 63

View File

@@ -0,0 +1,134 @@
/*
* fscrypt_common.h: common declarations for per-file encryption
*
* Copyright (C) 2015, Google, Inc.
*
* Written by Michael Halcrow, 2015.
* Modified by Jaegeuk Kim, 2015.
*/
#ifndef _LINUX_FSCRYPT_COMMON_H
#define _LINUX_FSCRYPT_COMMON_H
#include <linux/key.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/bio.h>
#include <linux/dcache.h>
#include <crypto/skcipher.h>
#include <uapi/linux/fs.h>
#define FS_CRYPTO_BLOCK_SIZE 16
struct fscrypt_info;
struct fscrypt_ctx {
union {
struct {
struct page *bounce_page; /* Ciphertext page */
struct page *control_page; /* Original page */
} w;
struct {
struct bio *bio;
struct work_struct work;
} r;
struct list_head free_list; /* Free list */
};
u8 flags; /* Flags */
};
/**
* For encrypted symlinks, the ciphertext length is stored at the beginning
* of the string in little-endian format.
*/
struct fscrypt_symlink_data {
__le16 len;
char encrypted_path[1];
} __packed;
struct fscrypt_str {
unsigned char *name;
u32 len;
};
struct fscrypt_name {
const struct qstr *usr_fname;
struct fscrypt_str disk_name;
u32 hash;
u32 minor_hash;
struct fscrypt_str crypto_buf;
};
#define FSTR_INIT(n, l) { .name = n, .len = l }
#define FSTR_TO_QSTR(f) QSTR_INIT((f)->name, (f)->len)
#define fname_name(p) ((p)->disk_name.name)
#define fname_len(p) ((p)->disk_name.len)
/*
* fscrypt superblock flags
*/
#define FS_CFLG_OWN_PAGES (1U << 1)
/*
* crypto opertions for filesystems
*/
struct fscrypt_operations {
unsigned int flags;
const char *key_prefix;
int (*get_context)(struct inode *, void *, size_t);
int (*set_context)(struct inode *, const void *, size_t, void *);
int (*dummy_context)(struct inode *);
bool (*is_encrypted)(struct inode *);
bool (*empty_dir)(struct inode *);
unsigned (*max_namelen)(struct inode *);
};
static inline bool fscrypt_dummy_context_enabled(struct inode *inode)
{
if (inode->i_sb->s_cop->dummy_context &&
inode->i_sb->s_cop->dummy_context(inode))
return true;
return false;
}
static inline bool fscrypt_valid_contents_enc_mode(u32 mode)
{
return (mode == FS_ENCRYPTION_MODE_AES_256_XTS);
}
static inline bool fscrypt_valid_filenames_enc_mode(u32 mode)
{
return (mode == FS_ENCRYPTION_MODE_AES_256_CTS);
}
static inline bool fscrypt_is_dot_dotdot(const struct qstr *str)
{
if (str->len == 1 && str->name[0] == '.')
return true;
if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.')
return true;
return false;
}
static inline struct page *fscrypt_control_page(struct page *page)
{
#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
return ((struct fscrypt_ctx *)page_private(page))->w.control_page;
#else
WARN_ON_ONCE(1);
return ERR_PTR(-EINVAL);
#endif
}
static inline int fscrypt_has_encryption_key(const struct inode *inode)
{
#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
return (inode->i_crypt_info != NULL);
#else
return 0;
#endif
}
#endif /* _LINUX_FSCRYPT_COMMON_H */

View File

@@ -0,0 +1,177 @@
/*
* fscrypt_notsupp.h
*
* This stubs out the fscrypt functions for filesystems configured without
* encryption support.
*/
#ifndef _LINUX_FSCRYPT_NOTSUPP_H
#define _LINUX_FSCRYPT_NOTSUPP_H
#include <linux/fscrypt_common.h>
/* crypto.c */
static inline struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode,
gfp_t gfp_flags)
{
return ERR_PTR(-EOPNOTSUPP);
}
static inline void fscrypt_release_ctx(struct fscrypt_ctx *ctx)
{
return;
}
static inline struct page *fscrypt_encrypt_page(const struct inode *inode,
struct page *page,
unsigned int len,
unsigned int offs,
u64 lblk_num, gfp_t gfp_flags)
{
return ERR_PTR(-EOPNOTSUPP);
}
static inline int fscrypt_decrypt_page(const struct inode *inode,
struct page *page,
unsigned int len, unsigned int offs,
u64 lblk_num)
{
return -EOPNOTSUPP;
}
static inline void fscrypt_restore_control_page(struct page *page)
{
return;
}
static inline void fscrypt_set_d_op(struct dentry *dentry)
{
return;
}
static inline void fscrypt_set_encrypted_dentry(struct dentry *dentry)
{
return;
}
/* policy.c */
static inline int fscrypt_ioctl_set_policy(struct file *filp,
const void __user *arg)
{
return -EOPNOTSUPP;
}
static inline int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg)
{
return -EOPNOTSUPP;
}
static inline int fscrypt_has_permitted_context(struct inode *parent,
struct inode *child)
{
return 0;
}
static inline int fscrypt_inherit_context(struct inode *parent,
struct inode *child,
void *fs_data, bool preload)
{
return -EOPNOTSUPP;
}
/* keyinfo.c */
static inline int fscrypt_get_encryption_info(struct inode *inode)
{
return -EOPNOTSUPP;
}
static inline void fscrypt_put_encryption_info(struct inode *inode,
struct fscrypt_info *ci)
{
return;
}
/* fname.c */
static inline int fscrypt_setup_filename(struct inode *dir,
const struct qstr *iname,
int lookup, struct fscrypt_name *fname)
{
if (dir->i_sb->s_cop->is_encrypted(dir))
return -EOPNOTSUPP;
memset(fname, 0, sizeof(struct fscrypt_name));
fname->usr_fname = iname;
fname->disk_name.name = (unsigned char *)iname->name;
fname->disk_name.len = iname->len;
return 0;
}
static inline void fscrypt_free_filename(struct fscrypt_name *fname)
{
return;
}
static inline u32 fscrypt_fname_encrypted_size(const struct inode *inode,
u32 ilen)
{
/* never happens */
WARN_ON(1);
return 0;
}
static inline int fscrypt_fname_alloc_buffer(const struct inode *inode,
u32 ilen,
struct fscrypt_str *crypto_str)
{
return -EOPNOTSUPP;
}
static inline void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str)
{
return;
}
static inline int fscrypt_fname_disk_to_usr(struct inode *inode,
u32 hash, u32 minor_hash,
const struct fscrypt_str *iname,
struct fscrypt_str *oname)
{
return -EOPNOTSUPP;
}
static inline int fscrypt_fname_usr_to_disk(struct inode *inode,
const struct qstr *iname,
struct fscrypt_str *oname)
{
return -EOPNOTSUPP;
}
static inline bool fscrypt_match_name(const struct fscrypt_name *fname,
const u8 *de_name, u32 de_name_len)
{
/* Encryption support disabled; use standard comparison */
if (de_name_len != fname->disk_name.len)
return false;
return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len);
}
/* bio.c */
static inline void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *ctx,
struct bio *bio)
{
return;
}
static inline void fscrypt_pullback_bio_page(struct page **page, bool restore)
{
return;
}
static inline int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
sector_t pblk, unsigned int len)
{
return -EOPNOTSUPP;
}
#endif /* _LINUX_FSCRYPT_NOTSUPP_H */

View File

@@ -0,0 +1,140 @@
/*
* fscrypt_supp.h
*
* This is included by filesystems configured with encryption support.
*/
#ifndef _LINUX_FSCRYPT_SUPP_H
#define _LINUX_FSCRYPT_SUPP_H
#include <linux/fscrypt_common.h>
/* crypto.c */
extern struct kmem_cache *fscrypt_info_cachep;
extern struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *, gfp_t);
extern void fscrypt_release_ctx(struct fscrypt_ctx *);
extern struct page *fscrypt_encrypt_page(const struct inode *, struct page *,
unsigned int, unsigned int,
u64, gfp_t);
extern int fscrypt_decrypt_page(const struct inode *, struct page *, unsigned int,
unsigned int, u64);
extern void fscrypt_restore_control_page(struct page *);
extern const struct dentry_operations fscrypt_d_ops;
static inline void fscrypt_set_d_op(struct dentry *dentry)
{
d_set_d_op(dentry, &fscrypt_d_ops);
}
static inline void fscrypt_set_encrypted_dentry(struct dentry *dentry)
{
spin_lock(&dentry->d_lock);
dentry->d_flags |= DCACHE_ENCRYPTED_WITH_KEY;
spin_unlock(&dentry->d_lock);
}
/* policy.c */
extern int fscrypt_ioctl_set_policy(struct file *, const void __user *);
extern int fscrypt_ioctl_get_policy(struct file *, void __user *);
extern int fscrypt_has_permitted_context(struct inode *, struct inode *);
extern int fscrypt_inherit_context(struct inode *, struct inode *,
void *, bool);
/* keyinfo.c */
extern int fscrypt_get_encryption_info(struct inode *);
extern void fscrypt_put_encryption_info(struct inode *, struct fscrypt_info *);
/* fname.c */
extern int fscrypt_setup_filename(struct inode *, const struct qstr *,
int lookup, struct fscrypt_name *);
extern void fscrypt_free_filename(struct fscrypt_name *);
extern u32 fscrypt_fname_encrypted_size(const struct inode *, u32);
extern int fscrypt_fname_alloc_buffer(const struct inode *, u32,
struct fscrypt_str *);
extern void fscrypt_fname_free_buffer(struct fscrypt_str *);
extern int fscrypt_fname_disk_to_usr(struct inode *, u32, u32,
const struct fscrypt_str *, struct fscrypt_str *);
extern int fscrypt_fname_usr_to_disk(struct inode *, const struct qstr *,
struct fscrypt_str *);
#define FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE 32
/* Extracts the second-to-last ciphertext block; see explanation below */
#define FSCRYPT_FNAME_DIGEST(name, len) \
((name) + round_down((len) - FS_CRYPTO_BLOCK_SIZE - 1, \
FS_CRYPTO_BLOCK_SIZE))
#define FSCRYPT_FNAME_DIGEST_SIZE FS_CRYPTO_BLOCK_SIZE
/**
* fscrypt_digested_name - alternate identifier for an on-disk filename
*
* When userspace lists an encrypted directory without access to the key,
* filenames whose ciphertext is longer than FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE
* bytes are shown in this abbreviated form (base64-encoded) rather than as the
* full ciphertext (base64-encoded). This is necessary to allow supporting
* filenames up to NAME_MAX bytes, since base64 encoding expands the length.
*
* To make it possible for filesystems to still find the correct directory entry
* despite not knowing the full on-disk name, we encode any filesystem-specific
* 'hash' and/or 'minor_hash' which the filesystem may need for its lookups,
* followed by the second-to-last ciphertext block of the filename. Due to the
* use of the CBC-CTS encryption mode, the second-to-last ciphertext block
* depends on the full plaintext. (Note that ciphertext stealing causes the
* last two blocks to appear "flipped".) This makes accidental collisions very
* unlikely: just a 1 in 2^128 chance for two filenames to collide even if they
* share the same filesystem-specific hashes.
*
* However, this scheme isn't immune to intentional collisions, which can be
* created by anyone able to create arbitrary plaintext filenames and view them
* without the key. Making the "digest" be a real cryptographic hash like
* SHA-256 over the full ciphertext would prevent this, although it would be
* less efficient and harder to implement, especially since the filesystem would
* need to calculate it for each directory entry examined during a search.
*/
struct fscrypt_digested_name {
u32 hash;
u32 minor_hash;
u8 digest[FSCRYPT_FNAME_DIGEST_SIZE];
};
/**
* fscrypt_match_name() - test whether the given name matches a directory entry
* @fname: the name being searched for
* @de_name: the name from the directory entry
* @de_name_len: the length of @de_name in bytes
*
* Normally @fname->disk_name will be set, and in that case we simply compare
* that to the name stored in the directory entry. The only exception is that
* if we don't have the key for an encrypted directory and a filename in it is
* very long, then we won't have the full disk_name and we'll instead need to
* match against the fscrypt_digested_name.
*
* Return: %true if the name matches, otherwise %false.
*/
static inline bool fscrypt_match_name(const struct fscrypt_name *fname,
const u8 *de_name, u32 de_name_len)
{
if (unlikely(!fname->disk_name.name)) {
const struct fscrypt_digested_name *n =
(const void *)fname->crypto_buf.name;
if (WARN_ON_ONCE(fname->usr_fname->name[0] != '_'))
return false;
if (de_name_len <= FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE)
return false;
return !memcmp(FSCRYPT_FNAME_DIGEST(de_name, de_name_len),
n->digest, FSCRYPT_FNAME_DIGEST_SIZE);
}
if (de_name_len != fname->disk_name.len)
return false;
return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len);
}
/* bio.c */
extern void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *, struct bio *);
extern void fscrypt_pullback_bio_page(struct page **, bool);
extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t,
unsigned int);
#endif /* _LINUX_FSCRYPT_SUPP_H */

View File

@@ -1,409 +0,0 @@
/*
* General per-file encryption definition
*
* Copyright (C) 2015, Google, Inc.
*
* Written by Michael Halcrow, 2015.
* Modified by Jaegeuk Kim, 2015.
*/
#ifndef _LINUX_FSCRYPTO_H
#define _LINUX_FSCRYPTO_H
#include <linux/key.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/bio.h>
#include <linux/dcache.h>
#include <crypto/skcipher.h>
#include <uapi/linux/fs.h>
#define FS_KEY_DERIVATION_NONCE_SIZE 16
#define FS_ENCRYPTION_CONTEXT_FORMAT_V1 1
#define FS_POLICY_FLAGS_PAD_4 0x00
#define FS_POLICY_FLAGS_PAD_8 0x01
#define FS_POLICY_FLAGS_PAD_16 0x02
#define FS_POLICY_FLAGS_PAD_32 0x03
#define FS_POLICY_FLAGS_PAD_MASK 0x03
#define FS_POLICY_FLAGS_VALID 0x03
/* Encryption algorithms */
#define FS_ENCRYPTION_MODE_INVALID 0
#define FS_ENCRYPTION_MODE_AES_256_XTS 1
#define FS_ENCRYPTION_MODE_AES_256_GCM 2
#define FS_ENCRYPTION_MODE_AES_256_CBC 3
#define FS_ENCRYPTION_MODE_AES_256_CTS 4
/**
* Encryption context for inode
*
* Protector format:
* 1 byte: Protector format (1 = this version)
* 1 byte: File contents encryption mode
* 1 byte: File names encryption mode
* 1 byte: Flags
* 8 bytes: Master Key descriptor
* 16 bytes: Encryption Key derivation nonce
*/
struct fscrypt_context {
u8 format;
u8 contents_encryption_mode;
u8 filenames_encryption_mode;
u8 flags;
u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE];
} __packed;
/* Encryption parameters */
#define FS_XTS_TWEAK_SIZE 16
#define FS_AES_128_ECB_KEY_SIZE 16
#define FS_AES_256_GCM_KEY_SIZE 32
#define FS_AES_256_CBC_KEY_SIZE 32
#define FS_AES_256_CTS_KEY_SIZE 32
#define FS_AES_256_XTS_KEY_SIZE 64
#define FS_MAX_KEY_SIZE 64
#define FS_KEY_DESC_PREFIX "fscrypt:"
#define FS_KEY_DESC_PREFIX_SIZE 8
/* This is passed in from userspace into the kernel keyring */
struct fscrypt_key {
u32 mode;
u8 raw[FS_MAX_KEY_SIZE];
u32 size;
} __packed;
struct fscrypt_info {
u8 ci_data_mode;
u8 ci_filename_mode;
u8 ci_flags;
struct crypto_skcipher *ci_ctfm;
u8 ci_master_key[FS_KEY_DESCRIPTOR_SIZE];
};
#define FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001
#define FS_WRITE_PATH_FL 0x00000002
struct fscrypt_ctx {
union {
struct {
struct page *bounce_page; /* Ciphertext page */
struct page *control_page; /* Original page */
} w;
struct {
struct bio *bio;
struct work_struct work;
} r;
struct list_head free_list; /* Free list */
};
u8 flags; /* Flags */
u8 mode; /* Encryption mode for tfm */
};
struct fscrypt_completion_result {
struct completion completion;
int res;
};
#define DECLARE_FS_COMPLETION_RESULT(ecr) \
struct fscrypt_completion_result ecr = { \
COMPLETION_INITIALIZER((ecr).completion), 0 }
#define FS_FNAME_NUM_SCATTER_ENTRIES 4
#define FS_CRYPTO_BLOCK_SIZE 16
#define FS_FNAME_CRYPTO_DIGEST_SIZE 32
/**
* For encrypted symlinks, the ciphertext length is stored at the beginning
* of the string in little-endian format.
*/
struct fscrypt_symlink_data {
__le16 len;
char encrypted_path[1];
} __packed;
/**
* This function is used to calculate the disk space required to
* store a filename of length l in encrypted symlink format.
*/
static inline u32 fscrypt_symlink_data_len(u32 l)
{
if (l < FS_CRYPTO_BLOCK_SIZE)
l = FS_CRYPTO_BLOCK_SIZE;
return (l + sizeof(struct fscrypt_symlink_data) - 1);
}
struct fscrypt_str {
unsigned char *name;
u32 len;
};
struct fscrypt_name {
const struct qstr *usr_fname;
struct fscrypt_str disk_name;
u32 hash;
u32 minor_hash;
struct fscrypt_str crypto_buf;
};
#define FSTR_INIT(n, l) { .name = n, .len = l }
#define FSTR_TO_QSTR(f) QSTR_INIT((f)->name, (f)->len)
#define fname_name(p) ((p)->disk_name.name)
#define fname_len(p) ((p)->disk_name.len)
/*
* crypto opertions for filesystems
*/
struct fscrypt_operations {
int (*get_context)(struct inode *, void *, size_t);
int (*key_prefix)(struct inode *, u8 **);
int (*prepare_context)(struct inode *);
int (*set_context)(struct inode *, const void *, size_t, void *);
int (*dummy_context)(struct inode *);
bool (*is_encrypted)(struct inode *);
bool (*empty_dir)(struct inode *);
unsigned (*max_namelen)(struct inode *);
};
static inline bool fscrypt_dummy_context_enabled(struct inode *inode)
{
if (inode->i_sb->s_cop->dummy_context &&
inode->i_sb->s_cop->dummy_context(inode))
return true;
return false;
}
static inline bool fscrypt_valid_contents_enc_mode(u32 mode)
{
return (mode == FS_ENCRYPTION_MODE_AES_256_XTS);
}
static inline bool fscrypt_valid_filenames_enc_mode(u32 mode)
{
return (mode == FS_ENCRYPTION_MODE_AES_256_CTS);
}
static inline bool fscrypt_is_dot_dotdot(const struct qstr *str)
{
if (str->len == 1 && str->name[0] == '.')
return true;
if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.')
return true;
return false;
}
static inline struct page *fscrypt_control_page(struct page *page)
{
#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
return ((struct fscrypt_ctx *)page_private(page))->w.control_page;
#else
WARN_ON_ONCE(1);
return ERR_PTR(-EINVAL);
#endif
}
static inline int fscrypt_has_encryption_key(struct inode *inode)
{
#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
return (inode->i_crypt_info != NULL);
#else
return 0;
#endif
}
static inline void fscrypt_set_encrypted_dentry(struct dentry *dentry)
{
#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
spin_lock(&dentry->d_lock);
dentry->d_flags |= DCACHE_ENCRYPTED_WITH_KEY;
spin_unlock(&dentry->d_lock);
#endif
}
#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
extern const struct dentry_operations fscrypt_d_ops;
#endif
static inline void fscrypt_set_d_op(struct dentry *dentry)
{
#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
d_set_d_op(dentry, &fscrypt_d_ops);
#endif
}
#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
/* crypto.c */
extern struct kmem_cache *fscrypt_info_cachep;
int fscrypt_initialize(void);
extern struct fscrypt_ctx *fscrypt_get_ctx(struct inode *, gfp_t);
extern void fscrypt_release_ctx(struct fscrypt_ctx *);
extern struct page *fscrypt_encrypt_page(struct inode *, struct page *, gfp_t);
extern int fscrypt_decrypt_page(struct page *);
extern void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *, struct bio *);
extern void fscrypt_pullback_bio_page(struct page **, bool);
extern void fscrypt_restore_control_page(struct page *);
extern int fscrypt_zeroout_range(struct inode *, pgoff_t, sector_t,
unsigned int);
/* policy.c */
extern int fscrypt_process_policy(struct file *, const struct fscrypt_policy *);
extern int fscrypt_get_policy(struct inode *, struct fscrypt_policy *);
extern int fscrypt_has_permitted_context(struct inode *, struct inode *);
extern int fscrypt_inherit_context(struct inode *, struct inode *,
void *, bool);
/* keyinfo.c */
extern int fscrypt_get_encryption_info(struct inode *);
extern void fscrypt_put_encryption_info(struct inode *, struct fscrypt_info *);
/* fname.c */
extern int fscrypt_setup_filename(struct inode *, const struct qstr *,
int lookup, struct fscrypt_name *);
extern void fscrypt_free_filename(struct fscrypt_name *);
extern u32 fscrypt_fname_encrypted_size(struct inode *, u32);
extern int fscrypt_fname_alloc_buffer(struct inode *, u32,
struct fscrypt_str *);
extern void fscrypt_fname_free_buffer(struct fscrypt_str *);
extern int fscrypt_fname_disk_to_usr(struct inode *, u32, u32,
const struct fscrypt_str *, struct fscrypt_str *);
extern int fscrypt_fname_usr_to_disk(struct inode *, const struct qstr *,
struct fscrypt_str *);
#endif
/* crypto.c */
static inline struct fscrypt_ctx *fscrypt_notsupp_get_ctx(struct inode *i,
gfp_t f)
{
return ERR_PTR(-EOPNOTSUPP);
}
static inline void fscrypt_notsupp_release_ctx(struct fscrypt_ctx *c)
{
return;
}
static inline struct page *fscrypt_notsupp_encrypt_page(struct inode *i,
struct page *p, gfp_t f)
{
return ERR_PTR(-EOPNOTSUPP);
}
static inline int fscrypt_notsupp_decrypt_page(struct page *p)
{
return -EOPNOTSUPP;
}
static inline void fscrypt_notsupp_decrypt_bio_pages(struct fscrypt_ctx *c,
struct bio *b)
{
return;
}
static inline void fscrypt_notsupp_pullback_bio_page(struct page **p, bool b)
{
return;
}
static inline void fscrypt_notsupp_restore_control_page(struct page *p)
{
return;
}
static inline int fscrypt_notsupp_zeroout_range(struct inode *i, pgoff_t p,
sector_t s, unsigned int f)
{
return -EOPNOTSUPP;
}
/* policy.c */
static inline int fscrypt_notsupp_process_policy(struct file *f,
const struct fscrypt_policy *p)
{
return -EOPNOTSUPP;
}
static inline int fscrypt_notsupp_get_policy(struct inode *i,
struct fscrypt_policy *p)
{
return -EOPNOTSUPP;
}
static inline int fscrypt_notsupp_has_permitted_context(struct inode *p,
struct inode *i)
{
return 0;
}
static inline int fscrypt_notsupp_inherit_context(struct inode *p,
struct inode *i, void *v, bool b)
{
return -EOPNOTSUPP;
}
/* keyinfo.c */
static inline int fscrypt_notsupp_get_encryption_info(struct inode *i)
{
return -EOPNOTSUPP;
}
static inline void fscrypt_notsupp_put_encryption_info(struct inode *i,
struct fscrypt_info *f)
{
return;
}
/* fname.c */
static inline int fscrypt_notsupp_setup_filename(struct inode *dir,
const struct qstr *iname,
int lookup, struct fscrypt_name *fname)
{
if (dir->i_sb->s_cop->is_encrypted(dir))
return -EOPNOTSUPP;
memset(fname, 0, sizeof(struct fscrypt_name));
fname->usr_fname = iname;
fname->disk_name.name = (unsigned char *)iname->name;
fname->disk_name.len = iname->len;
return 0;
}
static inline void fscrypt_notsupp_free_filename(struct fscrypt_name *fname)
{
return;
}
static inline u32 fscrypt_notsupp_fname_encrypted_size(struct inode *i, u32 s)
{
/* never happens */
WARN_ON(1);
return 0;
}
static inline int fscrypt_notsupp_fname_alloc_buffer(struct inode *inode,
u32 ilen, struct fscrypt_str *crypto_str)
{
return -EOPNOTSUPP;
}
static inline void fscrypt_notsupp_fname_free_buffer(struct fscrypt_str *c)
{
return;
}
static inline int fscrypt_notsupp_fname_disk_to_usr(struct inode *inode,
u32 hash, u32 minor_hash,
const struct fscrypt_str *iname,
struct fscrypt_str *oname)
{
return -EOPNOTSUPP;
}
static inline int fscrypt_notsupp_fname_usr_to_disk(struct inode *inode,
const struct qstr *iname,
struct fscrypt_str *oname)
{
return -EOPNOTSUPP;
}
#endif /* _LINUX_FSCRYPTO_H */

View File

@@ -6,8 +6,8 @@
#include <linux/tracepoint.h>
#define show_dev(entry) MAJOR(entry->dev), MINOR(entry->dev)
#define show_dev_ino(entry) show_dev(entry), (unsigned long)entry->ino
#define show_dev(dev) MAJOR(dev), MINOR(dev)
#define show_dev_ino(entry) show_dev(entry->dev), (unsigned long)entry->ino
TRACE_DEFINE_ENUM(NODE);
TRACE_DEFINE_ENUM(DATA);
@@ -15,6 +15,8 @@ TRACE_DEFINE_ENUM(META);
TRACE_DEFINE_ENUM(META_FLUSH);
TRACE_DEFINE_ENUM(INMEM);
TRACE_DEFINE_ENUM(INMEM_DROP);
TRACE_DEFINE_ENUM(INMEM_INVALIDATE);
TRACE_DEFINE_ENUM(INMEM_REVOKE);
TRACE_DEFINE_ENUM(IPU);
TRACE_DEFINE_ENUM(OPU);
TRACE_DEFINE_ENUM(CURSEG_HOT_DATA);
@@ -42,6 +44,7 @@ TRACE_DEFINE_ENUM(CP_FASTBOOT);
TRACE_DEFINE_ENUM(CP_SYNC);
TRACE_DEFINE_ENUM(CP_RECOVERY);
TRACE_DEFINE_ENUM(CP_DISCARD);
TRACE_DEFINE_ENUM(CP_TRIMMED);
#define show_block_type(type) \
__print_symbolic(type, \
@@ -51,32 +54,34 @@ TRACE_DEFINE_ENUM(CP_DISCARD);
{ META_FLUSH, "META_FLUSH" }, \
{ INMEM, "INMEM" }, \
{ INMEM_DROP, "INMEM_DROP" }, \
{ INMEM_INVALIDATE, "INMEM_INVALIDATE" }, \
{ INMEM_REVOKE, "INMEM_REVOKE" }, \
{ IPU, "IN-PLACE" }, \
{ OPU, "OUT-OF-PLACE" })
#define F2FS_BIO_FLAG_MASK(t) (t & (REQ_RAHEAD | WRITE_FLUSH_FUA))
#define F2FS_BIO_EXTRA_MASK(t) (t & (REQ_META | REQ_PRIO))
#define F2FS_OP_FLAGS (REQ_RAHEAD | REQ_SYNC | REQ_META | REQ_PRIO | \
REQ_PREFLUSH | REQ_FUA)
#define F2FS_BIO_FLAG_MASK(t) (t & F2FS_OP_FLAGS)
#define show_bio_type(op_flags) show_bio_op_flags(op_flags), \
show_bio_extra(op_flags)
#define show_bio_type(op,op_flags) show_bio_op(op), \
show_bio_op_flags(op_flags)
#define show_bio_op(op) \
__print_symbolic(op, \
{ REQ_OP_READ, "READ" }, \
{ REQ_OP_WRITE, "WRITE" }, \
{ REQ_OP_DISCARD, "DISCARD" }, \
{ REQ_OP_SECURE_ERASE, "SECURE_ERASE" }, \
{ REQ_OP_WRITE_SAME, "WRITE_SAME" })
#define show_bio_op_flags(flags) \
__print_symbolic(F2FS_BIO_FLAG_MASK(flags), \
{ 0, "WRITE" }, \
{ REQ_RAHEAD, "READAHEAD" }, \
{ READ_SYNC, "READ_SYNC" }, \
{ WRITE_SYNC, "WRITE_SYNC" }, \
{ WRITE_FLUSH, "WRITE_FLUSH" }, \
{ WRITE_FUA, "WRITE_FUA" }, \
{ WRITE_FLUSH_FUA, "WRITE_FLUSH_FUA" })
#define show_bio_extra(type) \
__print_symbolic(F2FS_BIO_EXTRA_MASK(type), \
{ REQ_META, "(M)" }, \
{ REQ_PRIO, "(P)" }, \
{ REQ_META | REQ_PRIO, "(MP)" }, \
{ 0, " \b" })
__print_flags(F2FS_BIO_FLAG_MASK(flags), "|", \
{ REQ_RAHEAD, "R" }, \
{ REQ_SYNC, "S" }, \
{ REQ_META, "M" }, \
{ REQ_PRIO, "P" }, \
{ REQ_PREFLUSH, "PF" }, \
{ REQ_FUA, "FUA" })
#define show_data_type(type) \
__print_symbolic(type, \
@@ -109,12 +114,14 @@ TRACE_DEFINE_ENUM(CP_DISCARD);
{ GC_CB, "Cost-Benefit" })
#define show_cpreason(type) \
__print_symbolic(type, \
__print_flags(type, "|", \
{ CP_UMOUNT, "Umount" }, \
{ CP_FASTBOOT, "Fastboot" }, \
{ CP_SYNC, "Sync" }, \
{ CP_RECOVERY, "Recovery" }, \
{ CP_DISCARD, "Discard" })
{ CP_DISCARD, "Discard" }, \
{ CP_UMOUNT, "Umount" }, \
{ CP_TRIMMED, "Trimmed" })
struct victim_sel_policy;
struct f2fs_map_blocks;
@@ -237,7 +244,7 @@ TRACE_EVENT(f2fs_sync_fs,
),
TP_printk("dev = (%d,%d), superblock is %s, wait = %d",
show_dev(__entry),
show_dev(__entry->dev),
__entry->dirty ? "dirty" : "not dirty",
__entry->wait)
);
@@ -307,6 +314,13 @@ DEFINE_EVENT(f2fs__inode_exit, f2fs_unlink_exit,
TP_ARGS(inode, ret)
);
DEFINE_EVENT(f2fs__inode_exit, f2fs_drop_inode,
TP_PROTO(struct inode *inode, int ret),
TP_ARGS(inode, ret)
);
DEFINE_EVENT(f2fs__inode, f2fs_truncate,
TP_PROTO(struct inode *inode),
@@ -536,7 +550,7 @@ TRACE_EVENT(f2fs_background_gc,
),
TP_printk("dev = (%d,%d), wait_ms = %ld, prefree = %u, free = %u",
show_dev(__entry),
show_dev(__entry->dev),
__entry->wait_ms,
__entry->prefree,
__entry->free)
@@ -557,6 +571,7 @@ TRACE_EVENT(f2fs_get_victim,
__field(int, alloc_mode)
__field(int, gc_mode)
__field(unsigned int, victim)
__field(unsigned int, cost)
__field(unsigned int, ofs_unit)
__field(unsigned int, pre_victim)
__field(unsigned int, prefree)
@@ -570,20 +585,23 @@ TRACE_EVENT(f2fs_get_victim,
__entry->alloc_mode = p->alloc_mode;
__entry->gc_mode = p->gc_mode;
__entry->victim = p->min_segno;
__entry->cost = p->min_cost;
__entry->ofs_unit = p->ofs_unit;
__entry->pre_victim = pre_victim;
__entry->prefree = prefree;
__entry->free = free;
),
TP_printk("dev = (%d,%d), type = %s, policy = (%s, %s, %s), victim = %u "
"ofs_unit = %u, pre_victim_secno = %d, prefree = %u, free = %u",
show_dev(__entry),
TP_printk("dev = (%d,%d), type = %s, policy = (%s, %s, %s), "
"victim = %u, cost = %u, ofs_unit = %u, "
"pre_victim_secno = %d, prefree = %u, free = %u",
show_dev(__entry->dev),
show_data_type(__entry->type),
show_gc_type(__entry->gc_type),
show_alloc_mode(__entry->alloc_mode),
show_victim_policy(__entry->gc_mode),
__entry->victim,
__entry->cost,
__entry->ofs_unit,
(int)__entry->pre_victim,
__entry->prefree,
@@ -715,7 +733,7 @@ TRACE_EVENT(f2fs_reserve_new_blocks,
),
TP_printk("dev = (%d,%d), nid = %u, ofs_in_node = %u, count = %llu",
show_dev(__entry),
show_dev(__entry->dev),
(unsigned int)__entry->nid,
__entry->ofs_in_node,
(unsigned long long)__entry->count)
@@ -750,12 +768,12 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio,
),
TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, "
"oldaddr = 0x%llx, newaddr = 0x%llx, rw = %s%s, type = %s",
"oldaddr = 0x%llx, newaddr = 0x%llx, rw = %s(%s), type = %s",
show_dev_ino(__entry),
(unsigned long)__entry->index,
(unsigned long long)__entry->old_blkaddr,
(unsigned long long)__entry->new_blkaddr,
show_bio_type(__entry->op_flags),
show_bio_type(__entry->op, __entry->op_flags),
show_block_type(__entry->type))
);
@@ -777,15 +795,15 @@ DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_mbio,
TP_CONDITION(page->mapping)
);
DECLARE_EVENT_CLASS(f2fs__submit_bio,
DECLARE_EVENT_CLASS(f2fs__bio,
TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio,
struct bio *bio),
TP_PROTO(struct super_block *sb, int type, struct bio *bio),
TP_ARGS(sb, fio, bio),
TP_ARGS(sb, type, bio),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(dev_t, target)
__field(int, op)
__field(int, op_flags)
__field(int, type)
@@ -795,37 +813,55 @@ DECLARE_EVENT_CLASS(f2fs__submit_bio,
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->op = fio->op;
__entry->op_flags = fio->op_flags;
__entry->type = fio->type;
__entry->target = bio->bi_bdev->bd_dev;
__entry->op = bio_op(bio);
__entry->op_flags = bio->bi_opf;
__entry->type = type;
__entry->sector = bio->bi_iter.bi_sector;
__entry->size = bio->bi_iter.bi_size;
),
TP_printk("dev = (%d,%d), rw = %s%s, %s, sector = %lld, size = %u",
show_dev(__entry),
show_bio_type(__entry->op_flags),
TP_printk("dev = (%d,%d)/(%d,%d), rw = %s(%s), %s, sector = %lld, size = %u",
show_dev(__entry->target),
show_dev(__entry->dev),
show_bio_type(__entry->op, __entry->op_flags),
show_block_type(__entry->type),
(unsigned long long)__entry->sector,
__entry->size)
);
DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_write_bio,
DEFINE_EVENT_CONDITION(f2fs__bio, f2fs_prepare_write_bio,
TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio,
struct bio *bio),
TP_PROTO(struct super_block *sb, int type, struct bio *bio),
TP_ARGS(sb, fio, bio),
TP_ARGS(sb, type, bio),
TP_CONDITION(bio)
);
DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_read_bio,
DEFINE_EVENT_CONDITION(f2fs__bio, f2fs_prepare_read_bio,
TP_PROTO(struct super_block *sb, struct f2fs_io_info *fio,
struct bio *bio),
TP_PROTO(struct super_block *sb, int type, struct bio *bio),
TP_ARGS(sb, fio, bio),
TP_ARGS(sb, type, bio),
TP_CONDITION(bio)
);
DEFINE_EVENT_CONDITION(f2fs__bio, f2fs_submit_read_bio,
TP_PROTO(struct super_block *sb, int type, struct bio *bio),
TP_ARGS(sb, type, bio),
TP_CONDITION(bio)
);
DEFINE_EVENT_CONDITION(f2fs__bio, f2fs_submit_write_bio,
TP_PROTO(struct super_block *sb, int type, struct bio *bio),
TP_ARGS(sb, type, bio),
TP_CONDITION(bio)
);
@@ -1084,16 +1120,16 @@ TRACE_EVENT(f2fs_write_checkpoint,
),
TP_printk("dev = (%d,%d), checkpoint for %s, state = %s",
show_dev(__entry),
show_dev(__entry->dev),
show_cpreason(__entry->reason),
__entry->msg)
);
TRACE_EVENT(f2fs_issue_discard,
DECLARE_EVENT_CLASS(f2fs_discard,
TP_PROTO(struct super_block *sb, block_t blkstart, block_t blklen),
TP_PROTO(struct block_device *dev, block_t blkstart, block_t blklen),
TP_ARGS(sb, blkstart, blklen),
TP_ARGS(dev, blkstart, blklen),
TP_STRUCT__entry(
__field(dev_t, dev)
@@ -1102,40 +1138,78 @@ TRACE_EVENT(f2fs_issue_discard,
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->dev = dev->bd_dev;
__entry->blkstart = blkstart;
__entry->blklen = blklen;
),
TP_printk("dev = (%d,%d), blkstart = 0x%llx, blklen = 0x%llx",
show_dev(__entry),
show_dev(__entry->dev),
(unsigned long long)__entry->blkstart,
(unsigned long long)__entry->blklen)
);
DEFINE_EVENT(f2fs_discard, f2fs_queue_discard,
TP_PROTO(struct block_device *dev, block_t blkstart, block_t blklen),
TP_ARGS(dev, blkstart, blklen)
);
DEFINE_EVENT(f2fs_discard, f2fs_issue_discard,
TP_PROTO(struct block_device *dev, block_t blkstart, block_t blklen),
TP_ARGS(dev, blkstart, blklen)
);
TRACE_EVENT(f2fs_issue_reset_zone,
TP_PROTO(struct block_device *dev, block_t blkstart),
TP_ARGS(dev, blkstart),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(block_t, blkstart)
),
TP_fast_assign(
__entry->dev = dev->bd_dev;
__entry->blkstart = blkstart;
),
TP_printk("dev = (%d,%d), reset zone at block = 0x%llx",
show_dev(__entry->dev),
(unsigned long long)__entry->blkstart)
);
TRACE_EVENT(f2fs_issue_flush,
TP_PROTO(struct super_block *sb, unsigned int nobarrier,
unsigned int flush_merge),
TP_PROTO(struct block_device *dev, unsigned int nobarrier,
unsigned int flush_merge, int ret),
TP_ARGS(sb, nobarrier, flush_merge),
TP_ARGS(dev, nobarrier, flush_merge, ret),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(unsigned int, nobarrier)
__field(unsigned int, flush_merge)
__field(int, ret)
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->dev = dev->bd_dev;
__entry->nobarrier = nobarrier;
__entry->flush_merge = flush_merge;
__entry->ret = ret;
),
TP_printk("dev = (%d,%d), %s %s",
show_dev(__entry),
TP_printk("dev = (%d,%d), %s %s, ret = %d",
show_dev(__entry->dev),
__entry->nobarrier ? "skip (nobarrier)" : "issue",
__entry->flush_merge ? " with flush_merge" : "")
__entry->flush_merge ? " with flush_merge" : "",
__entry->ret)
);
TRACE_EVENT(f2fs_lookup_extent_tree_start,
@@ -1248,7 +1322,7 @@ TRACE_EVENT(f2fs_shrink_extent_tree,
),
TP_printk("dev = (%d,%d), shrunk: node_cnt = %u, tree_cnt = %u",
show_dev(__entry),
show_dev(__entry->dev),
__entry->node_cnt,
__entry->tree_cnt)
);
@@ -1295,7 +1369,7 @@ DECLARE_EVENT_CLASS(f2fs_sync_dirty_inodes,
),
TP_printk("dev = (%d,%d), %s, dirty count = %lld",
show_dev(__entry),
show_dev(__entry->dev),
show_file_type(__entry->type),
__entry->count)
);

View File

@@ -255,18 +255,45 @@ struct fsxattr {
/* Policy provided via an ioctl on the topmost directory */
#define FS_KEY_DESCRIPTOR_SIZE 8
#define FS_POLICY_FLAGS_PAD_4 0x00
#define FS_POLICY_FLAGS_PAD_8 0x01
#define FS_POLICY_FLAGS_PAD_16 0x02
#define FS_POLICY_FLAGS_PAD_32 0x03
#define FS_POLICY_FLAGS_PAD_MASK 0x03
#define FS_POLICY_FLAGS_VALID 0x03
/* Encryption algorithms */
#define FS_ENCRYPTION_MODE_INVALID 0
#define FS_ENCRYPTION_MODE_AES_256_XTS 1
#define FS_ENCRYPTION_MODE_AES_256_GCM 2
#define FS_ENCRYPTION_MODE_AES_256_CBC 3
#define FS_ENCRYPTION_MODE_AES_256_CTS 4
struct fscrypt_policy {
__u8 version;
__u8 contents_encryption_mode;
__u8 filenames_encryption_mode;
__u8 flags;
__u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
} __packed;
};
#define FS_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct fscrypt_policy)
#define FS_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16])
#define FS_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct fscrypt_policy)
/* Parameters for passing an encryption key into the kernel keyring */
#define FS_KEY_DESC_PREFIX "fscrypt:"
#define FS_KEY_DESC_PREFIX_SIZE 8
/* Structure that userspace passes to the kernel keyring */
#define FS_MAX_KEY_SIZE 64
struct fscrypt_key {
__u32 mode;
__u8 raw[FS_MAX_KEY_SIZE];
__u32 size;
};
/*
* Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
*