Patch series "userfaultfd: add minor fault handling for shmem", v6. Overview ======== See the series which added minor faults for hugetlbfs [3] for a detailed overview of minor fault handling in general. This series adds the same support for shmem-backed areas. This series is structured as follows: - Commits 1 and 2 are cleanups. - Commits 3 and 4 implement the new feature (minor fault handling for shmem). - Commit 5 advertises that the feature is now available since at this point it's fully implemented. - Commit 6 is a final cleanup, modifying an existing code path to re-use a new helper we've introduced. - Commits 7, 8, 9, 10 update the userfaultfd selftest to exercise the feature. Use Case ======== In some cases it is useful to have VM memory backed by tmpfs instead of hugetlbfs. So, this feature will be used to support the same VM live migration use case described in my original series. Additionally, Android folks (Lokesh Gidra <lokeshgidra@google.com>) hope to optimize the Android Runtime garbage collector using this feature: "The plan is to use userfaultfd for concurrently compacting the heap. With this feature, the heap can be shared-mapped at another location where the GC-thread(s) could continue the compaction operation without the need to invoke userfault ioctl(UFFDIO_COPY) each time. OTOH, if and when Java threads get faults on the heap, UFFDIO_CONTINUE can be used to resume execution. Furthermore, this feature enables updating references in the 'non-moving' portion of the heap efficiently. Without this feature, unnecessary page copying (ioctl(UFFDIO_COPY)) would be required." 
[1] https://lore.kernel.org/patchwork/cover/1388144/ [2] https://lore.kernel.org/patchwork/patch/1408161/ [3] https://lore.kernel.org/linux-fsdevel/20210301222728.176417-1-axelrasmussen@google.com/T/#t This patch (of 9): Previously, we did a dance where we had one calling path in userfaultfd.c (mfill_atomic_pte), but then we split it into two in shmem_fs.h (shmem_{mcopy_atomic,mfill_zeropage}_pte), and then rejoined into a single shared function in shmem.c (shmem_mfill_atomic_pte). This is all a bit overly complex. Just call the single combined shmem function directly, allowing us to clean up various branches, boilerplate, etc. While we're touching this function, two other small cleanup changes: - offset is equivalent to pgoff, so we can get rid of offset entirely. - Split two VM_BUG_ON cases into two statements. This means the line number reported when the BUG is hit specifies exactly which condition was true. Link: https://lkml.kernel.org/r/20210503180737.2487560-1-axelrasmussen@google.com Link: https://lkml.kernel.org/r/20210503180737.2487560-3-axelrasmussen@google.com Signed-off-by: Axel Rasmussen <axelrasmussen@google.com> Reviewed-by: Peter Xu <peterx@redhat.com> Acked-by: Hugh Dickins <hughd@google.com> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Brian Geffon <bgeffon@google.com> Cc: "Dr . David Alan Gilbert" <dgilbert@redhat.com> Cc: Jerome Glisse <jglisse@redhat.com> Cc: Joe Perches <joe@perches.com> Cc: Kirill A. 
Shutemov <kirill@shutemov.name> Cc: Lokesh Gidra <lokeshgidra@google.com> Cc: Mike Kravetz <mike.kravetz@oracle.com> Cc: Mike Rapoport <rppt@linux.vnet.ibm.com> Cc: Mina Almasry <almasrymina@google.com> Cc: Oliver Upton <oupton@google.com> Cc: Shaohua Li <shli@fb.com> Cc: Shuah Khan <shuah@kernel.org> Cc: Stephen Rothwell <sfr@canb.auug.org.au> Cc: Wang Qing <wangqing@vivo.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au> (cherry picked from commit f7e89f242f0dfcdd62e7aeecebdc2620e4792954 https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git akpm) Link: https://lore.kernel.org/patchwork/patch/1420969/ Conflicts: 1. include/linux/shmem_fs.h 2. mm/shmem.c 3. mm/userfaultfd.c (All resolved by manual rebase) Signed-off-by: Lokesh Gidra <lokeshgidra@google.com> Bug: 187930641 Change-Id: I8b443809643339b407d0bc06bf96146ecfc5a9fa
137 lines
4.8 KiB
C
137 lines
4.8 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef __SHMEM_FS_H
|
|
#define __SHMEM_FS_H
|
|
|
|
#include <linux/file.h>
|
|
#include <linux/swap.h>
|
|
#include <linux/mempolicy.h>
|
|
#include <linux/pagemap.h>
|
|
#include <linux/percpu_counter.h>
|
|
#include <linux/xattr.h>
|
|
|
|
/* inode in-kernel data */
|
|
|
|
struct shmem_inode_info {
|
|
spinlock_t lock;
|
|
unsigned int seals; /* shmem seals */
|
|
unsigned long flags;
|
|
unsigned long alloced; /* data pages alloced to file */
|
|
unsigned long swapped; /* subtotal assigned to swap */
|
|
struct list_head shrinklist; /* shrinkable hpage inodes */
|
|
struct list_head swaplist; /* chain of maybes on swap */
|
|
struct shared_policy policy; /* NUMA memory alloc policy */
|
|
struct simple_xattrs xattrs; /* list of xattrs */
|
|
struct inode vfs_inode;
|
|
};
|
|
|
|
struct shmem_sb_info {
|
|
unsigned long max_blocks; /* How many blocks are allowed */
|
|
struct percpu_counter used_blocks; /* How many are allocated */
|
|
unsigned long max_inodes; /* How many inodes are allowed */
|
|
unsigned long free_inodes; /* How many are left for allocation */
|
|
spinlock_t stat_lock; /* Serialize shmem_sb_info changes */
|
|
umode_t mode; /* Mount mode for root directory */
|
|
unsigned char huge; /* Whether to try for hugepages */
|
|
kuid_t uid; /* Mount uid for root directory */
|
|
kgid_t gid; /* Mount gid for root directory */
|
|
struct mempolicy *mpol; /* default memory policy for mappings */
|
|
spinlock_t shrinklist_lock; /* Protects shrinklist */
|
|
struct list_head shrinklist; /* List of shinkable inodes */
|
|
unsigned long shrinklist_len; /* Length of shrinklist */
|
|
};
|
|
|
|
static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)
|
|
{
|
|
return container_of(inode, struct shmem_inode_info, vfs_inode);
|
|
}
|
|
|
|
/*
|
|
* Functions in mm/shmem.c called directly from elsewhere:
|
|
*/
|
|
extern int shmem_init(void);
|
|
extern int shmem_fill_super(struct super_block *sb, void *data, int silent);
|
|
extern struct file *shmem_file_setup(const char *name,
|
|
loff_t size, unsigned long flags);
|
|
extern struct file *shmem_kernel_file_setup(const char *name, loff_t size,
|
|
unsigned long flags);
|
|
extern struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt,
|
|
const char *name, loff_t size, unsigned long flags);
|
|
extern int shmem_zero_setup(struct vm_area_struct *);
|
|
extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr,
|
|
unsigned long len, unsigned long pgoff, unsigned long flags);
|
|
extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
|
|
#ifdef CONFIG_SHMEM
extern bool shmem_mapping(struct address_space *mapping);
#else
/* Stub used when CONFIG_SHMEM is not set: it always returns false. */
static inline bool shmem_mapping(struct address_space *mapping)
{
	return false;
}
#endif /* CONFIG_SHMEM */
|
|
extern void shmem_unlock_mapping(struct address_space *mapping);
|
|
extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
|
|
pgoff_t index, gfp_t gfp_mask);
|
|
extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
|
|
extern int shmem_unuse(swp_entry_t entry, struct page *page);
|
|
|
|
extern unsigned long shmem_swap_usage(struct vm_area_struct *vma);
|
|
extern unsigned long shmem_partial_swap_usage(struct address_space *mapping,
|
|
pgoff_t start, pgoff_t end);
|
|
|
|
/*
 * Flag allocation requirements to shmem_getpage.
 *
 * The enumerators rely on implicit numbering starting at 0, so their
 * order must not change.
 */
enum sgp_type {
	SGP_READ,	/* don't exceed i_size, don't allocate page */
	SGP_CACHE,	/* don't exceed i_size, may allocate page */
	SGP_NOHUGE,	/* like SGP_CACHE, but no huge pages */
	SGP_HUGE,	/* like SGP_CACHE, huge pages preferred */
	SGP_WRITE,	/* may exceed i_size, may allocate !Uptodate page */
	SGP_FALLOC,	/* like SGP_WRITE, but make existing page Uptodate */
};
|
|
|
|
/* @sgp selects the allocation requirements, see enum sgp_type. */
extern int shmem_getpage(struct inode *inode, pgoff_t index,
		struct page **pagep, enum sgp_type sgp);
|
|
|
|
/*
 * Convenience wrapper around shmem_read_mapping_page_gfp() that passes the
 * mapping's own gfp mask, as returned by mapping_gfp_mask().
 */
static inline struct page *shmem_read_mapping_page(
				struct address_space *mapping, pgoff_t index)
{
	return shmem_read_mapping_page_gfp(mapping, index,
					mapping_gfp_mask(mapping));
}
|
|
|
|
static inline bool shmem_file(struct file *file)
|
|
{
|
|
if (!IS_ENABLED(CONFIG_SHMEM))
|
|
return false;
|
|
if (!file || !file->f_mapping)
|
|
return false;
|
|
return shmem_mapping(file->f_mapping);
|
|
}
|
|
|
|
/* Paired charge/uncharge of @pages against @inode. */
extern bool shmem_charge(struct inode *inode, long pages);
extern void shmem_uncharge(struct inode *inode, long pages);
|
|
|
|
#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
extern bool shmem_huge_enabled(struct vm_area_struct *vma);
#else
/*
 * Stub used when CONFIG_TRANSPARENT_HUGE_PAGECACHE is not set: it always
 * returns false.
 */
static inline bool shmem_huge_enabled(struct vm_area_struct *vma)
{
	return false;
}
#endif
|
|
|
|
#ifdef CONFIG_USERFAULTFD
#ifdef CONFIG_SHMEM
/*
 * Single shmem entry point for userfaultfd's mfill_atomic_pte path.
 * @zeropage selects between what used to be the separate
 * shmem_mcopy_atomic_pte and shmem_mfill_zeropage_pte wrappers.
 */
extern int shmem_mfill_atomic_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
				  struct vm_area_struct *dst_vma,
				  unsigned long dst_addr,
				  unsigned long src_addr,
				  bool zeropage,
				  struct page **pagep);
#else /* !CONFIG_SHMEM */
/* Without shmem this must never be reached: BUG() and evaluate to 0. */
#define shmem_mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr, \
			       src_addr, zeropage, pagep)	({ BUG(); 0; })
#endif /* CONFIG_SHMEM */
#endif /* CONFIG_USERFAULTFD */
|
|
|
|
#endif
|